/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);

	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}
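
/*
 * Drain one mmap'd ring buffer into the output file.  The kernel
 * advances the head as it produces events; everything between our
 * last read position (md->prev) and the current head is written
 * out, and the new tail is published so the kernel can reuse that
 * space.  A chunk that wraps past the end of the buffer is copied
 * in two pieces.
 */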
static int perf_record__mmap_read(struct perf_record *rec,
				  struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = perf_evlist__first(other);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = perf_evsel__next(pair);
	}

	return true;
}
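
/*
 * Open a counter for every event on each mapped cpu/thread pair.
 * EINVAL from sys_perf_event_open() is treated as "old kernel":
 * newer perf_event_attr fields (exclude_guest/exclude_host, then
 * sample_id_all) are cleared and the open retried, so a new perf
 * binary still works on older kernels.  As a last resort the
 * hardware cycles event falls back to the software cpu-clock event.
 */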
static int perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				rc = -err;
				goto out;
			} else if (err == ENODEV && opts->target.cpu_list) {
				pr_err("No such device - did you specify"
				       " an out-of-range profile CPU?\n");
				rc = -err;
				goto out;
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
			    && attr->type == PERF_TYPE_HARDWARE
			    && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				rc = -err;
				goto out;
			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
				ui__error("'precise' request may not be supported. "
					  "Try removing 'p' modifier\n");
				rc = -err;
				goto out;
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s) for event %s. /bin/dmesg may provide "
			      "additional information.\n",
			      err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE &&
			    err == EOPNOTSUPP) {
				pr_err("No hardware sampling interrupt available."
				       " No APIC? If so then you can boot the kernel"
				       " with the \"lapic\" boot parameter to"
				       " force-enable it.\n");
				rc = -err;
				goto out;
			}
#endif

			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			rc = -err;
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.\n");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
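
/*
 * Post-process the events we just recorded: re-read them from
 * post_processing_offset onwards with build_id__mark_dso_hit_ops,
 * so that only DSOs that actually got samples have their build-ids
 * emitted into the perf.data header.
 */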
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For guest kernels, when processing the record & report
	 * subcommands we arrange the module mmaps before the guest
	 * kernel mmap and trigger a DSO preload, because by default
	 * guest module symbols are loaded from guest kallsyms instead
	 * of /lib/modules/XXX/XXX.  This avoids missing symbols when
	 * the first address falls in a module rather than in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernels because a guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
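
/*
 * A PERF_RECORD_FINISHED_ROUND pseudo event is written after each
 * pass over all the mmap buffers.  Tools reading the file use it
 * as a flush point when re-sorting events into timestamp order:
 * everything seen before a round marker is complete.
 */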
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}
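
	/*
	 * For a plain 'perf record <cmd>' the workload is forked now
	 * but sits waiting: its counters are created with
	 * enable_on_exec set, and perf_evlist__start_workload() below
	 * releases the child only after everything is set up, so the
	 * command is measured from its first instruction onwards.
	 */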
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);
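
	/*
	 * The kernel only emits MMAP/COMM events for what happens
	 * after the counters are enabled, so the tasks that already
	 * exist have to be synthesized from /proc: just the targeted
	 * threads normally, or every task on the system for -a.
	 */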
	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
							process_synthesized_event,
							machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
						     machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that.  Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
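
/*
 * Branch stack sampling: each token accepted by -j/--branch-filter
 * maps to one PERF_SAMPLE_BRANCH_* bit and comma separated tokens
 * are OR'ed together.  If only privilege level bits (u, k, hv) end
 * up set, the branch type defaults to "any".
 */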
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * Cannot set it twice, -b + --branch-filter for instance.
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0)
		*mode = PERF_SAMPLE_BRANCH_ANY;

error:
	free(os);
	return ret;
}

#ifdef LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
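
/*
 * -g/--call-graph accepts "fp" for frame pointer based chains
 * collected by the kernel and, when built with libunwind support,
 * "dwarf[,size]" where size is how many bytes of user stack to
 * snapshot with each sample (default 8192, rounded to a multiple
 * of u64) for DWARF unwinding at report time.
 */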
static int
parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
		    int unset)
{
	struct perf_record *rec = (struct perf_record *)opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* We set a default option if none is provided. */
	BUG_ON(!arg);

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				rec->opts.call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			rec->opts.call_graph = CALLCHAIN_DWARF;
			rec->opts.stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				rec->opts.stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 rec->opts.stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", rec->opts.call_graph);

	return ret;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	= UINT_MAX,
		.user_freq	= UINT_MAX,
		.user_interval	= ULLONG_MAX,
		.freq		= 4000,
		.target		= {
			.uses_mmap = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		   "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
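
/*
 * A user supplied period (-c/--count) overrides the sampling
 * frequency: freq is cleared and a fixed event count is used.
 * Otherwise the frequency, -F/--freq or the 4000 Hz default,
 * is what drives the sampling period.
 */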
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A\n");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval) {
		rec->opts.freq = 0;
	} else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}