1 /* 2 * builtin-record.c 3 * 4 * Builtin record command: Record the profile of a workload 5 * (or a CPU, or a PID) into the perf.data output file - for 6 * later analysis via perf report. 7 */ 8 #include "builtin.h" 9 10 #include "perf.h" 11 12 #include "util/build-id.h" 13 #include "util/util.h" 14 #include <subcmd/parse-options.h> 15 #include "util/parse-events.h" 16 #include "util/config.h" 17 18 #include "util/callchain.h" 19 #include "util/cgroup.h" 20 #include "util/header.h" 21 #include "util/event.h" 22 #include "util/evlist.h" 23 #include "util/evsel.h" 24 #include "util/debug.h" 25 #include "util/drv_configs.h" 26 #include "util/session.h" 27 #include "util/tool.h" 28 #include "util/symbol.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/llvm-utils.h" 38 #include "util/bpf-loader.h" 39 #include "util/trigger.h" 40 #include "util/perf-hooks.h" 41 #include "util/time-utils.h" 42 #include "util/units.h" 43 #include "asm/bug.h" 44 45 #include <errno.h> 46 #include <inttypes.h> 47 #include <poll.h> 48 #include <unistd.h> 49 #include <sched.h> 50 #include <signal.h> 51 #include <sys/mman.h> 52 #include <sys/wait.h> 53 #include <asm/bug.h> 54 #include <linux/time64.h> 55 56 struct switch_output { 57 bool enabled; 58 bool signal; 59 unsigned long size; 60 unsigned long time; 61 const char *str; 62 bool set; 63 }; 64 65 struct record { 66 struct perf_tool tool; 67 struct record_opts opts; 68 u64 bytes_written; 69 struct perf_data_file file; 70 struct auxtrace_record *itr; 71 struct perf_evlist *evlist; 72 struct perf_session *session; 73 const char *progname; 74 int realtime_prio; 75 bool no_buildid; 76 bool no_buildid_set; 77 bool no_buildid_cache; 78 bool no_buildid_cache_set; 79 bool buildid_all; 80 bool timestamp_filename; 81 struct switch_output switch_output; 82 unsigned long long samples; 83 }; 84 85 static volatile int auxtrace_record__snapshot_started; 86 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 87 static DEFINE_TRIGGER(switch_output_trigger); 88 89 static bool switch_output_signal(struct record *rec) 90 { 91 return rec->switch_output.signal && 92 trigger_is_ready(&switch_output_trigger); 93 } 94 95 static bool switch_output_size(struct record *rec) 96 { 97 return rec->switch_output.size && 98 trigger_is_ready(&switch_output_trigger) && 99 (rec->bytes_written >= rec->switch_output.size); 100 } 101 102 static bool switch_output_time(struct record *rec) 103 { 104 return rec->switch_output.time && 105 trigger_is_ready(&switch_output_trigger); 106 } 107 108 static int record__write(struct record *rec, void *bf, size_t size) 109 { 110 if (perf_data_file__write(rec->session->file, bf, size) < 0) { 111 pr_err("failed to write perf data, error: %m\n"); 112 return -1; 113 } 114 115 rec->bytes_written += size; 116 117 if (switch_output_size(rec)) 118 trigger_hit(&switch_output_trigger); 119 120 return 0; 121 } 122 123 static int process_synthesized_event(struct perf_tool *tool, 124 union perf_event *event, 125 struct perf_sample *sample __maybe_unused, 126 struct machine *machine __maybe_unused) 127 { 128 struct record *rec = container_of(tool, struct record, tool); 129 return record__write(rec, event, event->header.size); 130 } 131 132 static int 133 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) 134 { 135 struct perf_event_header *pheader; 136 u64 evt_head = head; 137 int size = mask + 1; 138 139 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); 140 pheader = (struct perf_event_header *)(buf + (head & mask)); 141 *start = head; 142 while (true) { 143 if (evt_head - head >= (unsigned int)size) { 144 pr_debug("Finished reading backward ring buffer: rewind\n"); 145 if (evt_head - head > (unsigned int)size) 146 evt_head -= pheader->size; 147 *end = evt_head; 148 return 0; 149 } 150 151 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); 152 153 if (pheader->size == 0) { 154 pr_debug("Finished reading backward ring buffer: get start\n"); 155 *end = evt_head; 156 return 0; 157 } 158 159 evt_head += pheader->size; 160 pr_debug3("move evt_head: %"PRIx64"\n", evt_head); 161 } 162 WARN_ONCE(1, "Shouldn't get here\n"); 163 return -1; 164 } 165 166 static int 167 rb_find_range(void *data, int mask, u64 head, u64 old, 168 u64 *start, u64 *end, bool backward) 169 { 170 if (!backward) { 171 *start = old; 172 *end = head; 173 return 0; 174 } 175 176 return backward_rb_find_range(data, mask, head, start, end); 177 } 178 179 static int 180 record__mmap_read(struct record *rec, struct perf_mmap *md, 181 bool overwrite, bool backward) 182 { 183 u64 head = perf_mmap__read_head(md); 184 u64 old = md->prev; 185 u64 end = head, start = old; 186 unsigned char *data = md->base + page_size; 187 unsigned long size; 188 void *buf; 189 int rc = 0; 190 191 if (rb_find_range(data, md->mask, head, 192 old, &start, &end, backward)) 193 return -1; 194 195 if (start == end) 196 return 0; 197 198 rec->samples++; 199 200 size = end - start; 201 if (size > (unsigned long)(md->mask) + 1) { 202 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); 203 204 md->prev = head; 205 perf_mmap__consume(md, overwrite || backward); 206 return 0; 207 } 208 209 if ((start & md->mask) + size != (end & md->mask)) { 210 buf = &data[start & md->mask]; 211 size = md->mask + 1 - (start & md->mask); 212 start += size; 213 214 if (record__write(rec, buf, size) < 0) { 215 rc = -1; 216 goto out; 217 } 218 } 219 220 buf = &data[start & md->mask]; 221 size = end - start; 222 start += size; 223 224 if (record__write(rec, buf, size) < 0) { 225 rc = -1; 226 goto out; 227 } 228 229 md->prev = head; 230 perf_mmap__consume(md, overwrite || backward); 231 out: 232 return rc; 233 } 234 235 static volatile int done; 236 static volatile int signr = -1; 237 static volatile int child_finished; 238 239 static void sig_handler(int sig) 240 { 241 if (sig == SIGCHLD) 242 child_finished = 1; 243 else 244 signr = sig; 245 246 done = 1; 247 } 248 249 static void sigsegv_handler(int sig) 250 { 251 perf_hooks__recover(); 252 sighandler_dump_stack(sig); 253 } 254 255 static void record__sig_exit(void) 256 { 257 if (signr == -1) 258 return; 259 260 signal(signr, SIG_DFL); 261 raise(signr); 262 } 263 264 #ifdef HAVE_AUXTRACE_SUPPORT 265 266 static int record__process_auxtrace(struct perf_tool *tool, 267 union perf_event *event, void *data1, 268 size_t len1, void *data2, size_t len2) 269 { 270 struct record *rec = container_of(tool, struct record, tool); 271 struct perf_data_file *file = &rec->file; 272 size_t padding; 273 u8 pad[8] = {0}; 274 275 if (!perf_data_file__is_pipe(file)) { 276 off_t file_offset; 277 int fd = perf_data_file__fd(file); 278 int err; 279 280 file_offset = lseek(fd, 0, SEEK_CUR); 281 if (file_offset == -1) 282 return -1; 283 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 284 event, file_offset); 285 if (err) 286 return err; 287 } 288 289 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 290 padding = (len1 + len2) & 7; 291 if (padding) 292 padding = 8 - padding; 293 294 record__write(rec, event, event->header.size); 295 record__write(rec, data1, len1); 296 if (len2) 297 record__write(rec, data2, len2); 298 record__write(rec, &pad, padding); 299 300 return 0; 301 } 302 303 static int record__auxtrace_mmap_read(struct record *rec, 304 struct auxtrace_mmap *mm) 305 { 306 int ret; 307 308 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool, 309 record__process_auxtrace); 310 if (ret < 0) 311 return ret; 312 313 if (ret) 314 rec->samples++; 315 316 return 0; 317 } 318 319 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 320 struct auxtrace_mmap *mm) 321 { 322 int ret; 323 324 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool, 325 record__process_auxtrace, 326 rec->opts.auxtrace_snapshot_size); 327 if (ret < 0) 328 return ret; 329 330 if (ret) 331 rec->samples++; 332 333 return 0; 334 } 335 336 static int record__auxtrace_read_snapshot_all(struct record *rec) 337 { 338 int i; 339 int rc = 0; 340 341 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 342 struct auxtrace_mmap *mm = 343 &rec->evlist->mmap[i].auxtrace_mmap; 344 345 if (!mm->base) 346 continue; 347 348 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) { 349 rc = -1; 350 goto out; 351 } 352 } 353 out: 354 return rc; 355 } 356 357 static void record__read_auxtrace_snapshot(struct record *rec) 358 { 359 pr_debug("Recording AUX area tracing snapshot\n"); 360 if (record__auxtrace_read_snapshot_all(rec) < 0) { 361 trigger_error(&auxtrace_snapshot_trigger); 362 } else { 363 if (auxtrace_record__snapshot_finish(rec->itr)) 364 trigger_error(&auxtrace_snapshot_trigger); 365 else 366 trigger_ready(&auxtrace_snapshot_trigger); 367 } 368 } 369 370 #else 371 372 static inline 373 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 374 struct auxtrace_mmap *mm __maybe_unused) 375 { 376 return 0; 377 } 378 379 static inline 380 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 381 { 382 } 383 384 static inline 385 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 386 { 387 return 0; 388 } 389 390 #endif 391 392 static int record__mmap_evlist(struct record *rec, 393 struct perf_evlist *evlist) 394 { 395 struct record_opts *opts = &rec->opts; 396 char msg[512]; 397 398 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, 399 opts->auxtrace_mmap_pages, 400 opts->auxtrace_snapshot_mode) < 0) { 401 if (errno == EPERM) { 402 pr_err("Permission error mapping pages.\n" 403 "Consider increasing " 404 "/proc/sys/kernel/perf_event_mlock_kb,\n" 405 "or try again with a smaller value of -m/--mmap_pages.\n" 406 "(current value: %u,%u)\n", 407 opts->mmap_pages, opts->auxtrace_mmap_pages); 408 return -errno; 409 } else { 410 pr_err("failed to mmap with %d (%s)\n", errno, 411 str_error_r(errno, msg, sizeof(msg))); 412 if (errno) 413 return -errno; 414 else 415 return -EINVAL; 416 } 417 } 418 return 0; 419 } 420 421 static int record__mmap(struct record *rec) 422 { 423 return record__mmap_evlist(rec, rec->evlist); 424 } 425 426 static int record__open(struct record *rec) 427 { 428 char msg[BUFSIZ]; 429 struct perf_evsel *pos; 430 struct perf_evlist *evlist = rec->evlist; 431 struct perf_session *session = rec->session; 432 struct record_opts *opts = &rec->opts; 433 struct perf_evsel_config_term *err_term; 434 int rc = 0; 435 436 perf_evlist__config(evlist, opts, &callchain_param); 437 438 evlist__for_each_entry(evlist, pos) { 439 try_again: 440 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 441 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 442 if (verbose > 0) 443 ui__warning("%s\n", msg); 444 goto try_again; 445 } 446 447 rc = -errno; 448 perf_evsel__open_strerror(pos, &opts->target, 449 errno, msg, sizeof(msg)); 450 ui__error("%s\n", msg); 451 goto out; 452 } 453 } 454 455 if (perf_evlist__apply_filters(evlist, &pos)) { 456 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 457 pos->filter, perf_evsel__name(pos), errno, 458 str_error_r(errno, msg, sizeof(msg))); 459 rc = -1; 460 goto out; 461 } 462 463 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { 464 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 465 err_term->val.drv_cfg, perf_evsel__name(pos), errno, 466 str_error_r(errno, msg, sizeof(msg))); 467 rc = -1; 468 goto out; 469 } 470 471 rc = record__mmap(rec); 472 if (rc) 473 goto out; 474 475 session->evlist = evlist; 476 perf_session__set_id_hdr_size(session); 477 out: 478 return rc; 479 } 480 481 static int process_sample_event(struct perf_tool *tool, 482 union perf_event *event, 483 struct perf_sample *sample, 484 struct perf_evsel *evsel, 485 struct machine *machine) 486 { 487 struct record *rec = container_of(tool, struct record, tool); 488 489 rec->samples++; 490 491 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 492 } 493 494 static int process_buildids(struct record *rec) 495 { 496 struct perf_data_file *file = &rec->file; 497 struct perf_session *session = rec->session; 498 499 if (file->size == 0) 500 return 0; 501 502 /* 503 * During this process, it'll load kernel map and replace the 504 * dso->long_name to a real pathname it found. In this case 505 * we prefer the vmlinux path like 506 * /lib/modules/3.16.4/build/vmlinux 507 * 508 * rather than build-id path (in debug directory). 509 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 510 */ 511 symbol_conf.ignore_vmlinux_buildid = true; 512 513 /* 514 * If --buildid-all is given, it marks all DSO regardless of hits, 515 * so no need to process samples. 516 */ 517 if (rec->buildid_all) 518 rec->tool.sample = NULL; 519 520 return perf_session__process_events(session); 521 } 522 523 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 524 { 525 int err; 526 struct perf_tool *tool = data; 527 /* 528 *As for guest kernel when processing subcommand record&report, 529 *we arrange module mmap prior to guest kernel mmap and trigger 530 *a preload dso because default guest module symbols are loaded 531 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 532 *method is used to avoid symbol missing when the first addr is 533 *in module instead of in guest kernel. 534 */ 535 err = perf_event__synthesize_modules(tool, process_synthesized_event, 536 machine); 537 if (err < 0) 538 pr_err("Couldn't record guest kernel [%d]'s reference" 539 " relocation symbol.\n", machine->pid); 540 541 /* 542 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 543 * have no _text sometimes. 544 */ 545 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 546 machine); 547 if (err < 0) 548 pr_err("Couldn't record guest kernel [%d]'s reference" 549 " relocation symbol.\n", machine->pid); 550 } 551 552 static struct perf_event_header finished_round_event = { 553 .size = sizeof(struct perf_event_header), 554 .type = PERF_RECORD_FINISHED_ROUND, 555 }; 556 557 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 558 bool backward) 559 { 560 u64 bytes_written = rec->bytes_written; 561 int i; 562 int rc = 0; 563 struct perf_mmap *maps; 564 565 if (!evlist) 566 return 0; 567 568 maps = backward ? evlist->backward_mmap : evlist->mmap; 569 if (!maps) 570 return 0; 571 572 if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 573 return 0; 574 575 for (i = 0; i < evlist->nr_mmaps; i++) { 576 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; 577 578 if (maps[i].base) { 579 if (record__mmap_read(rec, &maps[i], 580 evlist->overwrite, backward) != 0) { 581 rc = -1; 582 goto out; 583 } 584 } 585 586 if (mm->base && !rec->opts.auxtrace_snapshot_mode && 587 record__auxtrace_mmap_read(rec, mm) != 0) { 588 rc = -1; 589 goto out; 590 } 591 } 592 593 /* 594 * Mark the round finished in case we wrote 595 * at least one event. 596 */ 597 if (bytes_written != rec->bytes_written) 598 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); 599 600 if (backward) 601 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 602 out: 603 return rc; 604 } 605 606 static int record__mmap_read_all(struct record *rec) 607 { 608 int err; 609 610 err = record__mmap_read_evlist(rec, rec->evlist, false); 611 if (err) 612 return err; 613 614 return record__mmap_read_evlist(rec, rec->evlist, true); 615 } 616 617 static void record__init_features(struct record *rec) 618 { 619 struct perf_session *session = rec->session; 620 int feat; 621 622 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 623 perf_header__set_feat(&session->header, feat); 624 625 if (rec->no_buildid) 626 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 627 628 if (!have_tracepoints(&rec->evlist->entries)) 629 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 630 631 if (!rec->opts.branch_stack) 632 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 633 634 if (!rec->opts.full_auxtrace) 635 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 636 637 perf_header__clear_feat(&session->header, HEADER_STAT); 638 } 639 640 static void 641 record__finish_output(struct record *rec) 642 { 643 struct perf_data_file *file = &rec->file; 644 int fd = perf_data_file__fd(file); 645 646 if (file->is_pipe) 647 return; 648 649 rec->session->header.data_size += rec->bytes_written; 650 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 651 652 if (!rec->no_buildid) { 653 process_buildids(rec); 654 655 if (rec->buildid_all) 656 dsos__hit_all(rec->session); 657 } 658 perf_session__write_header(rec->session, rec->evlist, fd, true); 659 660 return; 661 } 662 663 static int record__synthesize_workload(struct record *rec, bool tail) 664 { 665 int err; 666 struct thread_map *thread_map; 667 668 if (rec->opts.tail_synthesize != tail) 669 return 0; 670 671 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 672 if (thread_map == NULL) 673 return -1; 674 675 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 676 process_synthesized_event, 677 &rec->session->machines.host, 678 rec->opts.sample_address, 679 rec->opts.proc_map_timeout); 680 thread_map__put(thread_map); 681 return err; 682 } 683 684 static int record__synthesize(struct record *rec, bool tail); 685 686 static int 687 record__switch_output(struct record *rec, bool at_exit) 688 { 689 struct perf_data_file *file = &rec->file; 690 int fd, err; 691 692 /* Same Size: "2015122520103046"*/ 693 char timestamp[] = "InvalidTimestamp"; 694 695 record__synthesize(rec, true); 696 if (target__none(&rec->opts.target)) 697 record__synthesize_workload(rec, true); 698 699 rec->samples = 0; 700 record__finish_output(rec); 701 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 702 if (err) { 703 pr_err("Failed to get current timestamp\n"); 704 return -EINVAL; 705 } 706 707 fd = perf_data_file__switch(file, timestamp, 708 rec->session->header.data_offset, 709 at_exit); 710 if (fd >= 0 && !at_exit) { 711 rec->bytes_written = 0; 712 rec->session->header.data_size = 0; 713 } 714 715 if (!quiet) 716 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 717 file->path, timestamp); 718 719 /* Output tracking events */ 720 if (!at_exit) { 721 record__synthesize(rec, false); 722 723 /* 724 * In 'perf record --switch-output' without -a, 725 * record__synthesize() in record__switch_output() won't 726 * generate tracking events because there's no thread_map 727 * in evlist. Which causes newly created perf.data doesn't 728 * contain map and comm information. 729 * Create a fake thread_map and directly call 730 * perf_event__synthesize_thread_map() for those events. 731 */ 732 if (target__none(&rec->opts.target)) 733 record__synthesize_workload(rec, false); 734 } 735 return fd; 736 } 737 738 static volatile int workload_exec_errno; 739 740 /* 741 * perf_evlist__prepare_workload will send a SIGUSR1 742 * if the fork fails, since we asked by setting its 743 * want_signal to true. 744 */ 745 static void workload_exec_failed_signal(int signo __maybe_unused, 746 siginfo_t *info, 747 void *ucontext __maybe_unused) 748 { 749 workload_exec_errno = info->si_value.sival_int; 750 done = 1; 751 child_finished = 1; 752 } 753 754 static void snapshot_sig_handler(int sig); 755 static void alarm_sig_handler(int sig); 756 757 int __weak 758 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 759 struct perf_tool *tool __maybe_unused, 760 perf_event__handler_t process __maybe_unused, 761 struct machine *machine __maybe_unused) 762 { 763 return 0; 764 } 765 766 static const struct perf_event_mmap_page * 767 perf_evlist__pick_pc(struct perf_evlist *evlist) 768 { 769 if (evlist) { 770 if (evlist->mmap && evlist->mmap[0].base) 771 return evlist->mmap[0].base; 772 if (evlist->backward_mmap && evlist->backward_mmap[0].base) 773 return evlist->backward_mmap[0].base; 774 } 775 return NULL; 776 } 777 778 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 779 { 780 const struct perf_event_mmap_page *pc; 781 782 pc = perf_evlist__pick_pc(rec->evlist); 783 if (pc) 784 return pc; 785 return NULL; 786 } 787 788 static int record__synthesize(struct record *rec, bool tail) 789 { 790 struct perf_session *session = rec->session; 791 struct machine *machine = &session->machines.host; 792 struct perf_data_file *file = &rec->file; 793 struct record_opts *opts = &rec->opts; 794 struct perf_tool *tool = &rec->tool; 795 int fd = perf_data_file__fd(file); 796 int err = 0; 797 798 if (rec->opts.tail_synthesize != tail) 799 return 0; 800 801 if (file->is_pipe) { 802 err = perf_event__synthesize_attrs(tool, session, 803 process_synthesized_event); 804 if (err < 0) { 805 pr_err("Couldn't synthesize attrs.\n"); 806 goto out; 807 } 808 809 if (have_tracepoints(&rec->evlist->entries)) { 810 /* 811 * FIXME err <= 0 here actually means that 812 * there were no tracepoints so its not really 813 * an error, just that we don't need to 814 * synthesize anything. We really have to 815 * return this more properly and also 816 * propagate errors that now are calling die() 817 */ 818 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 819 process_synthesized_event); 820 if (err <= 0) { 821 pr_err("Couldn't record tracing data.\n"); 822 goto out; 823 } 824 rec->bytes_written += err; 825 } 826 } 827 828 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 829 process_synthesized_event, machine); 830 if (err) 831 goto out; 832 833 if (rec->opts.full_auxtrace) { 834 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 835 session, process_synthesized_event); 836 if (err) 837 goto out; 838 } 839 840 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 841 machine); 842 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 843 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 844 "Check /proc/kallsyms permission or run as root.\n"); 845 846 err = perf_event__synthesize_modules(tool, process_synthesized_event, 847 machine); 848 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 849 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 850 "Check /proc/modules permission or run as root.\n"); 851 852 if (perf_guest) { 853 machines__process_guests(&session->machines, 854 perf_event__synthesize_guest_os, tool); 855 } 856 857 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 858 process_synthesized_event, opts->sample_address, 859 opts->proc_map_timeout); 860 out: 861 return err; 862 } 863 864 static int __cmd_record(struct record *rec, int argc, const char **argv) 865 { 866 int err; 867 int status = 0; 868 unsigned long waking = 0; 869 const bool forks = argc > 0; 870 struct machine *machine; 871 struct perf_tool *tool = &rec->tool; 872 struct record_opts *opts = &rec->opts; 873 struct perf_data_file *file = &rec->file; 874 struct perf_session *session; 875 bool disabled = false, draining = false; 876 int fd; 877 878 rec->progname = argv[0]; 879 880 atexit(record__sig_exit); 881 signal(SIGCHLD, sig_handler); 882 signal(SIGINT, sig_handler); 883 signal(SIGTERM, sig_handler); 884 signal(SIGSEGV, sigsegv_handler); 885 886 if (rec->opts.record_namespaces) 887 tool->namespace_events = true; 888 889 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 890 signal(SIGUSR2, snapshot_sig_handler); 891 if (rec->opts.auxtrace_snapshot_mode) 892 trigger_on(&auxtrace_snapshot_trigger); 893 if (rec->switch_output.enabled) 894 trigger_on(&switch_output_trigger); 895 } else { 896 signal(SIGUSR2, SIG_IGN); 897 } 898 899 session = perf_session__new(file, false, tool); 900 if (session == NULL) { 901 pr_err("Perf session creation failed.\n"); 902 return -1; 903 } 904 905 fd = perf_data_file__fd(file); 906 rec->session = session; 907 908 record__init_features(rec); 909 910 if (forks) { 911 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 912 argv, file->is_pipe, 913 workload_exec_failed_signal); 914 if (err < 0) { 915 pr_err("Couldn't run the workload!\n"); 916 status = err; 917 goto out_delete_session; 918 } 919 } 920 921 if (record__open(rec) != 0) { 922 err = -1; 923 goto out_child; 924 } 925 926 err = bpf__apply_obj_config(); 927 if (err) { 928 char errbuf[BUFSIZ]; 929 930 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 931 pr_err("ERROR: Apply config to BPF failed: %s\n", 932 errbuf); 933 goto out_child; 934 } 935 936 /* 937 * Normally perf_session__new would do this, but it doesn't have the 938 * evlist. 939 */ 940 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 941 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 942 rec->tool.ordered_events = false; 943 } 944 945 if (!rec->evlist->nr_groups) 946 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 947 948 if (file->is_pipe) { 949 err = perf_header__write_pipe(fd); 950 if (err < 0) 951 goto out_child; 952 } else { 953 err = perf_session__write_header(session, rec->evlist, fd, false); 954 if (err < 0) 955 goto out_child; 956 } 957 958 if (!rec->no_buildid 959 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 960 pr_err("Couldn't generate buildids. " 961 "Use --no-buildid to profile anyway.\n"); 962 err = -1; 963 goto out_child; 964 } 965 966 machine = &session->machines.host; 967 968 err = record__synthesize(rec, false); 969 if (err < 0) 970 goto out_child; 971 972 if (rec->realtime_prio) { 973 struct sched_param param; 974 975 param.sched_priority = rec->realtime_prio; 976 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 977 pr_err("Could not set realtime priority.\n"); 978 err = -1; 979 goto out_child; 980 } 981 } 982 983 /* 984 * When perf is starting the traced process, all the events 985 * (apart from group members) have enable_on_exec=1 set, 986 * so don't spoil it by prematurely enabling them. 987 */ 988 if (!target__none(&opts->target) && !opts->initial_delay) 989 perf_evlist__enable(rec->evlist); 990 991 /* 992 * Let the child rip 993 */ 994 if (forks) { 995 union perf_event *event; 996 pid_t tgid; 997 998 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 999 if (event == NULL) { 1000 err = -ENOMEM; 1001 goto out_child; 1002 } 1003 1004 /* 1005 * Some H/W events are generated before COMM event 1006 * which is emitted during exec(), so perf script 1007 * cannot see a correct process name for those events. 1008 * Synthesize COMM event to prevent it. 1009 */ 1010 tgid = perf_event__synthesize_comm(tool, event, 1011 rec->evlist->workload.pid, 1012 process_synthesized_event, 1013 machine); 1014 free(event); 1015 1016 if (tgid == -1) 1017 goto out_child; 1018 1019 event = malloc(sizeof(event->namespaces) + 1020 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1021 machine->id_hdr_size); 1022 if (event == NULL) { 1023 err = -ENOMEM; 1024 goto out_child; 1025 } 1026 1027 /* 1028 * Synthesize NAMESPACES event for the command specified. 1029 */ 1030 perf_event__synthesize_namespaces(tool, event, 1031 rec->evlist->workload.pid, 1032 tgid, process_synthesized_event, 1033 machine); 1034 free(event); 1035 1036 perf_evlist__start_workload(rec->evlist); 1037 } 1038 1039 if (opts->initial_delay) { 1040 usleep(opts->initial_delay * USEC_PER_MSEC); 1041 perf_evlist__enable(rec->evlist); 1042 } 1043 1044 trigger_ready(&auxtrace_snapshot_trigger); 1045 trigger_ready(&switch_output_trigger); 1046 perf_hooks__invoke_record_start(); 1047 for (;;) { 1048 unsigned long long hits = rec->samples; 1049 1050 /* 1051 * rec->evlist->bkw_mmap_state is possible to be 1052 * BKW_MMAP_EMPTY here: when done == true and 1053 * hits != rec->samples in previous round. 1054 * 1055 * perf_evlist__toggle_bkw_mmap ensure we never 1056 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1057 */ 1058 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1059 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1060 1061 if (record__mmap_read_all(rec) < 0) { 1062 trigger_error(&auxtrace_snapshot_trigger); 1063 trigger_error(&switch_output_trigger); 1064 err = -1; 1065 goto out_child; 1066 } 1067 1068 if (auxtrace_record__snapshot_started) { 1069 auxtrace_record__snapshot_started = 0; 1070 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1071 record__read_auxtrace_snapshot(rec); 1072 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1073 pr_err("AUX area tracing snapshot failed\n"); 1074 err = -1; 1075 goto out_child; 1076 } 1077 } 1078 1079 if (trigger_is_hit(&switch_output_trigger)) { 1080 /* 1081 * If switch_output_trigger is hit, the data in 1082 * overwritable ring buffer should have been collected, 1083 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1084 * 1085 * If SIGUSR2 raise after or during record__mmap_read_all(), 1086 * record__mmap_read_all() didn't collect data from 1087 * overwritable ring buffer. Read again. 1088 */ 1089 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1090 continue; 1091 trigger_ready(&switch_output_trigger); 1092 1093 /* 1094 * Reenable events in overwrite ring buffer after 1095 * record__mmap_read_all(): we should have collected 1096 * data from it. 1097 */ 1098 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1099 1100 if (!quiet) 1101 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1102 waking); 1103 waking = 0; 1104 fd = record__switch_output(rec, false); 1105 if (fd < 0) { 1106 pr_err("Failed to switch to new file\n"); 1107 trigger_error(&switch_output_trigger); 1108 err = fd; 1109 goto out_child; 1110 } 1111 1112 /* re-arm the alarm */ 1113 if (rec->switch_output.time) 1114 alarm(rec->switch_output.time); 1115 } 1116 1117 if (hits == rec->samples) { 1118 if (done || draining) 1119 break; 1120 err = perf_evlist__poll(rec->evlist, -1); 1121 /* 1122 * Propagate error, only if there's any. Ignore positive 1123 * number of returned events and interrupt error. 1124 */ 1125 if (err > 0 || (err < 0 && errno == EINTR)) 1126 err = 0; 1127 waking++; 1128 1129 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1130 draining = true; 1131 } 1132 1133 /* 1134 * When perf is starting the traced process, at the end events 1135 * die with the process and we wait for that. Thus no need to 1136 * disable events in this case. 1137 */ 1138 if (done && !disabled && !target__none(&opts->target)) { 1139 trigger_off(&auxtrace_snapshot_trigger); 1140 perf_evlist__disable(rec->evlist); 1141 disabled = true; 1142 } 1143 } 1144 trigger_off(&auxtrace_snapshot_trigger); 1145 trigger_off(&switch_output_trigger); 1146 1147 if (forks && workload_exec_errno) { 1148 char msg[STRERR_BUFSIZE]; 1149 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1150 pr_err("Workload failed: %s\n", emsg); 1151 err = -1; 1152 goto out_child; 1153 } 1154 1155 if (!quiet) 1156 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1157 1158 if (target__none(&rec->opts.target)) 1159 record__synthesize_workload(rec, true); 1160 1161 out_child: 1162 if (forks) { 1163 int exit_status; 1164 1165 if (!child_finished) 1166 kill(rec->evlist->workload.pid, SIGTERM); 1167 1168 wait(&exit_status); 1169 1170 if (err < 0) 1171 status = err; 1172 else if (WIFEXITED(exit_status)) 1173 status = WEXITSTATUS(exit_status); 1174 else if (WIFSIGNALED(exit_status)) 1175 signr = WTERMSIG(exit_status); 1176 } else 1177 status = err; 1178 1179 record__synthesize(rec, true); 1180 /* this will be recalculated during process_buildids() */ 1181 rec->samples = 0; 1182 1183 if (!err) { 1184 if (!rec->timestamp_filename) { 1185 record__finish_output(rec); 1186 } else { 1187 fd = record__switch_output(rec, true); 1188 if (fd < 0) { 1189 status = fd; 1190 goto out_delete_session; 1191 } 1192 } 1193 } 1194 1195 perf_hooks__invoke_record_end(); 1196 1197 if (!err && !quiet) { 1198 char samples[128]; 1199 const char *postfix = rec->timestamp_filename ? 1200 ".<timestamp>" : ""; 1201 1202 if (rec->samples && !rec->opts.full_auxtrace) 1203 scnprintf(samples, sizeof(samples), 1204 " (%" PRIu64 " samples)", rec->samples); 1205 else 1206 samples[0] = '\0'; 1207 1208 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1209 perf_data_file__size(file) / 1024.0 / 1024.0, 1210 file->path, postfix, samples); 1211 } 1212 1213 out_delete_session: 1214 perf_session__delete(session); 1215 return status; 1216 } 1217 1218 static void callchain_debug(struct callchain_param *callchain) 1219 { 1220 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1221 1222 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1223 1224 if (callchain->record_mode == CALLCHAIN_DWARF) 1225 pr_debug("callchain: stack dump size %d\n", 1226 callchain->dump_size); 1227 } 1228 1229 int record_opts__parse_callchain(struct record_opts *record, 1230 struct callchain_param *callchain, 1231 const char *arg, bool unset) 1232 { 1233 int ret; 1234 callchain->enabled = !unset; 1235 1236 /* --no-call-graph */ 1237 if (unset) { 1238 callchain->record_mode = CALLCHAIN_NONE; 1239 pr_debug("callchain: disabled\n"); 1240 return 0; 1241 } 1242 1243 ret = parse_callchain_record_opt(arg, callchain); 1244 if (!ret) { 1245 /* Enable data address sampling for DWARF unwind. */ 1246 if (callchain->record_mode == CALLCHAIN_DWARF) 1247 record->sample_address = true; 1248 callchain_debug(callchain); 1249 } 1250 1251 return ret; 1252 } 1253 1254 int record_parse_callchain_opt(const struct option *opt, 1255 const char *arg, 1256 int unset) 1257 { 1258 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1259 } 1260 1261 int record_callchain_opt(const struct option *opt, 1262 const char *arg __maybe_unused, 1263 int unset __maybe_unused) 1264 { 1265 struct callchain_param *callchain = opt->value; 1266 1267 callchain->enabled = true; 1268 1269 if (callchain->record_mode == CALLCHAIN_NONE) 1270 callchain->record_mode = CALLCHAIN_FP; 1271 1272 callchain_debug(callchain); 1273 return 0; 1274 } 1275 1276 static int perf_record_config(const char *var, const char *value, void *cb) 1277 { 1278 struct record *rec = cb; 1279 1280 if (!strcmp(var, "record.build-id")) { 1281 if (!strcmp(value, "cache")) 1282 rec->no_buildid_cache = false; 1283 else if (!strcmp(value, "no-cache")) 1284 rec->no_buildid_cache = true; 1285 else if (!strcmp(value, "skip")) 1286 rec->no_buildid = true; 1287 else 1288 return -1; 1289 return 0; 1290 } 1291 if (!strcmp(var, "record.call-graph")) 1292 var = "call-graph.record-mode"; /* fall-through */ 1293 1294 return perf_default_config(var, value, cb); 1295 } 1296 1297 struct clockid_map { 1298 const char *name; 1299 int clockid; 1300 }; 1301 1302 #define CLOCKID_MAP(n, c) \ 1303 { .name = n, .clockid = (c), } 1304 1305 #define CLOCKID_END { .name = NULL, } 1306 1307 1308 /* 1309 * Add the missing ones, we need to build on many distros... 1310 */ 1311 #ifndef CLOCK_MONOTONIC_RAW 1312 #define CLOCK_MONOTONIC_RAW 4 1313 #endif 1314 #ifndef CLOCK_BOOTTIME 1315 #define CLOCK_BOOTTIME 7 1316 #endif 1317 #ifndef CLOCK_TAI 1318 #define CLOCK_TAI 11 1319 #endif 1320 1321 static const struct clockid_map clockids[] = { 1322 /* available for all events, NMI safe */ 1323 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1324 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1325 1326 /* available for some events */ 1327 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1328 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1329 CLOCKID_MAP("tai", CLOCK_TAI), 1330 1331 /* available for the lazy */ 1332 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1333 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1334 CLOCKID_MAP("real", CLOCK_REALTIME), 1335 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1336 1337 CLOCKID_END, 1338 }; 1339 1340 static int parse_clockid(const struct option *opt, const char *str, int unset) 1341 { 1342 struct record_opts *opts = (struct record_opts *)opt->value; 1343 const struct clockid_map *cm; 1344 const char *ostr = str; 1345 1346 if (unset) { 1347 opts->use_clockid = 0; 1348 return 0; 1349 } 1350 1351 /* no arg passed */ 1352 if (!str) 1353 return 0; 1354 1355 /* no setting it twice */ 1356 if (opts->use_clockid) 1357 return -1; 1358 1359 opts->use_clockid = true; 1360 1361 /* if its a number, we're done */ 1362 if (sscanf(str, "%d", &opts->clockid) == 1) 1363 return 0; 1364 1365 /* allow a "CLOCK_" prefix to the name */ 1366 if (!strncasecmp(str, "CLOCK_", 6)) 1367 str += 6; 1368 1369 for (cm = clockids; cm->name; cm++) { 1370 if (!strcasecmp(str, cm->name)) { 1371 opts->clockid = cm->clockid; 1372 return 0; 1373 } 1374 } 1375 1376 opts->use_clockid = false; 1377 ui__warning("unknown clockid %s, check man page\n", ostr); 1378 return -1; 1379 } 1380 1381 static int record__parse_mmap_pages(const struct option *opt, 1382 const char *str, 1383 int unset __maybe_unused) 1384 { 1385 struct record_opts *opts = opt->value; 1386 char *s, *p; 1387 unsigned int mmap_pages; 1388 int ret; 1389 1390 if (!str) 1391 return -EINVAL; 1392 1393 s = strdup(str); 1394 if (!s) 1395 return -ENOMEM; 1396 1397 p = strchr(s, ','); 1398 if (p) 1399 *p = '\0'; 1400 1401 if (*s) { 1402 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1403 if (ret) 1404 goto out_free; 1405 opts->mmap_pages = mmap_pages; 1406 } 1407 1408 if (!p) { 1409 ret = 0; 1410 goto out_free; 1411 } 1412 1413 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1414 if (ret) 1415 goto out_free; 1416 1417 opts->auxtrace_mmap_pages = mmap_pages; 1418 1419 out_free: 1420 free(s); 1421 return ret; 1422 } 1423 1424 static void switch_output_size_warn(struct record *rec) 1425 { 1426 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1427 struct switch_output *s = &rec->switch_output; 1428 1429 wakeup_size /= 2; 1430 1431 if (s->size < wakeup_size) { 1432 char buf[100]; 1433 1434 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1435 pr_warning("WARNING: switch-output data size lower than " 1436 "wakeup kernel buffer size (%s) " 1437 "expect bigger perf.data sizes\n", buf); 1438 } 1439 } 1440 1441 static int switch_output_setup(struct record *rec) 1442 { 1443 struct switch_output *s = &rec->switch_output; 1444 static struct parse_tag tags_size[] = { 1445 { .tag = 'B', .mult = 1 }, 1446 { .tag = 'K', .mult = 1 << 10 }, 1447 { .tag = 'M', .mult = 1 << 20 }, 1448 { .tag = 'G', .mult = 1 << 30 }, 1449 { .tag = 0 }, 1450 }; 1451 static struct parse_tag tags_time[] = { 1452 { .tag = 's', .mult = 1 }, 1453 { .tag = 'm', .mult = 60 }, 1454 { .tag = 'h', .mult = 60*60 }, 1455 { .tag = 'd', .mult = 60*60*24 }, 1456 { .tag = 0 }, 1457 }; 1458 unsigned long val; 1459 1460 if (!s->set) 1461 return 0; 1462 1463 if (!strcmp(s->str, "signal")) { 1464 s->signal = true; 1465 pr_debug("switch-output with SIGUSR2 signal\n"); 1466 goto enabled; 1467 } 1468 1469 val = parse_tag_value(s->str, tags_size); 1470 if (val != (unsigned long) -1) { 1471 s->size = val; 1472 pr_debug("switch-output with %s size threshold\n", s->str); 1473 goto enabled; 1474 } 1475 1476 val = parse_tag_value(s->str, tags_time); 1477 if (val != (unsigned long) -1) { 1478 s->time = val; 1479 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 1480 s->str, s->time); 1481 goto enabled; 1482 } 1483 1484 return -1; 1485 1486 enabled: 1487 rec->timestamp_filename = true; 1488 s->enabled = true; 1489 1490 if (s->size && !rec->opts.no_buffering) 1491 switch_output_size_warn(rec); 1492 1493 return 0; 1494 } 1495 1496 static const char * const __record_usage[] = { 1497 "perf record [<options>] [<command>]", 1498 "perf record [<options>] -- <command> [<options>]", 1499 NULL 1500 }; 1501 const char * const *record_usage = __record_usage; 1502 1503 /* 1504 * XXX Ideally would be local to cmd_record() and passed to a record__new 1505 * because we need to have access to it in record__exit, that is called 1506 * after cmd_record() exits, but since record_options need to be accessible to 1507 * builtin-script, leave it here. 1508 * 1509 * At least we don't ouch it in all the other functions here directly. 1510 * 1511 * Just say no to tons of global variables, sigh. 1512 */ 1513 static struct record record = { 1514 .opts = { 1515 .sample_time = true, 1516 .mmap_pages = UINT_MAX, 1517 .user_freq = UINT_MAX, 1518 .user_interval = ULLONG_MAX, 1519 .freq = 4000, 1520 .target = { 1521 .uses_mmap = true, 1522 .default_per_cpu = true, 1523 }, 1524 .proc_map_timeout = 500, 1525 }, 1526 .tool = { 1527 .sample = process_sample_event, 1528 .fork = perf_event__process_fork, 1529 .exit = perf_event__process_exit, 1530 .comm = perf_event__process_comm, 1531 .namespaces = perf_event__process_namespaces, 1532 .mmap = perf_event__process_mmap, 1533 .mmap2 = perf_event__process_mmap2, 1534 .ordered_events = true, 1535 }, 1536 }; 1537 1538 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1539 "\n\t\t\t\tDefault: fp"; 1540 1541 static bool dry_run; 1542 1543 /* 1544 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1545 * with it and switch to use the library functions in perf_evlist that came 1546 * from builtin-record.c, i.e. use record_opts, 1547 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1548 * using pipes, etc. 1549 */ 1550 static struct option __record_options[] = { 1551 OPT_CALLBACK('e', "event", &record.evlist, "event", 1552 "event selector. use 'perf list' to list available events", 1553 parse_events_option), 1554 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1555 "event filter", parse_filter), 1556 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1557 NULL, "don't record events from perf itself", 1558 exclude_perf), 1559 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1560 "record events on existing process id"), 1561 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1562 "record events on existing thread id"), 1563 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1564 "collect data with this RT SCHED_FIFO priority"), 1565 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1566 "collect data without buffering"), 1567 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1568 "collect raw sample records from all opened counters"), 1569 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1570 "system-wide collection from all CPUs"), 1571 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1572 "list of cpus to monitor"), 1573 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1574 OPT_STRING('o', "output", &record.file.path, "file", 1575 "output file name"), 1576 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1577 &record.opts.no_inherit_set, 1578 "child tasks do not inherit counters"), 1579 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1580 "synthesize non-sample events at the end of output"), 1581 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1582 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1583 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1584 "number of mmap data pages and AUX area tracing mmap pages", 1585 record__parse_mmap_pages), 1586 OPT_BOOLEAN(0, "group", &record.opts.group, 1587 "put the counters into a counter group"), 1588 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1589 NULL, "enables call-graph recording" , 1590 &record_callchain_opt), 1591 OPT_CALLBACK(0, "call-graph", &record.opts, 1592 "record_mode[,record_size]", record_callchain_help, 1593 &record_parse_callchain_opt), 1594 OPT_INCR('v', "verbose", &verbose, 1595 "be more verbose (show counter open errors, etc)"), 1596 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1597 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1598 "per thread counts"), 1599 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1600 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1601 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1602 &record.opts.sample_time_set, 1603 "Record the sample timestamps"), 1604 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), 1605 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1606 "don't sample"), 1607 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 1608 &record.no_buildid_cache_set, 1609 "do not update the buildid cache"), 1610 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 1611 &record.no_buildid_set, 1612 "do not collect buildids in perf.data"), 1613 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 1614 "monitor event in cgroup name only", 1615 parse_cgroups), 1616 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 1617 "ms to wait before starting measurement after program start"), 1618 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 1619 "user to profile"), 1620 1621 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 1622 "branch any", "sample any taken branches", 1623 parse_branch_stack), 1624 1625 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 1626 "branch filter mask", "branch stack filter modes", 1627 parse_branch_stack), 1628 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 1629 "sample by weight (on special events only)"), 1630 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 1631 "sample transaction flags (special events only)"), 1632 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 1633 "use per-thread mmaps"), 1634 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 1635 "sample selected machine registers on interrupt," 1636 " use -I ? to list register names", parse_regs), 1637 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 1638 "Record running/enabled time of read (:S) events"), 1639 OPT_CALLBACK('k', "clockid", &record.opts, 1640 "clockid", "clockid to use for events, see clock_gettime()", 1641 parse_clockid), 1642 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 1643 "opts", "AUX area tracing Snapshot Mode", ""), 1644 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1645 "per thread proc mmap processing timeout in ms"), 1646 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 1647 "Record namespaces events"), 1648 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 1649 "Record context switch events"), 1650 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 1651 "Configure all used events to run in kernel space.", 1652 PARSE_OPT_EXCLUSIVE), 1653 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 1654 "Configure all used events to run in user space.", 1655 PARSE_OPT_EXCLUSIVE), 1656 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 1657 "clang binary to use for compiling BPF scriptlets"), 1658 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 1659 "options passed to clang when compiling BPF scriptlets"), 1660 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1661 "file", "vmlinux pathname"), 1662 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 1663 "Record build-id of all DSOs regardless of hits"), 1664 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 1665 "append timestamp to output filename"), 1666 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 1667 &record.switch_output.set, "signal,size,time", 1668 "Switch output when receive SIGUSR2 or cross size,time threshold", 1669 "signal"), 1670 OPT_BOOLEAN(0, "dry-run", &dry_run, 1671 "Parse options then exit"), 1672 OPT_END() 1673 }; 1674 1675 struct option *record_options = __record_options; 1676 1677 int cmd_record(int argc, const char **argv) 1678 { 1679 int err; 1680 struct record *rec = &record; 1681 char errbuf[BUFSIZ]; 1682 1683 #ifndef HAVE_LIBBPF_SUPPORT 1684 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 1685 set_nobuild('\0', "clang-path", true); 1686 set_nobuild('\0', "clang-opt", true); 1687 # undef set_nobuild 1688 #endif 1689 1690 #ifndef HAVE_BPF_PROLOGUE 1691 # if !defined (HAVE_DWARF_SUPPORT) 1692 # define REASON "NO_DWARF=1" 1693 # elif !defined (HAVE_LIBBPF_SUPPORT) 1694 # define REASON "NO_LIBBPF=1" 1695 # else 1696 # define REASON "this architecture doesn't support BPF prologue" 1697 # endif 1698 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 1699 set_nobuild('\0', "vmlinux", true); 1700 # undef set_nobuild 1701 # undef REASON 1702 #endif 1703 1704 rec->evlist = perf_evlist__new(); 1705 if (rec->evlist == NULL) 1706 return -ENOMEM; 1707 1708 err = perf_config(perf_record_config, rec); 1709 if (err) 1710 return err; 1711 1712 argc = parse_options(argc, argv, record_options, record_usage, 1713 PARSE_OPT_STOP_AT_NON_OPTION); 1714 if (quiet) 1715 perf_quiet_option(); 1716 1717 /* Make system wide (-a) the default target. */ 1718 if (!argc && target__none(&rec->opts.target)) 1719 rec->opts.target.system_wide = true; 1720 1721 if (nr_cgroups && !rec->opts.target.system_wide) { 1722 usage_with_options_msg(record_usage, record_options, 1723 "cgroup monitoring only available in system-wide mode"); 1724 1725 } 1726 if (rec->opts.record_switch_events && 1727 !perf_can_record_switch_events()) { 1728 ui__error("kernel does not support recording context switch events\n"); 1729 parse_options_usage(record_usage, record_options, "switch-events", 0); 1730 return -EINVAL; 1731 } 1732 1733 if (switch_output_setup(rec)) { 1734 parse_options_usage(record_usage, record_options, "switch-output", 0); 1735 return -EINVAL; 1736 } 1737 1738 if (rec->switch_output.time) { 1739 signal(SIGALRM, alarm_sig_handler); 1740 alarm(rec->switch_output.time); 1741 } 1742 1743 if (!rec->itr) { 1744 rec->itr = auxtrace_record__init(rec->evlist, &err); 1745 if (err) 1746 goto out; 1747 } 1748 1749 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 1750 rec->opts.auxtrace_snapshot_opts); 1751 if (err) 1752 goto out; 1753 1754 /* 1755 * Allow aliases to facilitate the lookup of symbols for address 1756 * filters. Refer to auxtrace_parse_filters(). 1757 */ 1758 symbol_conf.allow_aliases = true; 1759 1760 symbol__init(NULL); 1761 1762 err = auxtrace_parse_filters(rec->evlist); 1763 if (err) 1764 goto out; 1765 1766 if (dry_run) 1767 goto out; 1768 1769 err = bpf__setup_stdout(rec->evlist); 1770 if (err) { 1771 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 1772 pr_err("ERROR: Setup BPF stdout failed: %s\n", 1773 errbuf); 1774 goto out; 1775 } 1776 1777 err = -ENOMEM; 1778 1779 if (symbol_conf.kptr_restrict) 1780 pr_warning( 1781 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1782 "check /proc/sys/kernel/kptr_restrict.\n\n" 1783 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1784 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1785 "Samples in kernel modules won't be resolved at all.\n\n" 1786 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1787 "even with a suitable vmlinux or kallsyms file.\n\n"); 1788 1789 if (rec->no_buildid_cache || rec->no_buildid) { 1790 disable_buildid_cache(); 1791 } else if (rec->switch_output.enabled) { 1792 /* 1793 * In 'perf record --switch-output', disable buildid 1794 * generation by default to reduce data file switching 1795 * overhead. Still generate buildid if they are required 1796 * explicitly using 1797 * 1798 * perf record --switch-output --no-no-buildid \ 1799 * --no-no-buildid-cache 1800 * 1801 * Following code equals to: 1802 * 1803 * if ((rec->no_buildid || !rec->no_buildid_set) && 1804 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 1805 * disable_buildid_cache(); 1806 */ 1807 bool disable = true; 1808 1809 if (rec->no_buildid_set && !rec->no_buildid) 1810 disable = false; 1811 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 1812 disable = false; 1813 if (disable) { 1814 rec->no_buildid = true; 1815 rec->no_buildid_cache = true; 1816 disable_buildid_cache(); 1817 } 1818 } 1819 1820 if (record.opts.overwrite) 1821 record.opts.tail_synthesize = true; 1822 1823 if (rec->evlist->nr_entries == 0 && 1824 perf_evlist__add_default(rec->evlist) < 0) { 1825 pr_err("Not enough memory for event selector list\n"); 1826 goto out; 1827 } 1828 1829 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 1830 rec->opts.no_inherit = true; 1831 1832 err = target__validate(&rec->opts.target); 1833 if (err) { 1834 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1835 ui__warning("%s", errbuf); 1836 } 1837 1838 err = target__parse_uid(&rec->opts.target); 1839 if (err) { 1840 int saved_errno = errno; 1841 1842 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1843 ui__error("%s", errbuf); 1844 1845 err = -saved_errno; 1846 goto out; 1847 } 1848 1849 /* Enable ignoring missing threads when -u option is defined. */ 1850 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; 1851 1852 err = -ENOMEM; 1853 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1854 usage_with_options(record_usage, record_options); 1855 1856 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 1857 if (err) 1858 goto out; 1859 1860 /* 1861 * We take all buildids when the file contains 1862 * AUX area tracing data because we do not decode the 1863 * trace because it would take too long. 1864 */ 1865 if (rec->opts.full_auxtrace) 1866 rec->buildid_all = true; 1867 1868 if (record_opts__config(&rec->opts)) { 1869 err = -EINVAL; 1870 goto out; 1871 } 1872 1873 err = __cmd_record(&record, argc, argv); 1874 out: 1875 perf_evlist__delete(rec->evlist); 1876 symbol__exit(); 1877 auxtrace_record__free(rec->itr); 1878 return err; 1879 } 1880 1881 static void snapshot_sig_handler(int sig __maybe_unused) 1882 { 1883 struct record *rec = &record; 1884 1885 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1886 trigger_hit(&auxtrace_snapshot_trigger); 1887 auxtrace_record__snapshot_started = 1; 1888 if (auxtrace_record__snapshot_start(record.itr)) 1889 trigger_error(&auxtrace_snapshot_trigger); 1890 } 1891 1892 if (switch_output_signal(rec)) 1893 trigger_hit(&switch_output_trigger); 1894 } 1895 1896 static void alarm_sig_handler(int sig __maybe_unused) 1897 { 1898 struct record *rec = &record; 1899 1900 if (switch_output_time(rec)) 1901 trigger_hit(&switch_output_trigger); 1902 } 1903