// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data__write(rec->session->data, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

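/*
 * All output, whether copied out of the ring buffers or synthesized, funnels
 * through record__write() above.  That is what makes the byte-based
 * --switch-output threshold possible: e.g. with --switch-output=100M the
 * trigger fires once bytes_written reaches 100 << 20 bytes.
 */
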
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

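/*
 * Worked example for the padding math above: the AUX payload is rounded up
 * to an 8-byte boundary, so for len1 + len2 == 13, (13 & 7) == 5 and
 * padding == 8 - 5 == 3, giving 16 bytes of payload on disk in total.
 * When the payload is already a multiple of 8, padding stays 0 and the
 * final record__write() writes nothing.
 */
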
static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

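/*
 * Usage sketch for the --delay handling in record__open() below (the
 * workload name is illustrative): with "perf record --delay 500 ./workload"
 * a dummy tracking event is enabled on exec so PERF_RECORD_MMAP/COMM events
 * are not lost, while the real counters stay disabled until __cmd_record()
 * enables them after the 500 ms initial delay.
 */
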
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		       str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

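/*
 * A note on PERF_RECORD_FINISHED_ROUND: it carries no payload (header only)
 * and acts as a flush marker.  Tools consuming the file can buffer events,
 * sort them by timestamp within a round, and flush them once the round
 * ends, instead of having to buffer the whole file to restore time order.
 */
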
static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

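/*
 * As the "Dump" message above suggests, each switch finalizes the current
 * output under a timestamped name, e.g. perf.data.2015122520103046 (the
 * same-size template in the comment above), and recording then continues
 * into a fresh perf.data with the byte counters reset.
 */
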
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

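/*
 * Note the pipe-mode asymmetry above: with a regular file the features,
 * attrs and tracing data live in the seekable perf.data header, but a pipe
 * has no header to rewrite later, so the same information is synthesized
 * inline as events at the front of the stream.
 */
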
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

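	/*
	 * Main event loop: each iteration drains every mmap (and AUX area),
	 * services the snapshot and switch-output triggers, and then sleeps
	 * in poll() once no new samples arrived.  "draining" flips on when
	 * all pollfds have hung up, so the loop exits after a final flush.
	 */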
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

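/*
 * The "record.build-id" handling above maps onto perfconfig entries; a
 * sketch of the usual ~/.perfconfig syntax (values taken from the parser):
 *
 *	[record]
 *		build-id = skip		# or "cache" / "no-cache"
 *		call-graph = dwarf	# rewritten to call-graph.record-mode
 */
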
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

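/*
 * Accepted -k/--clockid spellings, per the parser above: a raw number
 * ("-k 4"), a table name in any case ("-k monotonic_raw", "-k boot"),
 * or the same name with a CLOCK_ prefix ("-k CLOCK_MONOTONIC_RAW").
 */
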
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

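/*
 * -m/--mmap-pages takes "pages[,pages]": the part before the comma sizes
 * the data mmaps, the part after the comma sizes the AUX area mmaps.
 * For instance "-m 512,1024", or ",128" to size only the AUX area and
 * leave the data size at its default; the per-value syntax is whatever
 * __perf_evlist__parse_mmap_pages() accepts.
 */
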
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

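/*
 * The three --switch-output flavors accepted above, for example:
 *
 *	perf record --switch-output=signal ...	# rotate on SIGUSR2
 *	perf record --switch-output=100M ...	# rotate every 100M written
 *	perf record --switch-output=30s ...	# rotate every 30 seconds
 *
 * All of them imply timestamped output file names.
 */
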
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal,size,time",
			      "Switch output when receive SIGUSR2 or cross size,time threshold",
			      "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

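/*
 * A few representative invocations built from the option table above
 * (workload names are illustrative):
 *
 *	perf record -F 4000 -g -- ./workload	# 4 kHz sampling + call graphs
 *	perf record -a -e cycles sleep 5	# system-wide for 5 seconds
 *	perf record -p 1234 --switch-output=1G	# attach to a pid, rotate at 1G
 */
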
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}