// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
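
/*
 * Callback passed to perf_mmap__push(): 'to' is the struct record, 'bf' the
 * chunk of ring buffer data to append to the output file.
 */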
static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else
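
/*
 * No-op stubs used when perf is built without AUX area tracing support, so
 * callers don't need #ifdefs of their own.
 */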
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
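
/*
 * Configure and open all events in the evlist, retrying with
 * perf_evsel__fallback() on failure, then apply filters and driver configs
 * and mmap the ring buffers.
 */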
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a preload of the dso, because by default guest
	 * module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method avoids missing symbols when the
	 * first address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
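
/*
 * PERF_RECORD_FINISHED_ROUND marks a flush point: events written before it
 * can safely be time-sorted and flushed by the ordered-events code on the
 * report side.
 */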
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}
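
/*
 * Synthesize thread map events for the forked workload, so that a data file
 * produced with --tail-synthesize or --switch-output still carries the
 * workload's comm/mmap information.
 */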
static int
record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);
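
/*
 * Finish the current perf.data file and open the next one, returning its fd.
 * Used by --switch-output on SIGUSR2 or when a size/time threshold is
 * crossed.
 */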
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
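
/*
 * Write the non-sample events (attrs, features, time conversion, kernel and
 * module mmaps, thread and cpu maps, ...) that report needs to make sense of
 * the samples.
 */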
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}
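
/*
 * The heart of 'perf record': set up signals, the session and the output
 * file, fork/attach to the workload, then loop reading the ring buffers
 * until the workload exits or we are interrupted.
 */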
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the id allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize the NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
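
	/*
	 * Main read loop: drain the mmaps, service auxtrace snapshot and
	 * switch-output requests, and poll for new data until the workload
	 * exits or we are told to stop.
	 */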
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY
		 * here: when done == true and
		 * hits != rec->samples in the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised during or after
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}
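
/* With -v, log which call-graph recording mode ended up being selected. */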
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}
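
/*
 * perf_config() callback: handle the record.build-id and record.call-graph
 * entries of perfconfig.
 */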
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}

	return 0;
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
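
/*
 * Parse -m/--mmap-pages as "pages[,aux_pages]": the optional second value
 * sizes the AUX area tracing mmap.
 */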
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}
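
/*
 * Parse the --switch-output argument: "signal" rotates on SIGUSR2, a size
 * tag (B/K/M/G, e.g. "2G") rotates when the output crosses that size, and a
 * time tag (s/m/h/d, e.g. "30s") rotates periodically via SIGALRM.
 */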
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use --user-regs ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receive SIGUSR2 or cross size,time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;
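
/*
 * Entry point for 'perf record': parse options, validate the target, set up
 * the evlist and auxtrace state, then hand off to __cmd_record().
 */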
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace, since decoding it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}
Still generate buildid if they are required 1819 * explicitly using 1820 * 1821 * perf record --switch-output --no-no-buildid \ 1822 * --no-no-buildid-cache 1823 * 1824 * Following code equals to: 1825 * 1826 * if ((rec->no_buildid || !rec->no_buildid_set) && 1827 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 1828 * disable_buildid_cache(); 1829 */ 1830 bool disable = true; 1831 1832 if (rec->no_buildid_set && !rec->no_buildid) 1833 disable = false; 1834 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 1835 disable = false; 1836 if (disable) { 1837 rec->no_buildid = true; 1838 rec->no_buildid_cache = true; 1839 disable_buildid_cache(); 1840 } 1841 } 1842 1843 if (record.opts.overwrite) 1844 record.opts.tail_synthesize = true; 1845 1846 if (rec->evlist->nr_entries == 0 && 1847 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 1848 pr_err("Not enough memory for event selector list\n"); 1849 goto out; 1850 } 1851 1852 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 1853 rec->opts.no_inherit = true; 1854 1855 err = target__validate(&rec->opts.target); 1856 if (err) { 1857 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1858 ui__warning("%s\n", errbuf); 1859 } 1860 1861 err = target__parse_uid(&rec->opts.target); 1862 if (err) { 1863 int saved_errno = errno; 1864 1865 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1866 ui__error("%s", errbuf); 1867 1868 err = -saved_errno; 1869 goto out; 1870 } 1871 1872 /* Enable ignoring missing threads when -u/-p option is defined. */ 1873 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 1874 1875 err = -ENOMEM; 1876 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1877 usage_with_options(record_usage, record_options); 1878 1879 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 1880 if (err) 1881 goto out; 1882 1883 /* 1884 * We take all buildids when the file contains 1885 * AUX area tracing data because we do not decode the 1886 * trace because it would take too long. 1887 */ 1888 if (rec->opts.full_auxtrace) 1889 rec->buildid_all = true; 1890 1891 if (record_opts__config(&rec->opts)) { 1892 err = -EINVAL; 1893 goto out; 1894 } 1895 1896 err = __cmd_record(&record, argc, argv); 1897 out: 1898 perf_evlist__delete(rec->evlist); 1899 symbol__exit(); 1900 auxtrace_record__free(rec->itr); 1901 return err; 1902 } 1903 1904 static void snapshot_sig_handler(int sig __maybe_unused) 1905 { 1906 struct record *rec = &record; 1907 1908 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1909 trigger_hit(&auxtrace_snapshot_trigger); 1910 auxtrace_record__snapshot_started = 1; 1911 if (auxtrace_record__snapshot_start(record.itr)) 1912 trigger_error(&auxtrace_snapshot_trigger); 1913 } 1914 1915 if (switch_output_signal(rec)) 1916 trigger_hit(&switch_output_trigger); 1917 } 1918 1919 static void alarm_sig_handler(int sig __maybe_unused) 1920 { 1921 struct record *rec = &record; 1922 1923 if (switch_output_time(rec)) 1924 trigger_hit(&switch_output_trigger); 1925 } 1926