// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data__write(rec->session->data, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}
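/*
 * Push callback for perf_mmap__push(): each call flushes one chunk of a
 * ring buffer to the output file.  Note that rec->samples counts these
 * flushes, not individual samples; the main loop in __cmd_record() only
 * compares it against its previous value to see whether any data arrived.
 */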
static int record__pushfn(void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif
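/*
 * Worked example for the EPERM branch below (numbers are illustrative,
 * not taken from this code): with a default perf_event_mlock_kb of
 * 516 KiB and 4 KiB pages, an unprivileged user can lock roughly 128
 * data pages plus the control page; asking for more via -m makes the
 * kernel refuse the mmap with EPERM.
 */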
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
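/*
 * Post-process the just-written perf.data to collect build-ids for the
 * DSOs that were hit by samples (or for all DSOs with --buildid-all).
 * This re-reads the file via perf_session__process_events(), so it is
 * only called for non-pipe output (see record__finish_output()).
 */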
static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a preload dso, because default guest module
	 * symbols are loaded from the guest kallsyms instead of
	 * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
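/*
 * PERF_RECORD_FINISHED_ROUND is a hint to the ordered-events code in
 * perf report/script: everything written before it can be time-sorted
 * and flushed.  Emitting one after each successful flush (see the
 * bytes_written check below) bounds how much the consumer must buffer.
 */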
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);
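/*
 * Rotate the output file: finish the current perf.data, rename it with a
 * timestamp suffix (e.g. perf.data.2015122520103046) and keep recording
 * into a fresh file.  Used both for --switch-output rotation
 * (at_exit == false) and for the final --timestamp-filename rename
 * (at_exit == true).
 */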
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same size as a real timestamp: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data wouldn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	return perf_evlist__pick_pc(rec->evlist);
}
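/*
 * Synthesize the non-sample events that describe the system state at
 * record time: header features/attrs/tracing data when writing to a
 * pipe, the time-conversion and AUX area info events, kernel and module
 * mmaps, guest machines, and the thread/cpu maps plus pre-existing
 * threads.  With --tail-synthesize this runs at the end of the session
 * instead (tail == true).
 */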
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

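	/*
	 * Fork the workload now (if a command was given on the command
	 * line) but hold it before exec until everything is set up;
	 * perf_evlist__start_workload() further down releases it.
	 */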
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event first to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

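		/*
		 * The namespaces event is variable-sized: one
		 * perf_ns_link_info per namespace type follows the fixed
		 * header, hence the NR_NAMESPACES term in the malloc()
		 * sizing below.
		 */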
		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize the NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer.  Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

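		/*
		 * "draining" is set once every pollable fd has gone away
		 * (POLLERR/POLLHUP), i.e. the monitored tasks exited; one
		 * more pass empties the buffers and the loop breaks above.
		 */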
		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
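/*
 * Parse the --call-graph argument, e.g. "fp", "lbr" or "dwarf,8192";
 * "dwarf,<size>" selects CALLCHAIN_DWARF with the given stack dump size
 * and, as noted below, also turns on sample_address (the -d behaviour).
 */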
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* don't allow setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
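/*
 * Parse -m/--mmap-pages, e.g. "-m 512" or "-m 512,64K": the value before
 * the comma sizes the data mmaps, the one after it sizes the AUX area
 * mmaps; both accept page counts or sizes with a B/K/M/G suffix.
 */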
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size is lower than "
			   "the wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag = 'B', .mult = 1       },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag = 's', .mult = 1        },
		{ .tag = 'm', .mult = 60       },
		{ .tag = 'h', .mult = 60*60    },
		{ .tag = 'd', .mult = 60*60*24 },
		{ .tag = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1465 */ 1466 static struct record record = { 1467 .opts = { 1468 .sample_time = true, 1469 .mmap_pages = UINT_MAX, 1470 .user_freq = UINT_MAX, 1471 .user_interval = ULLONG_MAX, 1472 .freq = 4000, 1473 .target = { 1474 .uses_mmap = true, 1475 .default_per_cpu = true, 1476 }, 1477 .proc_map_timeout = 500, 1478 }, 1479 .tool = { 1480 .sample = process_sample_event, 1481 .fork = perf_event__process_fork, 1482 .exit = perf_event__process_exit, 1483 .comm = perf_event__process_comm, 1484 .namespaces = perf_event__process_namespaces, 1485 .mmap = perf_event__process_mmap, 1486 .mmap2 = perf_event__process_mmap2, 1487 .ordered_events = true, 1488 }, 1489 }; 1490 1491 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1492 "\n\t\t\t\tDefault: fp"; 1493 1494 static bool dry_run; 1495 1496 /* 1497 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1498 * with it and switch to use the library functions in perf_evlist that came 1499 * from builtin-record.c, i.e. use record_opts, 1500 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1501 * using pipes, etc. 1502 */ 1503 static struct option __record_options[] = { 1504 OPT_CALLBACK('e', "event", &record.evlist, "event", 1505 "event selector. use 'perf list' to list available events", 1506 parse_events_option), 1507 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1508 "event filter", parse_filter), 1509 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1510 NULL, "don't record events from perf itself", 1511 exclude_perf), 1512 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1513 "record events on existing process id"), 1514 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1515 "record events on existing thread id"), 1516 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1517 "collect data with this RT SCHED_FIFO priority"), 1518 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1519 "collect data without buffering"), 1520 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1521 "collect raw sample records from all opened counters"), 1522 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1523 "system-wide collection from all CPUs"), 1524 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1525 "list of cpus to monitor"), 1526 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1527 OPT_STRING('o', "output", &record.data.file.path, "file", 1528 "output file name"), 1529 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1530 &record.opts.no_inherit_set, 1531 "child tasks do not inherit counters"), 1532 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1533 "synthesize non-sample events at the end of output"), 1534 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1535 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1536 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1537 "number of mmap data pages and AUX area tracing mmap pages", 1538 record__parse_mmap_pages), 1539 OPT_BOOLEAN(0, "group", &record.opts.group, 1540 "put the counters into a counter group"), 1541 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1542 NULL, "enables call-graph recording" , 1543 &record_callchain_opt), 1544 OPT_CALLBACK(0, "call-graph", &record.opts, 1545 "record_mode[,record_size]", record_callchain_help, 1546 &record_parse_callchain_opt), 1547 OPT_INCR('v', "verbose", &verbose, 1548 "be more verbose (show counter open errors, etc)"), 1549 
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use --user-regs=? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal,size,time",
			      "Switch output when receiving SIGUSR2 or when crossing the size or time threshold",
			      "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;
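/*
 * Note the option interplay resolved below and in switch_output_setup():
 * --switch-output implies --timestamp-filename, --overwrite implies
 * --tail-synthesize, and AUX area tracing implies --buildid-all.
 */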
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead.  Still generate buildids if they are explicitly
		 * requested, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *          disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data,
	 * because we do not decode the trace; it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}