// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>
#include <linux/zalloc.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
	char		**filenames;
	int		num_files;
	int		cur_file;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	cpu_set_t		affinity_mask;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
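/*
 * Queue an asynchronous write of @size bytes at offset @off on @trace_fd,
 * retrying while the AIO queue is temporarily full (EAGAIN).
 */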
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(md);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
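/*
 * Per-push context handed from record__aio_push() to record__aio_pushfn():
 * the owning record, the AIO staging buffer and the number of bytes
 * accumulated in it so far.
 */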
struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->base data pointed to by buf is copied into a free map->aio.data[]
	 * buffer to release space in the kernel buffer as fast as possible,
	 * calling perf_mmap__consume() from the perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * the part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of the
	 * data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     perf_mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after
		 * the started aio request completes, or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(map);
	}

	aio->size += size;

	return size;
}
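/*
 * Push one mmap's worth of trace data to the output file via a single
 * asynchronous write, advancing *off by the number of bytes queued.
 */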
static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement the map->refcount incremented in record__aio_pushfn()
		 * if record__aio_write() failed to start the operation; otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * the aio write operation finishes successfully.
		 */
		perf_mmap__put(map);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct perf_mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = perf_evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

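/*
 * perf_tool callback: write a synthesized (non-mmap'ed) event straight
 * into the perf.data output stream.
 */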
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

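/*
 * Take one final AUX area snapshot when recording terminates and
 * snapshot-on-exit was requested.
 */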
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->core.attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a DSO preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

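/*
 * When --affinity is node or cpu, migrate the recording thread onto the
 * CPUs backing the ring buffer it is about to read.
 */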
static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
		CPU_ZERO(&rec->affinity_mask);
		CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
		sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct compressed_event *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;

	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	session->bytes_transferred += src_size;
	session->bytes_compressed += compressed;

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->nr_mmaps; i++) {
		u64 flush = 0;
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->flush;
				map->flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

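/*
 * Drain the regular ring buffers first, then the overwritable ones.
 */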
static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist. As a result the newly created perf.data
		 * file doesn't contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
							   session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}
kexec).\n" 1286 "Check /proc/modules permission or run as root.\n"); 1287 } 1288 1289 if (perf_guest) { 1290 machines__process_guests(&session->machines, 1291 perf_event__synthesize_guest_os, tool); 1292 } 1293 1294 err = perf_event__synthesize_extra_attr(&rec->tool, 1295 rec->evlist, 1296 process_synthesized_event, 1297 data->is_pipe); 1298 if (err) 1299 goto out; 1300 1301 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1302 process_synthesized_event, 1303 NULL); 1304 if (err < 0) { 1305 pr_err("Couldn't synthesize thread map.\n"); 1306 return err; 1307 } 1308 1309 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus, 1310 process_synthesized_event, NULL); 1311 if (err < 0) { 1312 pr_err("Couldn't synthesize cpu map.\n"); 1313 return err; 1314 } 1315 1316 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1317 machine, opts); 1318 if (err < 0) 1319 pr_warning("Couldn't synthesize bpf events.\n"); 1320 1321 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, 1322 process_synthesized_event, opts->sample_address, 1323 1); 1324 out: 1325 return err; 1326 } 1327 1328 static int __cmd_record(struct record *rec, int argc, const char **argv) 1329 { 1330 int err; 1331 int status = 0; 1332 unsigned long waking = 0; 1333 const bool forks = argc > 0; 1334 struct perf_tool *tool = &rec->tool; 1335 struct record_opts *opts = &rec->opts; 1336 struct perf_data *data = &rec->data; 1337 struct perf_session *session; 1338 bool disabled = false, draining = false; 1339 struct evlist *sb_evlist = NULL; 1340 int fd; 1341 float ratio = 0; 1342 1343 atexit(record__sig_exit); 1344 signal(SIGCHLD, sig_handler); 1345 signal(SIGINT, sig_handler); 1346 signal(SIGTERM, sig_handler); 1347 signal(SIGSEGV, sigsegv_handler); 1348 1349 if (rec->opts.record_namespaces) 1350 tool->namespace_events = true; 1351 1352 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1353 signal(SIGUSR2, snapshot_sig_handler); 1354 if (rec->opts.auxtrace_snapshot_mode) 1355 trigger_on(&auxtrace_snapshot_trigger); 1356 if (rec->switch_output.enabled) 1357 trigger_on(&switch_output_trigger); 1358 } else { 1359 signal(SIGUSR2, SIG_IGN); 1360 } 1361 1362 session = perf_session__new(data, false, tool); 1363 if (session == NULL) { 1364 pr_err("Perf session creation failed.\n"); 1365 return -1; 1366 } 1367 1368 fd = perf_data__fd(data); 1369 rec->session = session; 1370 1371 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 1372 pr_err("Compression initialization failed.\n"); 1373 return -1; 1374 } 1375 1376 session->header.env.comp_type = PERF_COMP_ZSTD; 1377 session->header.env.comp_level = rec->opts.comp_level; 1378 1379 record__init_features(rec); 1380 1381 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1382 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; 1383 1384 if (forks) { 1385 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 1386 argv, data->is_pipe, 1387 workload_exec_failed_signal); 1388 if (err < 0) { 1389 pr_err("Couldn't run the workload!\n"); 1390 status = err; 1391 goto out_delete_session; 1392 } 1393 } 1394 1395 /* 1396 * If we have just single event and are sending data 1397 * through pipe, we need to force the ids allocation, 1398 * because we synthesize event name through the pipe 1399 * and need the id for that. 
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->mmap_len;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	if (!opts->no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &session->header.env);

	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script cannot
		 * see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";
1703 ".<timestamp>" : ""; 1704 1705 if (rec->samples && !rec->opts.full_auxtrace) 1706 scnprintf(samples, sizeof(samples), 1707 " (%" PRIu64 " samples)", rec->samples); 1708 else 1709 samples[0] = '\0'; 1710 1711 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 1712 perf_data__size(data) / 1024.0 / 1024.0, 1713 data->path, postfix, samples); 1714 if (ratio) { 1715 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 1716 rec->session->bytes_transferred / 1024.0 / 1024.0, 1717 ratio); 1718 } 1719 fprintf(stderr, " ]\n"); 1720 } 1721 1722 out_delete_session: 1723 zstd_fini(&session->zstd_data); 1724 perf_session__delete(session); 1725 1726 if (!opts->no_bpf_event) 1727 perf_evlist__stop_sb_thread(sb_evlist); 1728 return status; 1729 } 1730 1731 static void callchain_debug(struct callchain_param *callchain) 1732 { 1733 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1734 1735 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1736 1737 if (callchain->record_mode == CALLCHAIN_DWARF) 1738 pr_debug("callchain: stack dump size %d\n", 1739 callchain->dump_size); 1740 } 1741 1742 int record_opts__parse_callchain(struct record_opts *record, 1743 struct callchain_param *callchain, 1744 const char *arg, bool unset) 1745 { 1746 int ret; 1747 callchain->enabled = !unset; 1748 1749 /* --no-call-graph */ 1750 if (unset) { 1751 callchain->record_mode = CALLCHAIN_NONE; 1752 pr_debug("callchain: disabled\n"); 1753 return 0; 1754 } 1755 1756 ret = parse_callchain_record_opt(arg, callchain); 1757 if (!ret) { 1758 /* Enable data address sampling for DWARF unwind. */ 1759 if (callchain->record_mode == CALLCHAIN_DWARF) 1760 record->sample_address = true; 1761 callchain_debug(callchain); 1762 } 1763 1764 return ret; 1765 } 1766 1767 int record_parse_callchain_opt(const struct option *opt, 1768 const char *arg, 1769 int unset) 1770 { 1771 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1772 } 1773 1774 int record_callchain_opt(const struct option *opt, 1775 const char *arg __maybe_unused, 1776 int unset __maybe_unused) 1777 { 1778 struct callchain_param *callchain = opt->value; 1779 1780 callchain->enabled = true; 1781 1782 if (callchain->record_mode == CALLCHAIN_NONE) 1783 callchain->record_mode = CALLCHAIN_FP; 1784 1785 callchain_debug(callchain); 1786 return 0; 1787 } 1788 1789 static int perf_record_config(const char *var, const char *value, void *cb) 1790 { 1791 struct record *rec = cb; 1792 1793 if (!strcmp(var, "record.build-id")) { 1794 if (!strcmp(value, "cache")) 1795 rec->no_buildid_cache = false; 1796 else if (!strcmp(value, "no-cache")) 1797 rec->no_buildid_cache = true; 1798 else if (!strcmp(value, "skip")) 1799 rec->no_buildid = true; 1800 else 1801 return -1; 1802 return 0; 1803 } 1804 if (!strcmp(var, "record.call-graph")) { 1805 var = "call-graph.record-mode"; 1806 return perf_default_config(var, value, cb); 1807 } 1808 #ifdef HAVE_AIO_SUPPORT 1809 if (!strcmp(var, "record.aio")) { 1810 rec->opts.nr_cblocks = strtol(value, NULL, 0); 1811 if (!rec->opts.nr_cblocks) 1812 rec->opts.nr_cblocks = nr_cblocks_default; 1813 } 1814 #endif 1815 1816 return 0; 1817 } 1818 1819 struct clockid_map { 1820 const char *name; 1821 int clockid; 1822 }; 1823 1824 #define CLOCKID_MAP(n, c) \ 1825 { .name = n, .clockid = (c), } 1826 1827 #define CLOCKID_END { .name = NULL, } 1828 1829 1830 /* 1831 * Add the missing ones, we need to build on many distros... 
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

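/*
 * Warn when the --switch-output size threshold is smaller than half the
 * mmap wakeup buffer: a single flush can then exceed the threshold, so
 * the resulting perf.data files may be bigger than requested.
 */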
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag = 's', .mult = 1 },
		{ .tag = 'm', .mult = 60 },
		{ .tag = 'h', .mult = 60*60 },
		{ .tag = 'd', .mult = 60*60*24 },
		{ .tag = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time = true,
		.mmap_pages = UINT_MAX,
		.user_freq = UINT_MAX,
		.user_interval = ULLONG_MAX,
		.freq = 4000,
		.target = {
			.uses_mmap = true,
			.default_per_cpu = true,
		},
		.mmap_flush = MMAP_FLUSH_DEFAULT,
	},
	.tool = {
		.sample = process_sample_event,
		.fork = perf_event__process_fork,
		.exit = perf_event__process_exit,
		.comm = perf_event__process_comm,
		.namespaces = perf_event__process_namespaces,
		.mmap = perf_event__process_mmap,
		.mmap2 = perf_event__process_mmap2,
		.ordered_events = true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
use record_opts, 2098 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 2099 * using pipes, etc. 2100 */ 2101 static struct option __record_options[] = { 2102 OPT_CALLBACK('e', "event", &record.evlist, "event", 2103 "event selector. use 'perf list' to list available events", 2104 parse_events_option), 2105 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 2106 "event filter", parse_filter), 2107 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 2108 NULL, "don't record events from perf itself", 2109 exclude_perf), 2110 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 2111 "record events on existing process id"), 2112 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 2113 "record events on existing thread id"), 2114 OPT_INTEGER('r', "realtime", &record.realtime_prio, 2115 "collect data with this RT SCHED_FIFO priority"), 2116 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 2117 "collect data without buffering"), 2118 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 2119 "collect raw sample records from all opened counters"), 2120 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 2121 "system-wide collection from all CPUs"), 2122 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 2123 "list of cpus to monitor"), 2124 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 2125 OPT_STRING('o', "output", &record.data.path, "file", 2126 "output file name"), 2127 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 2128 &record.opts.no_inherit_set, 2129 "child tasks do not inherit counters"), 2130 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 2131 "synthesize non-sample events at the end of output"), 2132 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 2133 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 2134 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 2135 "Fail if the specified frequency can't be used"), 2136 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 2137 "profile at this frequency", 2138 record__parse_freq), 2139 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 2140 "number of mmap data pages and AUX area tracing mmap pages", 2141 record__parse_mmap_pages), 2142 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 2143 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 2144 record__mmap_flush_parse), 2145 OPT_BOOLEAN(0, "group", &record.opts.group, 2146 "put the counters into a counter group"), 2147 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 2148 NULL, "enables call-graph recording", 2149 &record_callchain_opt), 2150 OPT_CALLBACK(0, "call-graph", &record.opts, 2151 "record_mode[,record_size]", record_callchain_help, 2152 &record_parse_callchain_opt), 2153 OPT_INCR('v', "verbose", &verbose, 2154 "be more verbose (show counter open errors, etc)"), 2155 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 2156 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 2157 "per thread counts"), 2158 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 2159 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 2160 "Record the sample physical addresses"), 2161 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 2162 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 2163 &record.opts.sample_time_set, 2164 "Record the sample timestamps"), 2165
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 2166 "Record the sample period"), 2167 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 2168 "don't sample"), 2169 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 2170 &record.no_buildid_cache_set, 2171 "do not update the buildid cache"), 2172 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 2173 &record.no_buildid_set, 2174 "do not collect buildids in perf.data"), 2175 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 2176 "monitor event in cgroup name only", 2177 parse_cgroups), 2178 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 2179 "ms to wait before starting measurement after program start"), 2180 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2181 "user to profile"), 2182 2183 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2184 "branch any", "sample any taken branches", 2185 parse_branch_stack), 2186 2187 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2188 "branch filter mask", "branch stack filter modes", 2189 parse_branch_stack), 2190 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2191 "sample by weight (on special events only)"), 2192 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2193 "sample transaction flags (special events only)"), 2194 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2195 "use per-thread mmaps"), 2196 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2197 "sample selected machine registers on interrupt," 2198 " use '-I?' to list register names", parse_intr_regs), 2199 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2200 "sample selected machine registers on interrupt," 2201 " use '--user-regs=?' 
to list register names", parse_user_regs), 2202 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2203 "Record running/enabled time of read (:S) events"), 2204 OPT_CALLBACK('k', "clockid", &record.opts, 2205 "clockid", "clockid to use for events, see clock_gettime()", 2206 parse_clockid), 2207 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2208 "opts", "AUX area tracing Snapshot Mode", ""), 2209 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2210 "per thread proc mmap processing timeout in ms"), 2211 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2212 "Record namespaces events"), 2213 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2214 "Record context switch events"), 2215 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2216 "Configure all used events to run in kernel space.", 2217 PARSE_OPT_EXCLUSIVE), 2218 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2219 "Configure all used events to run in user space.", 2220 PARSE_OPT_EXCLUSIVE), 2221 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 2222 "collect kernel callchains"), 2223 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 2224 "collect user callchains"), 2225 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2226 "clang binary to use for compiling BPF scriptlets"), 2227 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2228 "options passed to clang when compiling BPF scriptlets"), 2229 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2230 "file", "vmlinux pathname"), 2231 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2232 "Record build-id of all DSOs regardless of hits"), 2233 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2234 "append timestamp to output filename"), 2235 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2236 "Record timestamp boundary (time of first/last samples)"), 2237 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2238 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2239 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 2240 "signal"), 2241 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2242 "Limit number of switch output generated files"), 2243 OPT_BOOLEAN(0, "dry-run", &dry_run, 2244 "Parse options then exit"), 2245 #ifdef HAVE_AIO_SUPPORT 2246 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2247 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2248 record__aio_parse), 2249 #endif 2250 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2251 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2252 record__parse_affinity), 2253 #ifdef HAVE_ZSTD_SUPPORT 2254 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, 2255 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 2256 record__parse_comp_level), 2257 #endif 2258 OPT_END() 2259 }; 2260 2261 struct option *record_options = __record_options; 2262 2263 int cmd_record(int argc, const char **argv) 2264 { 2265 int err; 2266 struct record *rec = &record; 2267 char errbuf[BUFSIZ]; 2268 2269 setlocale(LC_ALL, ""); 2270 2271 #ifndef HAVE_LIBBPF_SUPPORT 2272 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2273 set_nobuild('\0', "clang-path", true); 2274 
set_nobuild('\0', "clang-opt", true); 2275 # undef set_nobuild 2276 #endif 2277 2278 #ifndef HAVE_BPF_PROLOGUE 2279 # if !defined (HAVE_DWARF_SUPPORT) 2280 # define REASON "NO_DWARF=1" 2281 # elif !defined (HAVE_LIBBPF_SUPPORT) 2282 # define REASON "NO_LIBBPF=1" 2283 # else 2284 # define REASON "this architecture doesn't support BPF prologue" 2285 # endif 2286 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2287 set_nobuild('\0', "vmlinux", true); 2288 # undef set_nobuild 2289 # undef REASON 2290 #endif 2291 2292 CPU_ZERO(&rec->affinity_mask); 2293 rec->opts.affinity = PERF_AFFINITY_SYS; 2294 2295 rec->evlist = evlist__new(); 2296 if (rec->evlist == NULL) 2297 return -ENOMEM; 2298 2299 err = perf_config(perf_record_config, rec); 2300 if (err) 2301 return err; 2302 2303 argc = parse_options(argc, argv, record_options, record_usage, 2304 PARSE_OPT_STOP_AT_NON_OPTION); 2305 if (quiet) 2306 perf_quiet_option(); 2307 2308 /* Make system wide (-a) the default target. */ 2309 if (!argc && target__none(&rec->opts.target)) 2310 rec->opts.target.system_wide = true; 2311 2312 if (nr_cgroups && !rec->opts.target.system_wide) { 2313 usage_with_options_msg(record_usage, record_options, 2314 "cgroup monitoring only available in system-wide mode"); 2315 2316 } 2317 2318 if (rec->opts.comp_level != 0) { 2319 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 2320 rec->no_buildid = true; 2321 } 2322 2323 if (rec->opts.record_switch_events && 2324 !perf_can_record_switch_events()) { 2325 ui__error("kernel does not support recording context switch events\n"); 2326 parse_options_usage(record_usage, record_options, "switch-events", 0); 2327 return -EINVAL; 2328 } 2329 2330 if (switch_output_setup(rec)) { 2331 parse_options_usage(record_usage, record_options, "switch-output", 0); 2332 return -EINVAL; 2333 } 2334 2335 if (rec->switch_output.time) { 2336 signal(SIGALRM, alarm_sig_handler); 2337 alarm(rec->switch_output.time); 2338 } 2339 2340 if (rec->switch_output.num_files) { 2341 rec->switch_output.filenames = calloc(sizeof(char *), 2342 rec->switch_output.num_files); 2343 if (!rec->switch_output.filenames) 2344 return -EINVAL; 2345 } 2346 2347 /* 2348 * Allow aliases to facilitate the lookup of symbols for address 2349 * filters. Refer to auxtrace_parse_filters(). 2350 */ 2351 symbol_conf.allow_aliases = true; 2352 2353 symbol__init(NULL); 2354 2355 err = record__auxtrace_init(rec); 2356 if (err) 2357 goto out; 2358 2359 if (dry_run) 2360 goto out; 2361 2362 err = bpf__setup_stdout(rec->evlist); 2363 if (err) { 2364 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2365 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2366 errbuf); 2367 goto out; 2368 } 2369 2370 err = -ENOMEM; 2371 2372 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) 2373 pr_warning( 2374 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 2375 "check /proc/sys/kernel/kptr_restrict.\n\n" 2376 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 2377 "file is not found in the buildid cache or in the vmlinux path.\n\n" 2378 "Samples in kernel modules won't be resolved at all.\n\n" 2379 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 2380 "even with a suitable vmlinux or kallsyms file.\n\n"); 2381 2382 if (rec->no_buildid_cache || rec->no_buildid) { 2383 disable_buildid_cache(); 2384 } else if (rec->switch_output.enabled) { 2385 /* 2386 * In 'perf record --switch-output', disable buildid 2387 * generation by default to reduce data file switching 2388 * overhead. Still generate buildid if they are required 2389 * explicitly using 2390 * 2391 * perf record --switch-output --no-no-buildid \ 2392 * --no-no-buildid-cache 2393 * 2394 * Following code equals to: 2395 * 2396 * if ((rec->no_buildid || !rec->no_buildid_set) && 2397 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2398 * disable_buildid_cache(); 2399 */ 2400 bool disable = true; 2401 2402 if (rec->no_buildid_set && !rec->no_buildid) 2403 disable = false; 2404 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2405 disable = false; 2406 if (disable) { 2407 rec->no_buildid = true; 2408 rec->no_buildid_cache = true; 2409 disable_buildid_cache(); 2410 } 2411 } 2412 2413 if (record.opts.overwrite) 2414 record.opts.tail_synthesize = true; 2415 2416 if (rec->evlist->core.nr_entries == 0 && 2417 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 2418 pr_err("Not enough memory for event selector list\n"); 2419 goto out; 2420 } 2421 2422 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 2423 rec->opts.no_inherit = true; 2424 2425 err = target__validate(&rec->opts.target); 2426 if (err) { 2427 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2428 ui__warning("%s\n", errbuf); 2429 } 2430 2431 err = target__parse_uid(&rec->opts.target); 2432 if (err) { 2433 int saved_errno = errno; 2434 2435 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2436 ui__error("%s", errbuf); 2437 2438 err = -saved_errno; 2439 goto out; 2440 } 2441 2442 /* Enable ignoring missing threads when -u/-p option is defined. */ 2443 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 2444 2445 err = -ENOMEM; 2446 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 2447 usage_with_options(record_usage, record_options); 2448 2449 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 2450 if (err) 2451 goto out; 2452 2453 /* 2454 * We take all buildids when the file contains 2455 * AUX area tracing data because we do not decode the 2456 * trace because it would take too long. 
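 * Without decoding we cannot tell which DSOs were actually hit, so this
 * is equivalent to the user having passed --buildid-all (the flag set
 * just below).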
2457 */ 2458 if (rec->opts.full_auxtrace) 2459 rec->buildid_all = true; 2460 2461 if (record_opts__config(&rec->opts)) { 2462 err = -EINVAL; 2463 goto out; 2464 } 2465 2466 if (rec->opts.nr_cblocks > nr_cblocks_max) 2467 rec->opts.nr_cblocks = nr_cblocks_max; 2468 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2469 2470 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2471 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2472 2473 if (rec->opts.comp_level > comp_level_max) 2474 rec->opts.comp_level = comp_level_max; 2475 pr_debug("comp level: %d\n", rec->opts.comp_level); 2476 2477 err = __cmd_record(&record, argc, argv); 2478 out: 2479 evlist__delete(rec->evlist); 2480 symbol__exit(); 2481 auxtrace_record__free(rec->itr); 2482 return err; 2483 } 2484 2485 static void snapshot_sig_handler(int sig __maybe_unused) 2486 { 2487 struct record *rec = &record; 2488 2489 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2490 trigger_hit(&auxtrace_snapshot_trigger); 2491 auxtrace_record__snapshot_started = 1; 2492 if (auxtrace_record__snapshot_start(record.itr)) 2493 trigger_error(&auxtrace_snapshot_trigger); 2494 } 2495 2496 if (switch_output_signal(rec)) 2497 trigger_hit(&switch_output_trigger); 2498 } 2499 2500 static void alarm_sig_handler(int sig __maybe_unused) 2501 { 2502 struct record *rec = &record; 2503 2504 if (switch_output_time(rec)) 2505 trigger_hit(&switch_output_trigger); 2506 } 2507
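/*
 * Illustrative invocations for the output-switching paths above (examples
 * only, not an exhaustive reference):
 *
 *   perf record --switch-output=signal -- ./workload
 *     rotate the output file when perf receives SIGUSR2, e.g. via
 *     "kill -USR2 <perf pid>" (switch_output_signal() path).
 *
 *   perf record --switch-output=1G --switch-max-files=4 -a
 *     rotate after roughly 1 GiB of written data, keeping at most four
 *     generated files.
 *
 *   perf record --switch-output=30s -a
 *     rotate on a 30 second alarm (SIGALRM -> alarm_sig_handler()).
 */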