1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/target.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/symbol.h" 27 #include "util/record.h" 28 #include "util/cpumap.h" 29 #include "util/thread_map.h" 30 #include "util/data.h" 31 #include "util/perf_regs.h" 32 #include "util/auxtrace.h" 33 #include "util/tsc.h" 34 #include "util/parse-branch-options.h" 35 #include "util/parse-regs-options.h" 36 #include "util/llvm-utils.h" 37 #include "util/bpf-loader.h" 38 #include "util/trigger.h" 39 #include "util/perf-hooks.h" 40 #include "util/cpu-set-sched.h" 41 #include "util/time-utils.h" 42 #include "util/units.h" 43 #include "util/bpf-event.h" 44 #include "asm/bug.h" 45 #include "perf.h" 46 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <locale.h> 50 #include <poll.h> 51 #include <unistd.h> 52 #include <sched.h> 53 #include <signal.h> 54 #include <sys/mman.h> 55 #include <sys/wait.h> 56 #include <linux/time64.h> 57 #include <linux/zalloc.h> 58 59 struct switch_output { 60 bool enabled; 61 bool signal; 62 unsigned long size; 63 unsigned long time; 64 const char *str; 65 bool set; 66 char **filenames; 67 int num_files; 68 int cur_file; 69 }; 70 71 struct record { 72 struct perf_tool tool; 73 struct record_opts opts; 74 u64 bytes_written; 75 struct perf_data data; 76 struct auxtrace_record *itr; 77 struct evlist *evlist; 78 struct perf_session *session; 79 int realtime_prio; 80 bool 
no_buildid; 81 bool no_buildid_set; 82 bool no_buildid_cache; 83 bool no_buildid_cache_set; 84 bool buildid_all; 85 bool timestamp_filename; 86 bool timestamp_boundary; 87 struct switch_output switch_output; 88 unsigned long long samples; 89 cpu_set_t affinity_mask; 90 }; 91 92 static volatile int auxtrace_record__snapshot_started; 93 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 94 static DEFINE_TRIGGER(switch_output_trigger); 95 96 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 97 "SYS", "NODE", "CPU" 98 }; 99 100 static bool switch_output_signal(struct record *rec) 101 { 102 return rec->switch_output.signal && 103 trigger_is_ready(&switch_output_trigger); 104 } 105 106 static bool switch_output_size(struct record *rec) 107 { 108 return rec->switch_output.size && 109 trigger_is_ready(&switch_output_trigger) && 110 (rec->bytes_written >= rec->switch_output.size); 111 } 112 113 static bool switch_output_time(struct record *rec) 114 { 115 return rec->switch_output.time && 116 trigger_is_ready(&switch_output_trigger); 117 } 118 119 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, 120 void *bf, size_t size) 121 { 122 struct perf_data_file *file = &rec->session->data->file; 123 124 if (perf_data_file__write(file, bf, size) < 0) { 125 pr_err("failed to write perf data, error: %m\n"); 126 return -1; 127 } 128 129 rec->bytes_written += size; 130 131 if (switch_output_size(rec)) 132 trigger_hit(&switch_output_trigger); 133 134 return 0; 135 } 136 137 static int record__aio_enabled(struct record *rec); 138 static int record__comp_enabled(struct record *rec); 139 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 140 void *src, size_t src_size); 141 142 #ifdef HAVE_AIO_SUPPORT 143 static int record__aio_write(struct aiocb *cblock, int trace_fd, 144 void *buf, size_t size, off_t off) 145 { 146 int rc; 147 148 cblock->aio_fildes = trace_fd; 149 cblock->aio_buf = buf; 150 cblock->aio_nbytes = 
size; 151 cblock->aio_offset = off; 152 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 153 154 do { 155 rc = aio_write(cblock); 156 if (rc == 0) { 157 break; 158 } else if (errno != EAGAIN) { 159 cblock->aio_fildes = -1; 160 pr_err("failed to queue perf data, error: %m\n"); 161 break; 162 } 163 } while (1); 164 165 return rc; 166 } 167 168 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) 169 { 170 void *rem_buf; 171 off_t rem_off; 172 size_t rem_size; 173 int rc, aio_errno; 174 ssize_t aio_ret, written; 175 176 aio_errno = aio_error(cblock); 177 if (aio_errno == EINPROGRESS) 178 return 0; 179 180 written = aio_ret = aio_return(cblock); 181 if (aio_ret < 0) { 182 if (aio_errno != EINTR) 183 pr_err("failed to write perf data, error: %m\n"); 184 written = 0; 185 } 186 187 rem_size = cblock->aio_nbytes - written; 188 189 if (rem_size == 0) { 190 cblock->aio_fildes = -1; 191 /* 192 * md->refcount is incremented in record__aio_pushfn() for 193 * every aio write request started in record__aio_push() so 194 * decrement it because the request is now complete. 195 */ 196 perf_mmap__put(md); 197 rc = 1; 198 } else { 199 /* 200 * aio write request may require restart with the 201 * reminder if the kernel didn't write whole 202 * chunk at once. 
203 */ 204 rem_off = cblock->aio_offset + written; 205 rem_buf = (void *)(cblock->aio_buf + written); 206 record__aio_write(cblock, cblock->aio_fildes, 207 rem_buf, rem_size, rem_off); 208 rc = 0; 209 } 210 211 return rc; 212 } 213 214 static int record__aio_sync(struct perf_mmap *md, bool sync_all) 215 { 216 struct aiocb **aiocb = md->aio.aiocb; 217 struct aiocb *cblocks = md->aio.cblocks; 218 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 219 int i, do_suspend; 220 221 do { 222 do_suspend = 0; 223 for (i = 0; i < md->aio.nr_cblocks; ++i) { 224 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 225 if (sync_all) 226 aiocb[i] = NULL; 227 else 228 return i; 229 } else { 230 /* 231 * Started aio write is not complete yet 232 * so it has to be waited before the 233 * next allocation. 234 */ 235 aiocb[i] = &cblocks[i]; 236 do_suspend = 1; 237 } 238 } 239 if (!do_suspend) 240 return -1; 241 242 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 243 if (!(errno == EAGAIN || errno == EINTR)) 244 pr_err("failed to sync perf data, error: %m\n"); 245 } 246 } while (1); 247 } 248 249 struct record_aio { 250 struct record *rec; 251 void *data; 252 size_t size; 253 }; 254 255 static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) 256 { 257 struct record_aio *aio = to; 258 259 /* 260 * map->base data pointed by buf is copied into free map->aio.data[] buffer 261 * to release space in the kernel buffer as fast as possible, calling 262 * perf_mmap__consume() from perf_mmap__push() function. 263 * 264 * That lets the kernel to proceed with storing more profiling data into 265 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 266 * 267 * Coping can be done in two steps in case the chunk of profiling data 268 * crosses the upper bound of the kernel buffer. 
In this case we first move 269 * part of data from map->start till the upper bound and then the reminder 270 * from the beginning of the kernel buffer till the end of the data chunk. 271 */ 272 273 if (record__comp_enabled(aio->rec)) { 274 size = zstd_compress(aio->rec->session, aio->data + aio->size, 275 perf_mmap__mmap_len(map) - aio->size, 276 buf, size); 277 } else { 278 memcpy(aio->data + aio->size, buf, size); 279 } 280 281 if (!aio->size) { 282 /* 283 * Increment map->refcount to guard map->aio.data[] buffer 284 * from premature deallocation because map object can be 285 * released earlier than aio write request started on 286 * map->aio.data[] buffer is complete. 287 * 288 * perf_mmap__put() is done at record__aio_complete() 289 * after started aio request completion or at record__aio_push() 290 * if the request failed to start. 291 */ 292 perf_mmap__get(map); 293 } 294 295 aio->size += size; 296 297 return size; 298 } 299 300 static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) 301 { 302 int ret, idx; 303 int trace_fd = rec->session->data->file.fd; 304 struct record_aio aio = { .rec = rec, .size = 0 }; 305 306 /* 307 * Call record__aio_sync() to wait till map->aio.data[] buffer 308 * becomes available after previous aio write operation. 
309 */ 310 311 idx = record__aio_sync(map, false); 312 aio.data = map->aio.data[idx]; 313 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 314 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 315 return ret; 316 317 rec->samples++; 318 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 319 if (!ret) { 320 *off += aio.size; 321 rec->bytes_written += aio.size; 322 if (switch_output_size(rec)) 323 trigger_hit(&switch_output_trigger); 324 } else { 325 /* 326 * Decrement map->refcount incremented in record__aio_pushfn() 327 * back if record__aio_write() operation failed to start, otherwise 328 * map->refcount is decremented in record__aio_complete() after 329 * aio write operation finishes successfully. 330 */ 331 perf_mmap__put(map); 332 } 333 334 return ret; 335 } 336 337 static off_t record__aio_get_pos(int trace_fd) 338 { 339 return lseek(trace_fd, 0, SEEK_CUR); 340 } 341 342 static void record__aio_set_pos(int trace_fd, off_t pos) 343 { 344 lseek(trace_fd, pos, SEEK_SET); 345 } 346 347 static void record__aio_mmap_read_sync(struct record *rec) 348 { 349 int i; 350 struct evlist *evlist = rec->evlist; 351 struct perf_mmap *maps = evlist->mmap; 352 353 if (!record__aio_enabled(rec)) 354 return; 355 356 for (i = 0; i < evlist->nr_mmaps; i++) { 357 struct perf_mmap *map = &maps[i]; 358 359 if (map->base) 360 record__aio_sync(map, true); 361 } 362 } 363 364 static int nr_cblocks_default = 1; 365 static int nr_cblocks_max = 4; 366 367 static int record__aio_parse(const struct option *opt, 368 const char *str, 369 int unset) 370 { 371 struct record_opts *opts = (struct record_opts *)opt->value; 372 373 if (unset) { 374 opts->nr_cblocks = 0; 375 } else { 376 if (str) 377 opts->nr_cblocks = strtol(str, NULL, 0); 378 if (!opts->nr_cblocks) 379 opts->nr_cblocks = nr_cblocks_default; 380 } 381 382 return 0; 383 } 384 #else /* HAVE_AIO_SUPPORT */ 385 static int nr_cblocks_max = 0; 386 387 static int record__aio_push(struct record 
*rec __maybe_unused, struct perf_mmap *map __maybe_unused, 388 off_t *off __maybe_unused) 389 { 390 return -1; 391 } 392 393 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 394 { 395 return -1; 396 } 397 398 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 399 { 400 } 401 402 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 403 { 404 } 405 #endif 406 407 static int record__aio_enabled(struct record *rec) 408 { 409 return rec->opts.nr_cblocks > 0; 410 } 411 412 #define MMAP_FLUSH_DEFAULT 1 413 static int record__mmap_flush_parse(const struct option *opt, 414 const char *str, 415 int unset) 416 { 417 int flush_max; 418 struct record_opts *opts = (struct record_opts *)opt->value; 419 static struct parse_tag tags[] = { 420 { .tag = 'B', .mult = 1 }, 421 { .tag = 'K', .mult = 1 << 10 }, 422 { .tag = 'M', .mult = 1 << 20 }, 423 { .tag = 'G', .mult = 1 << 30 }, 424 { .tag = 0 }, 425 }; 426 427 if (unset) 428 return 0; 429 430 if (str) { 431 opts->mmap_flush = parse_tag_value(str, tags); 432 if (opts->mmap_flush == (int)-1) 433 opts->mmap_flush = strtol(str, NULL, 0); 434 } 435 436 if (!opts->mmap_flush) 437 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 438 439 flush_max = perf_evlist__mmap_size(opts->mmap_pages); 440 flush_max /= 4; 441 if (opts->mmap_flush > flush_max) 442 opts->mmap_flush = flush_max; 443 444 return 0; 445 } 446 447 #ifdef HAVE_ZSTD_SUPPORT 448 static unsigned int comp_level_default = 1; 449 450 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 451 { 452 struct record_opts *opts = opt->value; 453 454 if (unset) { 455 opts->comp_level = 0; 456 } else { 457 if (str) 458 opts->comp_level = strtol(str, NULL, 0); 459 if (!opts->comp_level) 460 opts->comp_level = comp_level_default; 461 } 462 463 return 0; 464 } 465 #endif 466 static unsigned int comp_level_max = 22; 467 468 static int record__comp_enabled(struct record *rec) 469 { 470 return 
rec->opts.comp_level > 0; 471 } 472 473 static int process_synthesized_event(struct perf_tool *tool, 474 union perf_event *event, 475 struct perf_sample *sample __maybe_unused, 476 struct machine *machine __maybe_unused) 477 { 478 struct record *rec = container_of(tool, struct record, tool); 479 return record__write(rec, NULL, event, event->header.size); 480 } 481 482 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) 483 { 484 struct record *rec = to; 485 486 if (record__comp_enabled(rec)) { 487 size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); 488 bf = map->data; 489 } 490 491 rec->samples++; 492 return record__write(rec, map, bf, size); 493 } 494 495 static volatile int done; 496 static volatile int signr = -1; 497 static volatile int child_finished; 498 499 static void sig_handler(int sig) 500 { 501 if (sig == SIGCHLD) 502 child_finished = 1; 503 else 504 signr = sig; 505 506 done = 1; 507 } 508 509 static void sigsegv_handler(int sig) 510 { 511 perf_hooks__recover(); 512 sighandler_dump_stack(sig); 513 } 514 515 static void record__sig_exit(void) 516 { 517 if (signr == -1) 518 return; 519 520 signal(signr, SIG_DFL); 521 raise(signr); 522 } 523 524 #ifdef HAVE_AUXTRACE_SUPPORT 525 526 static int record__process_auxtrace(struct perf_tool *tool, 527 struct perf_mmap *map, 528 union perf_event *event, void *data1, 529 size_t len1, void *data2, size_t len2) 530 { 531 struct record *rec = container_of(tool, struct record, tool); 532 struct perf_data *data = &rec->data; 533 size_t padding; 534 u8 pad[8] = {0}; 535 536 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { 537 off_t file_offset; 538 int fd = perf_data__fd(data); 539 int err; 540 541 file_offset = lseek(fd, 0, SEEK_CUR); 542 if (file_offset == -1) 543 return -1; 544 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 545 event, file_offset); 546 if (err) 547 return err; 548 } 549 550 /* event.auxtrace.size includes 
padding, see __auxtrace_mmap__read() */ 551 padding = (len1 + len2) & 7; 552 if (padding) 553 padding = 8 - padding; 554 555 record__write(rec, map, event, event->header.size); 556 record__write(rec, map, data1, len1); 557 if (len2) 558 record__write(rec, map, data2, len2); 559 record__write(rec, map, &pad, padding); 560 561 return 0; 562 } 563 564 static int record__auxtrace_mmap_read(struct record *rec, 565 struct perf_mmap *map) 566 { 567 int ret; 568 569 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 570 record__process_auxtrace); 571 if (ret < 0) 572 return ret; 573 574 if (ret) 575 rec->samples++; 576 577 return 0; 578 } 579 580 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 581 struct perf_mmap *map) 582 { 583 int ret; 584 585 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 586 record__process_auxtrace, 587 rec->opts.auxtrace_snapshot_size); 588 if (ret < 0) 589 return ret; 590 591 if (ret) 592 rec->samples++; 593 594 return 0; 595 } 596 597 static int record__auxtrace_read_snapshot_all(struct record *rec) 598 { 599 int i; 600 int rc = 0; 601 602 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 603 struct perf_mmap *map = &rec->evlist->mmap[i]; 604 605 if (!map->auxtrace_mmap.base) 606 continue; 607 608 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 609 rc = -1; 610 goto out; 611 } 612 } 613 out: 614 return rc; 615 } 616 617 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 618 { 619 pr_debug("Recording AUX area tracing snapshot\n"); 620 if (record__auxtrace_read_snapshot_all(rec) < 0) { 621 trigger_error(&auxtrace_snapshot_trigger); 622 } else { 623 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 624 trigger_error(&auxtrace_snapshot_trigger); 625 else 626 trigger_ready(&auxtrace_snapshot_trigger); 627 } 628 } 629 630 static int record__auxtrace_snapshot_exit(struct record *rec) 631 { 632 if (trigger_is_error(&auxtrace_snapshot_trigger)) 633 return 0; 634 635 if 
(!auxtrace_record__snapshot_started && 636 auxtrace_record__snapshot_start(rec->itr)) 637 return -1; 638 639 record__read_auxtrace_snapshot(rec, true); 640 if (trigger_is_error(&auxtrace_snapshot_trigger)) 641 return -1; 642 643 return 0; 644 } 645 646 static int record__auxtrace_init(struct record *rec) 647 { 648 int err; 649 650 if (!rec->itr) { 651 rec->itr = auxtrace_record__init(rec->evlist, &err); 652 if (err) 653 return err; 654 } 655 656 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 657 rec->opts.auxtrace_snapshot_opts); 658 if (err) 659 return err; 660 661 return auxtrace_parse_filters(rec->evlist); 662 } 663 664 #else 665 666 static inline 667 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 668 struct perf_mmap *map __maybe_unused) 669 { 670 return 0; 671 } 672 673 static inline 674 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 675 bool on_exit __maybe_unused) 676 { 677 } 678 679 static inline 680 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 681 { 682 return 0; 683 } 684 685 static inline 686 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 687 { 688 return 0; 689 } 690 691 static int record__auxtrace_init(struct record *rec __maybe_unused) 692 { 693 return 0; 694 } 695 696 #endif 697 698 static int record__mmap_evlist(struct record *rec, 699 struct evlist *evlist) 700 { 701 struct record_opts *opts = &rec->opts; 702 char msg[512]; 703 704 if (opts->affinity != PERF_AFFINITY_SYS) 705 cpu__setup_cpunode_map(); 706 707 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 708 opts->auxtrace_mmap_pages, 709 opts->auxtrace_snapshot_mode, 710 opts->nr_cblocks, opts->affinity, 711 opts->mmap_flush, opts->comp_level) < 0) { 712 if (errno == EPERM) { 713 pr_err("Permission error mapping pages.\n" 714 "Consider increasing " 715 "/proc/sys/kernel/perf_event_mlock_kb,\n" 716 "or try again with a smaller value of -m/--mmap_pages.\n" 717 "(current value: 
%u,%u)\n", 718 opts->mmap_pages, opts->auxtrace_mmap_pages); 719 return -errno; 720 } else { 721 pr_err("failed to mmap with %d (%s)\n", errno, 722 str_error_r(errno, msg, sizeof(msg))); 723 if (errno) 724 return -errno; 725 else 726 return -EINVAL; 727 } 728 } 729 return 0; 730 } 731 732 static int record__mmap(struct record *rec) 733 { 734 return record__mmap_evlist(rec, rec->evlist); 735 } 736 737 static int record__open(struct record *rec) 738 { 739 char msg[BUFSIZ]; 740 struct evsel *pos; 741 struct evlist *evlist = rec->evlist; 742 struct perf_session *session = rec->session; 743 struct record_opts *opts = &rec->opts; 744 int rc = 0; 745 746 /* 747 * For initial_delay we need to add a dummy event so that we can track 748 * PERF_RECORD_MMAP while we wait for the initial delay to enable the 749 * real events, the ones asked by the user. 750 */ 751 if (opts->initial_delay) { 752 if (perf_evlist__add_dummy(evlist)) 753 return -ENOMEM; 754 755 pos = perf_evlist__first(evlist); 756 pos->tracking = 0; 757 pos = perf_evlist__last(evlist); 758 pos->tracking = 1; 759 pos->core.attr.enable_on_exec = 1; 760 } 761 762 perf_evlist__config(evlist, opts, &callchain_param); 763 764 evlist__for_each_entry(evlist, pos) { 765 try_again: 766 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 767 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 768 if (verbose > 0) 769 ui__warning("%s\n", msg); 770 goto try_again; 771 } 772 if ((errno == EINVAL || errno == EBADF) && 773 pos->leader != pos && 774 pos->weak_group) { 775 pos = perf_evlist__reset_weak_group(evlist, pos); 776 goto try_again; 777 } 778 rc = -errno; 779 perf_evsel__open_strerror(pos, &opts->target, 780 errno, msg, sizeof(msg)); 781 ui__error("%s\n", msg); 782 goto out; 783 } 784 785 pos->supported = true; 786 } 787 788 if (perf_evlist__apply_filters(evlist, &pos)) { 789 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 790 pos->filter, perf_evsel__name(pos), errno, 791 
str_error_r(errno, msg, sizeof(msg))); 792 rc = -1; 793 goto out; 794 } 795 796 rc = record__mmap(rec); 797 if (rc) 798 goto out; 799 800 session->evlist = evlist; 801 perf_session__set_id_hdr_size(session); 802 out: 803 return rc; 804 } 805 806 static int process_sample_event(struct perf_tool *tool, 807 union perf_event *event, 808 struct perf_sample *sample, 809 struct evsel *evsel, 810 struct machine *machine) 811 { 812 struct record *rec = container_of(tool, struct record, tool); 813 814 if (rec->evlist->first_sample_time == 0) 815 rec->evlist->first_sample_time = sample->time; 816 817 rec->evlist->last_sample_time = sample->time; 818 819 if (rec->buildid_all) 820 return 0; 821 822 rec->samples++; 823 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 824 } 825 826 static int process_buildids(struct record *rec) 827 { 828 struct perf_session *session = rec->session; 829 830 if (perf_data__size(&rec->data) == 0) 831 return 0; 832 833 /* 834 * During this process, it'll load kernel map and replace the 835 * dso->long_name to a real pathname it found. In this case 836 * we prefer the vmlinux path like 837 * /lib/modules/3.16.4/build/vmlinux 838 * 839 * rather than build-id path (in debug directory). 840 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 841 */ 842 symbol_conf.ignore_vmlinux_buildid = true; 843 844 /* 845 * If --buildid-all is given, it marks all DSO regardless of hits, 846 * so no need to process samples. But if timestamp_boundary is enabled, 847 * it still needs to walk on all samples to get the timestamps of 848 * first/last samples. 
849 */ 850 if (rec->buildid_all && !rec->timestamp_boundary) 851 rec->tool.sample = NULL; 852 853 return perf_session__process_events(session); 854 } 855 856 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 857 { 858 int err; 859 struct perf_tool *tool = data; 860 /* 861 *As for guest kernel when processing subcommand record&report, 862 *we arrange module mmap prior to guest kernel mmap and trigger 863 *a preload dso because default guest module symbols are loaded 864 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 865 *method is used to avoid symbol missing when the first addr is 866 *in module instead of in guest kernel. 867 */ 868 err = perf_event__synthesize_modules(tool, process_synthesized_event, 869 machine); 870 if (err < 0) 871 pr_err("Couldn't record guest kernel [%d]'s reference" 872 " relocation symbol.\n", machine->pid); 873 874 /* 875 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 876 * have no _text sometimes. 
877 */ 878 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 879 machine); 880 if (err < 0) 881 pr_err("Couldn't record guest kernel [%d]'s reference" 882 " relocation symbol.\n", machine->pid); 883 } 884 885 static struct perf_event_header finished_round_event = { 886 .size = sizeof(struct perf_event_header), 887 .type = PERF_RECORD_FINISHED_ROUND, 888 }; 889 890 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 891 { 892 if (rec->opts.affinity != PERF_AFFINITY_SYS && 893 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 894 CPU_ZERO(&rec->affinity_mask); 895 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 896 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 897 } 898 } 899 900 static size_t process_comp_header(void *record, size_t increment) 901 { 902 struct perf_record_compressed *event = record; 903 size_t size = sizeof(*event); 904 905 if (increment) { 906 event->header.size += increment; 907 return increment; 908 } 909 910 event->header.type = PERF_RECORD_COMPRESSED; 911 event->header.size = size; 912 913 return size; 914 } 915 916 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 917 void *src, size_t src_size) 918 { 919 size_t compressed; 920 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 921 922 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, 923 max_record_size, process_comp_header); 924 925 session->bytes_transferred += src_size; 926 session->bytes_compressed += compressed; 927 928 return compressed; 929 } 930 931 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 932 bool overwrite, bool synch) 933 { 934 u64 bytes_written = rec->bytes_written; 935 int i; 936 int rc = 0; 937 struct perf_mmap *maps; 938 int trace_fd = rec->data.file.fd; 939 off_t off = 0; 940 941 if (!evlist) 942 return 0; 943 944 
maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; 945 if (!maps) 946 return 0; 947 948 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 949 return 0; 950 951 if (record__aio_enabled(rec)) 952 off = record__aio_get_pos(trace_fd); 953 954 for (i = 0; i < evlist->nr_mmaps; i++) { 955 u64 flush = 0; 956 struct perf_mmap *map = &maps[i]; 957 958 if (map->base) { 959 record__adjust_affinity(rec, map); 960 if (synch) { 961 flush = map->flush; 962 map->flush = 1; 963 } 964 if (!record__aio_enabled(rec)) { 965 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 966 if (synch) 967 map->flush = flush; 968 rc = -1; 969 goto out; 970 } 971 } else { 972 if (record__aio_push(rec, map, &off) < 0) { 973 record__aio_set_pos(trace_fd, off); 974 if (synch) 975 map->flush = flush; 976 rc = -1; 977 goto out; 978 } 979 } 980 if (synch) 981 map->flush = flush; 982 } 983 984 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 985 record__auxtrace_mmap_read(rec, map) != 0) { 986 rc = -1; 987 goto out; 988 } 989 } 990 991 if (record__aio_enabled(rec)) 992 record__aio_set_pos(trace_fd, off); 993 994 /* 995 * Mark the round finished in case we wrote 996 * at least one event. 
997 */ 998 if (bytes_written != rec->bytes_written) 999 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1000 1001 if (overwrite) 1002 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1003 out: 1004 return rc; 1005 } 1006 1007 static int record__mmap_read_all(struct record *rec, bool synch) 1008 { 1009 int err; 1010 1011 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1012 if (err) 1013 return err; 1014 1015 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1016 } 1017 1018 static void record__init_features(struct record *rec) 1019 { 1020 struct perf_session *session = rec->session; 1021 int feat; 1022 1023 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1024 perf_header__set_feat(&session->header, feat); 1025 1026 if (rec->no_buildid) 1027 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1028 1029 if (!have_tracepoints(&rec->evlist->core.entries)) 1030 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1031 1032 if (!rec->opts.branch_stack) 1033 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1034 1035 if (!rec->opts.full_auxtrace) 1036 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1037 1038 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1039 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1040 1041 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1042 if (!record__comp_enabled(rec)) 1043 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1044 1045 perf_header__clear_feat(&session->header, HEADER_STAT); 1046 } 1047 1048 static void 1049 record__finish_output(struct record *rec) 1050 { 1051 struct perf_data *data = &rec->data; 1052 int fd = perf_data__fd(data); 1053 1054 if (data->is_pipe) 1055 return; 1056 1057 rec->session->header.data_size += rec->bytes_written; 1058 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1059 1060 if (!rec->no_buildid) { 1061 
process_buildids(rec); 1062 1063 if (rec->buildid_all) 1064 dsos__hit_all(rec->session); 1065 } 1066 perf_session__write_header(rec->session, rec->evlist, fd, true); 1067 1068 return; 1069 } 1070 1071 static int record__synthesize_workload(struct record *rec, bool tail) 1072 { 1073 int err; 1074 struct perf_thread_map *thread_map; 1075 1076 if (rec->opts.tail_synthesize != tail) 1077 return 0; 1078 1079 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1080 if (thread_map == NULL) 1081 return -1; 1082 1083 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1084 process_synthesized_event, 1085 &rec->session->machines.host, 1086 rec->opts.sample_address); 1087 perf_thread_map__put(thread_map); 1088 return err; 1089 } 1090 1091 static int record__synthesize(struct record *rec, bool tail); 1092 1093 static int 1094 record__switch_output(struct record *rec, bool at_exit) 1095 { 1096 struct perf_data *data = &rec->data; 1097 int fd, err; 1098 char *new_filename; 1099 1100 /* Same Size: "2015122520103046"*/ 1101 char timestamp[] = "InvalidTimestamp"; 1102 1103 record__aio_mmap_read_sync(rec); 1104 1105 record__synthesize(rec, true); 1106 if (target__none(&rec->opts.target)) 1107 record__synthesize_workload(rec, true); 1108 1109 rec->samples = 0; 1110 record__finish_output(rec); 1111 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1112 if (err) { 1113 pr_err("Failed to get current timestamp\n"); 1114 return -EINVAL; 1115 } 1116 1117 fd = perf_data__switch(data, timestamp, 1118 rec->session->header.data_offset, 1119 at_exit, &new_filename); 1120 if (fd >= 0 && !at_exit) { 1121 rec->bytes_written = 0; 1122 rec->session->header.data_size = 0; 1123 } 1124 1125 if (!quiet) 1126 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1127 data->path, timestamp); 1128 1129 if (rec->switch_output.num_files) { 1130 int n = rec->switch_output.cur_file + 1; 1131 1132 if (n >= rec->switch_output.num_files) 1133 n = 0; 1134 rec->switch_output.cur_file = 
n; 1135 if (rec->switch_output.filenames[n]) { 1136 remove(rec->switch_output.filenames[n]); 1137 zfree(&rec->switch_output.filenames[n]); 1138 } 1139 rec->switch_output.filenames[n] = new_filename; 1140 } else { 1141 free(new_filename); 1142 } 1143 1144 /* Output tracking events */ 1145 if (!at_exit) { 1146 record__synthesize(rec, false); 1147 1148 /* 1149 * In 'perf record --switch-output' without -a, 1150 * record__synthesize() in record__switch_output() won't 1151 * generate tracking events because there's no thread_map 1152 * in evlist. Which causes newly created perf.data doesn't 1153 * contain map and comm information. 1154 * Create a fake thread_map and directly call 1155 * perf_event__synthesize_thread_map() for those events. 1156 */ 1157 if (target__none(&rec->opts.target)) 1158 record__synthesize_workload(rec, false); 1159 } 1160 return fd; 1161 } 1162 1163 static volatile int workload_exec_errno; 1164 1165 /* 1166 * perf_evlist__prepare_workload will send a SIGUSR1 1167 * if the fork fails, since we asked by setting its 1168 * want_signal to true. 
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* Record the child's errno (carried in the signal payload) and stop the main loop. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

/*
 * Weak default: architectures that can convert perf_event_mmap_page TSC
 * parameters into a TIME_CONV event override this; everyone else emits
 * nothing and reports success.
 */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

/*
 * Pick any mmap'ed perf_event_mmap_page from the evlist (regular ring
 * buffers first, then the overwrite ones), or NULL if none is mapped.
 */
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

/*
 * Emit the synthetic (non-sample) events that describe the session:
 * attrs/features/tracing data for pipe output, time conversion, auxtrace
 * info, kernel + module mmaps, guest machines, thread and cpu maps, BPF
 * events and the pre-existing threads of the target.
 *
 * @tail selects whether this call is the leading or trailing synthesis
 * pass: it only does work when it matches rec->opts.tail_synthesize.
 * Returns 0 on success or a negative error.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features works on top of them (on report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			/* NOTE(review): uses 'return err' where sibling paths 'goto out'; equivalent since out: just returns err. */
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			/* On success err is the number of bytes written inline. */
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
							   session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		/* Kernel/module mmap failures only warn: recording still proceeds. */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	/* BPF synthesis failure is non-fatal: warn and carry on. */
	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
					    process_synthesized_event, opts->sample_address,
					    1);
out:
	return err;
}

/*
 * Main body of 'perf record': set up signals and the session, optionally
 * fork the workload, open/mmap the events, then loop reading the ring
 * buffers until done/drained, handling auxtrace snapshots and
 * --switch-output rotation along the way. Returns the workload's exit
 * status (when forking) or the internal error status.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0; /* remaining argv is the workload to exec */
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct evlist *sb_evlist = NULL;
	int fd;
	float ratio = 0;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	/* SIGUSR2 either drives auxtrace snapshots / output switching, or is ignored. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	/* NOTE(review): on zstd_init failure the session is not deleted — presumably relies on process exit; confirm. */
	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}

	session->header.env.comp_type = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->mmap_len;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Write the (pipe or file) header before any event data. */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	if (!opts->no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &session->header.env);

	/* Side-band thread failure is best-effort: disable BPF events and continue. */
	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		/* Snapshot sample count to detect whether this round produced data. */
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples this round: either finish or block in poll(). */
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Final drain of the ring buffers and outstanding AIO writes. */
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* Trailing synthesis pass (only acts when --tail-synthesize is set). */
	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
				rec->session->bytes_transferred / 1024.0 / 1024.0,
				ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);
	return status;
}

/* Dump the selected callchain mode (and DWARF stack size) at debug verbosity. */
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

/*
 * Parse a --call-graph argument into @callchain and mirror the relevant
 * bits into @record. @unset means --no-call-graph. Returns 0 on success
 * or the parser's error code.
 */
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

/* parse-options callback for --call-graph. */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

/* parse-options callback for bare -g: enable callchains, defaulting to FP. */
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

/*
 * perfconfig handler: consumes record.build-id (cache/no-cache/skip),
 * record.call-graph (forwarded as call-graph.record-mode) and, when
 * built with AIO, record.aio. Unknown keys are silently accepted.
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif

	return 0;
}

/* Maps a user-visible clock name to its clockid for --clockid parsing. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Name table consumed by parse_clockid(); terminated by CLOCKID_END. */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

/*
 * Store @clk_id's resolution in nanoseconds into *@res_ns (0 if
 * clock_getres() fails, with a warning). Always returns 0.
 */
static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

/*
 * parse-options callback for --clockid. Accepts a raw clockid number or
 * a name from clockids[] (with an optional "CLOCK_" prefix, matched
 * case-insensitively). Rejects being given twice. Returns 0 on success,
 * -1 on error or unknown name.
 */
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

/*
 * parse-options callback for --affinity. Only "node" and "cpu" change
 * the mode; any other string is silently ignored (keeps the default).
 */
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}

/*
 * parse-options callback for -m/--mmap-pages: "pages[,pages]", where the
 * optional second value sizes the AUX area tracing mmap. Returns 0 on
 * success or a negative errno-style code.
 */
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	/* Work on a copy so the comma can be cut in place. */
	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

/*
 * Warn when the --switch-output size threshold is below half the mmap
 * wakeup size: output files will then overshoot the requested size.
 */
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

/*
 * Interpret the --switch-output argument: the literal "signal", a size
 * with a B/K/M/G suffix, or a time with an s/m/h/d suffix. On success
 * enables switching (which forces timestamped filenames) and warns if
 * the size threshold is smaller than the buffer wakeup size. Returns 0
 * on success (or when the option was not set), -1 on a bad argument.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	/* NOTE(review): help text reads "record bpf events" for a *no-* flag — confirm against man page. */
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
set_nobuild('\0', "vmlinux", true); 2289 # undef set_nobuild 2290 # undef REASON 2291 #endif 2292 2293 CPU_ZERO(&rec->affinity_mask); 2294 rec->opts.affinity = PERF_AFFINITY_SYS; 2295 2296 rec->evlist = evlist__new(); 2297 if (rec->evlist == NULL) 2298 return -ENOMEM; 2299 2300 err = perf_config(perf_record_config, rec); 2301 if (err) 2302 return err; 2303 2304 argc = parse_options(argc, argv, record_options, record_usage, 2305 PARSE_OPT_STOP_AT_NON_OPTION); 2306 if (quiet) 2307 perf_quiet_option(); 2308 2309 /* Make system wide (-a) the default target. */ 2310 if (!argc && target__none(&rec->opts.target)) 2311 rec->opts.target.system_wide = true; 2312 2313 if (nr_cgroups && !rec->opts.target.system_wide) { 2314 usage_with_options_msg(record_usage, record_options, 2315 "cgroup monitoring only available in system-wide mode"); 2316 2317 } 2318 2319 if (rec->opts.comp_level != 0) { 2320 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 2321 rec->no_buildid = true; 2322 } 2323 2324 if (rec->opts.record_switch_events && 2325 !perf_can_record_switch_events()) { 2326 ui__error("kernel does not support recording context switch events\n"); 2327 parse_options_usage(record_usage, record_options, "switch-events", 0); 2328 return -EINVAL; 2329 } 2330 2331 if (switch_output_setup(rec)) { 2332 parse_options_usage(record_usage, record_options, "switch-output", 0); 2333 return -EINVAL; 2334 } 2335 2336 if (rec->switch_output.time) { 2337 signal(SIGALRM, alarm_sig_handler); 2338 alarm(rec->switch_output.time); 2339 } 2340 2341 if (rec->switch_output.num_files) { 2342 rec->switch_output.filenames = calloc(sizeof(char *), 2343 rec->switch_output.num_files); 2344 if (!rec->switch_output.filenames) 2345 return -EINVAL; 2346 } 2347 2348 /* 2349 * Allow aliases to facilitate the lookup of symbols for address 2350 * filters. Refer to auxtrace_parse_filters(). 
2351 */ 2352 symbol_conf.allow_aliases = true; 2353 2354 symbol__init(NULL); 2355 2356 err = record__auxtrace_init(rec); 2357 if (err) 2358 goto out; 2359 2360 if (dry_run) 2361 goto out; 2362 2363 err = bpf__setup_stdout(rec->evlist); 2364 if (err) { 2365 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2366 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2367 errbuf); 2368 goto out; 2369 } 2370 2371 err = -ENOMEM; 2372 2373 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) 2374 pr_warning( 2375 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 2376 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 2377 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 2378 "file is not found in the buildid cache or in the vmlinux path.\n\n" 2379 "Samples in kernel modules won't be resolved at all.\n\n" 2380 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 2381 "even with a suitable vmlinux or kallsyms file.\n\n"); 2382 2383 if (rec->no_buildid_cache || rec->no_buildid) { 2384 disable_buildid_cache(); 2385 } else if (rec->switch_output.enabled) { 2386 /* 2387 * In 'perf record --switch-output', disable buildid 2388 * generation by default to reduce data file switching 2389 * overhead. 
Still generate buildid if they are required 2390 * explicitly using 2391 * 2392 * perf record --switch-output --no-no-buildid \ 2393 * --no-no-buildid-cache 2394 * 2395 * Following code equals to: 2396 * 2397 * if ((rec->no_buildid || !rec->no_buildid_set) && 2398 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2399 * disable_buildid_cache(); 2400 */ 2401 bool disable = true; 2402 2403 if (rec->no_buildid_set && !rec->no_buildid) 2404 disable = false; 2405 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2406 disable = false; 2407 if (disable) { 2408 rec->no_buildid = true; 2409 rec->no_buildid_cache = true; 2410 disable_buildid_cache(); 2411 } 2412 } 2413 2414 if (record.opts.overwrite) 2415 record.opts.tail_synthesize = true; 2416 2417 if (rec->evlist->core.nr_entries == 0 && 2418 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 2419 pr_err("Not enough memory for event selector list\n"); 2420 goto out; 2421 } 2422 2423 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 2424 rec->opts.no_inherit = true; 2425 2426 err = target__validate(&rec->opts.target); 2427 if (err) { 2428 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2429 ui__warning("%s\n", errbuf); 2430 } 2431 2432 err = target__parse_uid(&rec->opts.target); 2433 if (err) { 2434 int saved_errno = errno; 2435 2436 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2437 ui__error("%s", errbuf); 2438 2439 err = -saved_errno; 2440 goto out; 2441 } 2442 2443 /* Enable ignoring missing threads when -u/-p option is defined. 
*/ 2444 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 2445 2446 err = -ENOMEM; 2447 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 2448 usage_with_options(record_usage, record_options); 2449 2450 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 2451 if (err) 2452 goto out; 2453 2454 /* 2455 * We take all buildids when the file contains 2456 * AUX area tracing data because we do not decode the 2457 * trace because it would take too long. 2458 */ 2459 if (rec->opts.full_auxtrace) 2460 rec->buildid_all = true; 2461 2462 if (record_opts__config(&rec->opts)) { 2463 err = -EINVAL; 2464 goto out; 2465 } 2466 2467 if (rec->opts.nr_cblocks > nr_cblocks_max) 2468 rec->opts.nr_cblocks = nr_cblocks_max; 2469 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2470 2471 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2472 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2473 2474 if (rec->opts.comp_level > comp_level_max) 2475 rec->opts.comp_level = comp_level_max; 2476 pr_debug("comp level: %d\n", rec->opts.comp_level); 2477 2478 err = __cmd_record(&record, argc, argv); 2479 out: 2480 evlist__delete(rec->evlist); 2481 symbol__exit(); 2482 auxtrace_record__free(rec->itr); 2483 return err; 2484 } 2485 2486 static void snapshot_sig_handler(int sig __maybe_unused) 2487 { 2488 struct record *rec = &record; 2489 2490 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2491 trigger_hit(&auxtrace_snapshot_trigger); 2492 auxtrace_record__snapshot_started = 1; 2493 if (auxtrace_record__snapshot_start(record.itr)) 2494 trigger_error(&auxtrace_snapshot_trigger); 2495 } 2496 2497 if (switch_output_signal(rec)) 2498 trigger_hit(&switch_output_trigger); 2499 } 2500 2501 static void alarm_sig_handler(int sig __maybe_unused) 2502 { 2503 struct record *rec = &record; 2504 2505 if (switch_output_time(rec)) 2506 trigger_hit(&switch_output_trigger); 2507 } 2508