1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/callchain.h" 18 #include "util/cgroup.h" 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/mmap.h" 25 #include "util/mutex.h" 26 #include "util/target.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/symbol.h" 30 #include "util/record.h" 31 #include "util/cpumap.h" 32 #include "util/thread_map.h" 33 #include "util/data.h" 34 #include "util/perf_regs.h" 35 #include "util/auxtrace.h" 36 #include "util/tsc.h" 37 #include "util/parse-branch-options.h" 38 #include "util/parse-regs-options.h" 39 #include "util/perf_api_probe.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/pmu.h" 50 #include "util/pmus.h" 51 #include "util/clockid.h" 52 #include "util/off_cpu.h" 53 #include "util/bpf-filter.h" 54 #include "asm/bug.h" 55 #include "perf.h" 56 #include "cputopo.h" 57 58 #include <errno.h> 59 #include <inttypes.h> 60 #include <locale.h> 61 #include <poll.h> 62 #include <pthread.h> 63 #include <unistd.h> 64 #ifndef HAVE_GETTID 65 #include <syscall.h> 66 #endif 67 #include <sched.h> 68 #include <signal.h> 69 #ifdef HAVE_EVENTFD_SUPPORT 70 #include <sys/eventfd.h> 71 #endif 72 #include <sys/mman.h> 73 #include <sys/wait.h> 74 #include <sys/types.h> 75 #include <sys/stat.h> 76 #include <fcntl.h> 77 #include <linux/err.h> 78 #include <linux/string.h> 79 #include <linux/time64.h> 80 #include <linux/zalloc.h> 81 #include <linux/bitmap.h> 82 #include <sys/time.h> 83 84 struct switch_output { 85 bool enabled; 86 bool signal; 87 unsigned long size; 88 unsigned long time; 89 const char *str; 90 bool set; 91 char **filenames; 92 int num_files; 93 int cur_file; 94 }; 95 96 struct thread_mask { 97 struct mmap_cpu_mask maps; 98 struct mmap_cpu_mask affinity; 99 }; 100 101 struct record_thread { 102 pid_t tid; 103 struct thread_mask *mask; 104 struct { 105 int msg[2]; 106 int ack[2]; 107 } pipes; 108 struct fdarray pollfd; 109 int ctlfd_pos; 110 int nr_mmaps; 111 struct mmap **maps; 112 struct mmap **overwrite_maps; 113 struct record *rec; 114 unsigned long long samples; 115 unsigned long waking; 116 u64 bytes_written; 117 u64 bytes_transferred; 118 u64 bytes_compressed; 119 }; 120 121 static __thread struct record_thread *thread; 122 123 enum thread_msg { 124 THREAD_MSG__UNDEFINED = 0, 125 THREAD_MSG__READY, 126 THREAD_MSG__MAX, 127 }; 128 129 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 130 "UNDEFINED", "READY" 131 }; 132 133 enum thread_spec { 134 THREAD_SPEC__UNDEFINED = 0, 135 THREAD_SPEC__CPU, 136 THREAD_SPEC__CORE, 137 THREAD_SPEC__PACKAGE, 138 THREAD_SPEC__NUMA, 139 THREAD_SPEC__USER, 140 THREAD_SPEC__MAX, 141 }; 142 143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 144 "undefined", "cpu", "core", "package", "numa", "user" 145 }; 146 147 struct pollfd_index_map { 148 int evlist_pollfd_index; 149 int thread_pollfd_index; 150 }; 151 152 struct record { 153 struct perf_tool tool; 154 struct record_opts opts; 155 u64 bytes_written; 156 u64 thread_bytes_written; 157 struct perf_data data; 158 struct auxtrace_record *itr; 159 struct evlist *evlist; 160 struct perf_session *session; 161 struct evlist *sb_evlist; 162 pthread_t thread_id; 163 int realtime_prio; 164 bool switch_output_event_set; 165 bool no_buildid; 166 bool no_buildid_set; 167 bool no_buildid_cache; 168 bool no_buildid_cache_set; 169 bool buildid_all; 170 bool buildid_mmap; 171 bool timestamp_filename; 172 bool timestamp_boundary; 173 bool off_cpu; 174 const char *filter_action; 175 struct switch_output switch_output; 176 unsigned long long samples; 177 unsigned long output_max_size; /* = 0: unlimited */ 178 struct perf_debuginfod debuginfod; 179 int nr_threads; 180 struct thread_mask *thread_masks; 181 struct record_thread *thread_data; 182 struct pollfd_index_map *index_map; 183 size_t index_map_sz; 184 size_t index_map_cnt; 185 }; 186 187 static volatile int done; 188 189 static volatile int auxtrace_record__snapshot_started; 190 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 191 static DEFINE_TRIGGER(switch_output_trigger); 192 193 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 194 "SYS", "NODE", "CPU" 195 }; 196 197 #ifndef HAVE_GETTID 198 static inline pid_t gettid(void) 199 { 200 return (pid_t)syscall(__NR_gettid); 201 } 202 #endif 203 204 static int record__threads_enabled(struct record *rec) 205 { 206 return rec->opts.threads_spec; 207 } 208 209 static bool switch_output_signal(struct record *rec) 210 { 211 return rec->switch_output.signal && 212 trigger_is_ready(&switch_output_trigger); 213 } 214 215 static bool switch_output_size(struct record *rec) 216 { 217 return rec->switch_output.size && 218 trigger_is_ready(&switch_output_trigger) && 219 (rec->bytes_written >= rec->switch_output.size); 220 } 221 222 static bool switch_output_time(struct record *rec) 223 { 224 return rec->switch_output.time && 225 trigger_is_ready(&switch_output_trigger); 226 } 227 228 static u64 record__bytes_written(struct record *rec) 229 { 230 return rec->bytes_written + rec->thread_bytes_written; 231 } 232 233 static bool record__output_max_size_exceeded(struct record *rec) 234 { 235 return rec->output_max_size && 236 (record__bytes_written(rec) >= rec->output_max_size); 237 } 238 239 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 240 void *bf, size_t size) 241 { 242 struct perf_data_file *file = &rec->session->data->file; 243 244 if (map && map->file) 245 file = map->file; 246 247 if (perf_data_file__write(file, bf, size) < 0) { 248 pr_err("failed to write perf data, error: %m\n"); 249 return -1; 250 } 251 252 if (map && map->file) { 253 thread->bytes_written += size; 254 rec->thread_bytes_written += size; 255 } else { 256 rec->bytes_written += size; 257 } 258 259 if (record__output_max_size_exceeded(rec) && !done) { 260 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 261 " stopping session ]\n", 262 record__bytes_written(rec) >> 10); 263 done = 1; 264 } 265 266 if (switch_output_size(rec)) 267 trigger_hit(&switch_output_trigger); 268 269 return 0; 270 } 271 272 static int record__aio_enabled(struct record *rec); 273 static int record__comp_enabled(struct record *rec); 274 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 275 void *dst, size_t dst_size, void *src, size_t src_size); 276 277 #ifdef HAVE_AIO_SUPPORT 278 static int record__aio_write(struct aiocb *cblock, int trace_fd, 279 void *buf, size_t size, off_t off) 280 { 281 int rc; 282 283 cblock->aio_fildes = trace_fd; 284 cblock->aio_buf = buf; 285 cblock->aio_nbytes = size; 286 cblock->aio_offset = off; 287 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 288 289 do { 290 rc = aio_write(cblock); 291 if (rc == 0) { 292 break; 293 } else if (errno != EAGAIN) { 294 cblock->aio_fildes = -1; 295 pr_err("failed to queue perf data, error: %m\n"); 296 break; 297 } 298 } while (1); 299 300 return rc; 301 } 302 303 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 304 { 305 void *rem_buf; 306 off_t rem_off; 307 size_t rem_size; 308 int rc, aio_errno; 309 ssize_t aio_ret, written; 310 311 aio_errno = aio_error(cblock); 312 if (aio_errno == EINPROGRESS) 313 return 0; 314 315 written = aio_ret = aio_return(cblock); 316 if (aio_ret < 0) { 317 if (aio_errno != EINTR) 318 pr_err("failed to write perf data, error: %m\n"); 319 written = 0; 320 } 321 322 rem_size = cblock->aio_nbytes - written; 323 324 if (rem_size == 0) { 325 cblock->aio_fildes = -1; 326 /* 327 * md->refcount is incremented in record__aio_pushfn() for 328 * every aio write request started in record__aio_push() so 329 * decrement it because the request is now complete. 330 */ 331 perf_mmap__put(&md->core); 332 rc = 1; 333 } else { 334 /* 335 * aio write request may require restart with the 336 * remainder if the kernel didn't write whole 337 * chunk at once. 338 */ 339 rem_off = cblock->aio_offset + written; 340 rem_buf = (void *)(cblock->aio_buf + written); 341 record__aio_write(cblock, cblock->aio_fildes, 342 rem_buf, rem_size, rem_off); 343 rc = 0; 344 } 345 346 return rc; 347 } 348 349 static int record__aio_sync(struct mmap *md, bool sync_all) 350 { 351 struct aiocb **aiocb = md->aio.aiocb; 352 struct aiocb *cblocks = md->aio.cblocks; 353 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 354 int i, do_suspend; 355 356 do { 357 do_suspend = 0; 358 for (i = 0; i < md->aio.nr_cblocks; ++i) { 359 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 360 if (sync_all) 361 aiocb[i] = NULL; 362 else 363 return i; 364 } else { 365 /* 366 * Started aio write is not complete yet 367 * so it has to be waited before the 368 * next allocation. 369 */ 370 aiocb[i] = &cblocks[i]; 371 do_suspend = 1; 372 } 373 } 374 if (!do_suspend) 375 return -1; 376 377 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 378 if (!(errno == EAGAIN || errno == EINTR)) 379 pr_err("failed to sync perf data, error: %m\n"); 380 } 381 } while (1); 382 } 383 384 struct record_aio { 385 struct record *rec; 386 void *data; 387 size_t size; 388 }; 389 390 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 391 { 392 struct record_aio *aio = to; 393 394 /* 395 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 396 * to release space in the kernel buffer as fast as possible, calling 397 * perf_mmap__consume() from perf_mmap__push() function. 398 * 399 * That lets the kernel to proceed with storing more profiling data into 400 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 401 * 402 * Coping can be done in two steps in case the chunk of profiling data 403 * crosses the upper bound of the kernel buffer. In this case we first move 404 * part of data from map->start till the upper bound and then the remainder 405 * from the beginning of the kernel buffer till the end of the data chunk. 406 */ 407 408 if (record__comp_enabled(aio->rec)) { 409 ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 410 mmap__mmap_len(map) - aio->size, 411 buf, size); 412 if (compressed < 0) 413 return (int)compressed; 414 415 size = compressed; 416 } else { 417 memcpy(aio->data + aio->size, buf, size); 418 } 419 420 if (!aio->size) { 421 /* 422 * Increment map->refcount to guard map->aio.data[] buffer 423 * from premature deallocation because map object can be 424 * released earlier than aio write request started on 425 * map->aio.data[] buffer is complete. 426 * 427 * perf_mmap__put() is done at record__aio_complete() 428 * after started aio request completion or at record__aio_push() 429 * if the request failed to start. 430 */ 431 perf_mmap__get(&map->core); 432 } 433 434 aio->size += size; 435 436 return size; 437 } 438 439 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 440 { 441 int ret, idx; 442 int trace_fd = rec->session->data->file.fd; 443 struct record_aio aio = { .rec = rec, .size = 0 }; 444 445 /* 446 * Call record__aio_sync() to wait till map->aio.data[] buffer 447 * becomes available after previous aio write operation. 448 */ 449 450 idx = record__aio_sync(map, false); 451 aio.data = map->aio.data[idx]; 452 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 453 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 454 return ret; 455 456 rec->samples++; 457 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 458 if (!ret) { 459 *off += aio.size; 460 rec->bytes_written += aio.size; 461 if (switch_output_size(rec)) 462 trigger_hit(&switch_output_trigger); 463 } else { 464 /* 465 * Decrement map->refcount incremented in record__aio_pushfn() 466 * back if record__aio_write() operation failed to start, otherwise 467 * map->refcount is decremented in record__aio_complete() after 468 * aio write operation finishes successfully. 469 */ 470 perf_mmap__put(&map->core); 471 } 472 473 return ret; 474 } 475 476 static off_t record__aio_get_pos(int trace_fd) 477 { 478 return lseek(trace_fd, 0, SEEK_CUR); 479 } 480 481 static void record__aio_set_pos(int trace_fd, off_t pos) 482 { 483 lseek(trace_fd, pos, SEEK_SET); 484 } 485 486 static void record__aio_mmap_read_sync(struct record *rec) 487 { 488 int i; 489 struct evlist *evlist = rec->evlist; 490 struct mmap *maps = evlist->mmap; 491 492 if (!record__aio_enabled(rec)) 493 return; 494 495 for (i = 0; i < evlist->core.nr_mmaps; i++) { 496 struct mmap *map = &maps[i]; 497 498 if (map->core.base) 499 record__aio_sync(map, true); 500 } 501 } 502 503 static int nr_cblocks_default = 1; 504 static int nr_cblocks_max = 4; 505 506 static int record__aio_parse(const struct option *opt, 507 const char *str, 508 int unset) 509 { 510 struct record_opts *opts = (struct record_opts *)opt->value; 511 512 if (unset) { 513 opts->nr_cblocks = 0; 514 } else { 515 if (str) 516 opts->nr_cblocks = strtol(str, NULL, 0); 517 if (!opts->nr_cblocks) 518 opts->nr_cblocks = nr_cblocks_default; 519 } 520 521 return 0; 522 } 523 #else /* HAVE_AIO_SUPPORT */ 524 static int nr_cblocks_max = 0; 525 526 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 527 off_t *off __maybe_unused) 528 { 529 return -1; 530 } 531 532 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 533 { 534 return -1; 535 } 536 537 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 538 { 539 } 540 541 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 542 { 543 } 544 #endif 545 546 static int record__aio_enabled(struct record *rec) 547 { 548 return rec->opts.nr_cblocks > 0; 549 } 550 551 #define MMAP_FLUSH_DEFAULT 1 552 static int record__mmap_flush_parse(const struct option *opt, 553 const char *str, 554 int unset) 555 { 556 int flush_max; 557 struct record_opts *opts = (struct record_opts *)opt->value; 558 static struct parse_tag tags[] = { 559 { .tag = 'B', .mult = 1 }, 560 { .tag = 'K', .mult = 1 << 10 }, 561 { .tag = 'M', .mult = 1 << 20 }, 562 { .tag = 'G', .mult = 1 << 30 }, 563 { .tag = 0 }, 564 }; 565 566 if (unset) 567 return 0; 568 569 if (str) { 570 opts->mmap_flush = parse_tag_value(str, tags); 571 if (opts->mmap_flush == (int)-1) 572 opts->mmap_flush = strtol(str, NULL, 0); 573 } 574 575 if (!opts->mmap_flush) 576 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 577 578 flush_max = evlist__mmap_size(opts->mmap_pages); 579 flush_max /= 4; 580 if (opts->mmap_flush > flush_max) 581 opts->mmap_flush = flush_max; 582 583 return 0; 584 } 585 586 #ifdef HAVE_ZSTD_SUPPORT 587 static unsigned int comp_level_default = 1; 588 589 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 590 { 591 struct record_opts *opts = opt->value; 592 593 if (unset) { 594 opts->comp_level = 0; 595 } else { 596 if (str) 597 opts->comp_level = strtol(str, NULL, 0); 598 if (!opts->comp_level) 599 opts->comp_level = comp_level_default; 600 } 601 602 return 0; 603 } 604 #endif 605 static unsigned int comp_level_max = 22; 606 607 static int record__comp_enabled(struct record *rec) 608 { 609 return rec->opts.comp_level > 0; 610 } 611 612 static int process_synthesized_event(struct perf_tool *tool, 613 union perf_event *event, 614 struct perf_sample *sample __maybe_unused, 615 struct machine *machine __maybe_unused) 616 { 617 struct record *rec = container_of(tool, struct record, tool); 618 return record__write(rec, NULL, event, event->header.size); 619 } 620 621 static struct mutex synth_lock; 622 623 static int process_locked_synthesized_event(struct perf_tool *tool, 624 union perf_event *event, 625 struct perf_sample *sample __maybe_unused, 626 struct machine *machine __maybe_unused) 627 { 628 int ret; 629 630 mutex_lock(&synth_lock); 631 ret = process_synthesized_event(tool, event, sample, machine); 632 mutex_unlock(&synth_lock); 633 return ret; 634 } 635 636 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 637 { 638 struct record *rec = to; 639 640 if (record__comp_enabled(rec)) { 641 ssize_t compressed = zstd_compress(rec->session, map, map->data, 642 mmap__mmap_len(map), bf, size); 643 644 if (compressed < 0) 645 return (int)compressed; 646 647 size = compressed; 648 bf = map->data; 649 } 650 651 thread->samples++; 652 return record__write(rec, map, bf, size); 653 } 654 655 static volatile sig_atomic_t signr = -1; 656 static volatile sig_atomic_t child_finished; 657 #ifdef HAVE_EVENTFD_SUPPORT 658 static volatile sig_atomic_t done_fd = -1; 659 #endif 660 661 static void sig_handler(int sig) 662 { 663 if (sig == SIGCHLD) 664 child_finished = 1; 665 else 666 signr = sig; 667 668 done = 1; 669 #ifdef HAVE_EVENTFD_SUPPORT 670 if (done_fd >= 0) { 671 u64 tmp = 1; 672 int orig_errno = errno; 673 674 /* 675 * It is possible for this signal handler to run after done is 676 * checked in the main loop, but before the perf counter fds are 677 * polled. If this happens, the poll() will continue to wait 678 * even though done is set, and will only break out if either 679 * another signal is received, or the counters are ready for 680 * read. To ensure the poll() doesn't sleep when done is set, 681 * use an eventfd (done_fd) to wake up the poll(). 682 */ 683 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 684 pr_err("failed to signal wakeup fd, error: %m\n"); 685 686 errno = orig_errno; 687 } 688 #endif // HAVE_EVENTFD_SUPPORT 689 } 690 691 static void sigsegv_handler(int sig) 692 { 693 perf_hooks__recover(); 694 sighandler_dump_stack(sig); 695 } 696 697 static void record__sig_exit(void) 698 { 699 if (signr == -1) 700 return; 701 702 signal(signr, SIG_DFL); 703 raise(signr); 704 } 705 706 #ifdef HAVE_AUXTRACE_SUPPORT 707 708 static int record__process_auxtrace(struct perf_tool *tool, 709 struct mmap *map, 710 union perf_event *event, void *data1, 711 size_t len1, void *data2, size_t len2) 712 { 713 struct record *rec = container_of(tool, struct record, tool); 714 struct perf_data *data = &rec->data; 715 size_t padding; 716 u8 pad[8] = {0}; 717 718 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 719 off_t file_offset; 720 int fd = perf_data__fd(data); 721 int err; 722 723 file_offset = lseek(fd, 0, SEEK_CUR); 724 if (file_offset == -1) 725 return -1; 726 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 727 event, file_offset); 728 if (err) 729 return err; 730 } 731 732 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 733 padding = (len1 + len2) & 7; 734 if (padding) 735 padding = 8 - padding; 736 737 record__write(rec, map, event, event->header.size); 738 record__write(rec, map, data1, len1); 739 if (len2) 740 record__write(rec, map, data2, len2); 741 record__write(rec, map, &pad, padding); 742 743 return 0; 744 } 745 746 static int record__auxtrace_mmap_read(struct record *rec, 747 struct mmap *map) 748 { 749 int ret; 750 751 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 752 record__process_auxtrace); 753 if (ret < 0) 754 return ret; 755 756 if (ret) 757 rec->samples++; 758 759 return 0; 760 } 761 762 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 763 struct mmap *map) 764 { 765 int ret; 766 767 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 768 record__process_auxtrace, 769 rec->opts.auxtrace_snapshot_size); 770 if (ret < 0) 771 return ret; 772 773 if (ret) 774 rec->samples++; 775 776 return 0; 777 } 778 779 static int record__auxtrace_read_snapshot_all(struct record *rec) 780 { 781 int i; 782 int rc = 0; 783 784 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 785 struct mmap *map = &rec->evlist->mmap[i]; 786 787 if (!map->auxtrace_mmap.base) 788 continue; 789 790 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 791 rc = -1; 792 goto out; 793 } 794 } 795 out: 796 return rc; 797 } 798 799 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 800 { 801 pr_debug("Recording AUX area tracing snapshot\n"); 802 if (record__auxtrace_read_snapshot_all(rec) < 0) { 803 trigger_error(&auxtrace_snapshot_trigger); 804 } else { 805 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 806 trigger_error(&auxtrace_snapshot_trigger); 807 else 808 trigger_ready(&auxtrace_snapshot_trigger); 809 } 810 } 811 812 static int record__auxtrace_snapshot_exit(struct record *rec) 813 { 814 if (trigger_is_error(&auxtrace_snapshot_trigger)) 815 return 0; 816 817 if (!auxtrace_record__snapshot_started && 818 auxtrace_record__snapshot_start(rec->itr)) 819 return -1; 820 821 record__read_auxtrace_snapshot(rec, true); 822 if (trigger_is_error(&auxtrace_snapshot_trigger)) 823 return -1; 824 825 return 0; 826 } 827 828 static int record__auxtrace_init(struct record *rec) 829 { 830 int err; 831 832 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 833 && record__threads_enabled(rec)) { 834 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 835 return -EINVAL; 836 } 837 838 if (!rec->itr) { 839 rec->itr = auxtrace_record__init(rec->evlist, &err); 840 if (err) 841 return err; 842 } 843 844 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 845 rec->opts.auxtrace_snapshot_opts); 846 if (err) 847 return err; 848 849 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 850 rec->opts.auxtrace_sample_opts); 851 if (err) 852 return err; 853 854 auxtrace_regroup_aux_output(rec->evlist); 855 856 return auxtrace_parse_filters(rec->evlist); 857 } 858 859 #else 860 861 static inline 862 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 863 struct mmap *map __maybe_unused) 864 { 865 return 0; 866 } 867 868 static inline 869 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 870 bool on_exit __maybe_unused) 871 { 872 } 873 874 static inline 875 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 876 { 877 return 0; 878 } 879 880 static inline 881 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 882 { 883 return 0; 884 } 885 886 static int record__auxtrace_init(struct record *rec __maybe_unused) 887 { 888 return 0; 889 } 890 891 #endif 892 893 static int record__config_text_poke(struct evlist *evlist) 894 { 895 struct evsel *evsel; 896 897 /* Nothing to do if text poke is already configured */ 898 evlist__for_each_entry(evlist, evsel) { 899 if (evsel->core.attr.text_poke) 900 return 0; 901 } 902 903 evsel = evlist__add_dummy_on_all_cpus(evlist); 904 if (!evsel) 905 return -ENOMEM; 906 907 evsel->core.attr.text_poke = 1; 908 evsel->core.attr.ksymbol = 1; 909 evsel->immediate = true; 910 evsel__set_sample_bit(evsel, TIME); 911 912 return 0; 913 } 914 915 static int record__config_off_cpu(struct record *rec) 916 { 917 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 918 } 919 920 static bool record__tracking_system_wide(struct record *rec) 921 { 922 struct evlist *evlist = rec->evlist; 923 struct evsel *evsel; 924 925 /* 926 * If non-dummy evsel exists, system_wide sideband is need to 927 * help parse sample information. 928 * For example, PERF_EVENT_MMAP event to help parse symbol, 929 * and PERF_EVENT_COMM event to help parse task executable name. 930 */ 931 evlist__for_each_entry(evlist, evsel) { 932 if (!evsel__is_dummy_event(evsel)) 933 return true; 934 } 935 936 return false; 937 } 938 939 static int record__config_tracking_events(struct record *rec) 940 { 941 struct record_opts *opts = &rec->opts; 942 struct evlist *evlist = rec->evlist; 943 bool system_wide = false; 944 struct evsel *evsel; 945 946 /* 947 * For initial_delay, system wide or a hybrid system, we need to add 948 * tracking event so that we can track PERF_RECORD_MMAP to cover the 949 * delay of waiting or event synthesis. 950 */ 951 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 952 perf_pmus__num_core_pmus() > 1) { 953 954 /* 955 * User space tasks can migrate between CPUs, so when tracing 956 * selected CPUs, sideband for all CPUs is still needed. 957 */ 958 if (!!opts->target.cpu_list && record__tracking_system_wide(rec)) 959 system_wide = true; 960 961 evsel = evlist__findnew_tracking_event(evlist, system_wide); 962 if (!evsel) 963 return -ENOMEM; 964 965 /* 966 * Enable the tracking event when the process is forked for 967 * initial_delay, immediately for system wide. 968 */ 969 if (opts->target.initial_delay && !evsel->immediate && 970 !target__has_cpu(&opts->target)) 971 evsel->core.attr.enable_on_exec = 1; 972 else 973 evsel->immediate = 1; 974 } 975 976 return 0; 977 } 978 979 static bool record__kcore_readable(struct machine *machine) 980 { 981 char kcore[PATH_MAX]; 982 int fd; 983 984 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 985 986 fd = open(kcore, O_RDONLY); 987 if (fd < 0) 988 return false; 989 990 close(fd); 991 992 return true; 993 } 994 995 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 996 { 997 char from_dir[PATH_MAX]; 998 char kcore_dir[PATH_MAX]; 999 int ret; 1000 1001 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 1002 1003 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 1004 if (ret) 1005 return ret; 1006 1007 return kcore_copy(from_dir, kcore_dir); 1008 } 1009 1010 static void record__thread_data_init_pipes(struct record_thread *thread_data) 1011 { 1012 thread_data->pipes.msg[0] = -1; 1013 thread_data->pipes.msg[1] = -1; 1014 thread_data->pipes.ack[0] = -1; 1015 thread_data->pipes.ack[1] = -1; 1016 } 1017 1018 static int record__thread_data_open_pipes(struct record_thread *thread_data) 1019 { 1020 if (pipe(thread_data->pipes.msg)) 1021 return -EINVAL; 1022 1023 if (pipe(thread_data->pipes.ack)) { 1024 close(thread_data->pipes.msg[0]); 1025 thread_data->pipes.msg[0] = -1; 1026 close(thread_data->pipes.msg[1]); 1027 thread_data->pipes.msg[1] = -1; 1028 return -EINVAL; 1029 } 1030 1031 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 1032 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 1033 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 1034 1035 return 0; 1036 } 1037 1038 static void record__thread_data_close_pipes(struct record_thread *thread_data) 1039 { 1040 if (thread_data->pipes.msg[0] != -1) { 1041 close(thread_data->pipes.msg[0]); 1042 thread_data->pipes.msg[0] = -1; 1043 } 1044 if (thread_data->pipes.msg[1] != -1) { 1045 close(thread_data->pipes.msg[1]); 1046 thread_data->pipes.msg[1] = -1; 1047 } 1048 if (thread_data->pipes.ack[0] != -1) { 1049 close(thread_data->pipes.ack[0]); 1050 thread_data->pipes.ack[0] = -1; 1051 } 1052 if (thread_data->pipes.ack[1] != -1) { 1053 close(thread_data->pipes.ack[1]); 1054 thread_data->pipes.ack[1] = -1; 1055 } 1056 } 1057 1058 static bool evlist__per_thread(struct evlist *evlist) 1059 { 1060 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 1061 } 1062 1063 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 1064 { 1065 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 1066 struct mmap *mmap = evlist->mmap; 1067 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 1068 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1069 bool per_thread = evlist__per_thread(evlist); 1070 1071 if (per_thread) 1072 thread_data->nr_mmaps = nr_mmaps; 1073 else 1074 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1075 thread_data->mask->maps.nbits); 1076 if (mmap) { 1077 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1078 if (!thread_data->maps) 1079 return -ENOMEM; 1080 } 1081 if (overwrite_mmap) { 1082 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1083 if (!thread_data->overwrite_maps) { 1084 zfree(&thread_data->maps); 1085 return -ENOMEM; 1086 } 1087 } 1088 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1089 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1090 1091 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1092 if (per_thread || 1093 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1094 if (thread_data->maps) { 1095 thread_data->maps[tm] = &mmap[m]; 1096 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1097 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1098 } 1099 if (thread_data->overwrite_maps) { 1100 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1101 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1102 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1103 } 1104 tm++; 1105 } 1106 } 1107 1108 return 0; 1109 } 1110 1111 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1112 { 1113 int f, tm, pos; 1114 struct mmap *map, *overwrite_map; 1115 1116 fdarray__init(&thread_data->pollfd, 64); 1117 1118 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1119 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1120 overwrite_map = thread_data->overwrite_maps ? 1121 thread_data->overwrite_maps[tm] : NULL; 1122 1123 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1124 void *ptr = evlist->core.pollfd.priv[f].ptr; 1125 1126 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1127 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1128 &evlist->core.pollfd); 1129 if (pos < 0) 1130 return pos; 1131 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1132 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1133 } 1134 } 1135 } 1136 1137 return 0; 1138 } 1139 1140 static void record__free_thread_data(struct record *rec) 1141 { 1142 int t; 1143 struct record_thread *thread_data = rec->thread_data; 1144 1145 if (thread_data == NULL) 1146 return; 1147 1148 for (t = 0; t < rec->nr_threads; t++) { 1149 record__thread_data_close_pipes(&thread_data[t]); 1150 zfree(&thread_data[t].maps); 1151 zfree(&thread_data[t].overwrite_maps); 1152 fdarray__exit(&thread_data[t].pollfd); 1153 } 1154 1155 zfree(&rec->thread_data); 1156 } 1157 1158 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1159 int evlist_pollfd_index, 1160 int thread_pollfd_index) 1161 { 1162 size_t x = rec->index_map_cnt; 1163 1164 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1165 return -ENOMEM; 1166 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1167 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1168 rec->index_map_cnt += 1; 1169 return 0; 1170 } 1171 1172 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1173 struct evlist *evlist, 1174 struct record_thread *thread_data) 1175 { 1176 struct pollfd *e_entries = evlist->core.pollfd.entries; 1177 struct pollfd *t_entries = thread_data->pollfd.entries; 1178 int err = 0; 1179 size_t i; 1180 1181 for (i = 0; i < rec->index_map_cnt; i++) { 1182 int e_pos = rec->index_map[i].evlist_pollfd_index; 1183 int t_pos = rec->index_map[i].thread_pollfd_index; 1184 1185 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1186 e_entries[e_pos].events != t_entries[t_pos].events) { 1187 pr_err("Thread and evlist pollfd index mismatch\n"); 1188 err = -EINVAL; 1189 continue; 1190 } 1191 e_entries[e_pos].revents = t_entries[t_pos].revents; 1192 } 1193 return err; 1194 } 1195 1196 static int record__dup_non_perf_events(struct record *rec, 1197 struct evlist *evlist, 1198 struct record_thread *thread_data) 1199 { 1200 struct fdarray *fda = &evlist->core.pollfd; 1201 int i, ret; 1202 1203 for (i = 0; i < fda->nr; i++) { 1204 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1205 continue; 1206 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1207 if (ret < 0) { 1208 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1209 return ret; 1210 } 1211 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1212 thread_data, ret, fda->entries[i].fd); 1213 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1214 if (ret < 0) { 1215 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1216 return ret; 1217 } 1218 } 1219 return 0; 1220 } 1221 1222 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1223 { 1224 int t, ret; 1225 struct record_thread *thread_data; 1226 1227 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1228 if (!rec->thread_data) { 1229 pr_err("Failed to allocate thread data\n"); 1230 return -ENOMEM; 1231 } 1232 thread_data = rec->thread_data; 1233 1234 for (t = 0; t < rec->nr_threads; t++) 1235 record__thread_data_init_pipes(&thread_data[t]); 1236 1237 for (t = 0; t < rec->nr_threads; t++) { 1238 thread_data[t].rec = rec; 1239 thread_data[t].mask = &rec->thread_masks[t]; 1240 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1241 if (ret) { 1242 pr_err("Failed to initialize thread[%d] maps\n", t); 1243 goto out_free; 1244 } 1245 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1246 if (ret) { 1247 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1248 goto out_free; 1249 } 1250 if (t) { 1251 thread_data[t].tid = -1; 1252 ret = record__thread_data_open_pipes(&thread_data[t]); 1253 if (ret) { 1254 pr_err("Failed to open thread[%d] communication pipes\n", t); 1255 goto out_free; 1256 } 1257 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1258 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1259 if (ret < 0) { 1260 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1261 goto out_free; 1262 } 1263 thread_data[t].ctlfd_pos = ret; 1264 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1265 thread_data, thread_data[t].ctlfd_pos, 1266 thread_data[t].pipes.msg[0]); 1267 } else { 1268 thread_data[t].tid = gettid(); 1269 1270 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1271 if (ret < 0) 1272 goto out_free; 1273 1274 thread_data[t].ctlfd_pos = -1; /* Not used */ 1275 } 1276 } 1277 1278 return 0; 1279 1280 out_free: 1281 record__free_thread_data(rec); 1282 1283 return ret; 1284 } 1285 1286 static int record__mmap_evlist(struct record *rec, 1287 struct evlist *evlist) 1288 { 1289 int i, ret; 1290 struct record_opts *opts = &rec->opts; 1291 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1292 opts->auxtrace_sample_mode; 1293 char msg[512]; 1294 1295 if (opts->affinity != PERF_AFFINITY_SYS) 1296 cpu__setup_cpunode_map(); 1297 1298 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1299 opts->auxtrace_mmap_pages, 1300 auxtrace_overwrite, 1301 opts->nr_cblocks, opts->affinity, 1302 opts->mmap_flush, opts->comp_level) < 0) { 1303 if (errno == EPERM) { 1304 pr_err("Permission error mapping pages.\n" 1305 "Consider increasing " 1306 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1307 "or try again with a smaller value of -m/--mmap_pages.\n" 1308 "(current value: %u,%u)\n", 1309 opts->mmap_pages, opts->auxtrace_mmap_pages); 1310 return -errno; 1311 } else { 1312 pr_err("failed to mmap with %d (%s)\n", errno, 1313 str_error_r(errno, msg, sizeof(msg))); 1314 if (errno) 1315 return -errno; 1316 else 1317 return -EINVAL; 1318 } 1319 } 1320 1321 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1322 return -1; 1323 1324 ret = record__alloc_thread_data(rec, evlist); 1325 if (ret) 1326 return ret; 1327 1328 if (record__threads_enabled(rec)) { 1329 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1330 if (ret) { 1331 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1332 return ret; 1333 } 1334 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1335 if (evlist->mmap) 1336 evlist->mmap[i].file = &rec->data.dir.files[i]; 1337 if (evlist->overwrite_mmap) 1338 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1339 } 1340 } 1341 1342 return 0; 1343 } 1344 1345 static int record__mmap(struct record *rec) 1346 { 1347 return record__mmap_evlist(rec, rec->evlist); 1348 } 1349 1350 static int record__open(struct record *rec) 1351 { 1352 char msg[BUFSIZ]; 1353 struct evsel *pos; 1354 struct evlist *evlist = rec->evlist; 1355 struct perf_session *session = rec->session; 1356 struct record_opts *opts = &rec->opts; 1357 int rc = 0; 1358 1359 evlist__for_each_entry(evlist, pos) { 1360 try_again: 1361 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1362 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1363 if (verbose > 0) 1364 ui__warning("%s\n", msg); 1365 goto try_again; 1366 } 1367 if ((errno == EINVAL || errno == EBADF) && 1368 pos->core.leader != &pos->core && 1369 pos->weak_group) { 1370 pos = evlist__reset_weak_group(evlist, pos, true); 1371 goto try_again; 1372 } 1373 rc = -errno; 1374 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1375 ui__error("%s\n", msg); 1376 goto out; 1377 } 1378 1379 pos->supported = true; 1380 } 1381 1382 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1383 pr_warning( 1384 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1385 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1386 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1387 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1388 "Samples in kernel modules won't be resolved at all.\n\n" 1389 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1390 "even with a suitable vmlinux or kallsyms file.\n\n"); 1391 } 1392 1393 if (evlist__apply_filters(evlist, &pos, &opts->target)) { 1394 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1395 pos->filter ?: "BPF", evsel__name(pos), errno, 1396 str_error_r(errno, msg, sizeof(msg))); 1397 rc = -1; 1398 goto out; 1399 } 1400 1401 rc = record__mmap(rec); 1402 if (rc) 1403 goto out; 1404 1405 session->evlist = evlist; 1406 perf_session__set_id_hdr_size(session); 1407 out: 1408 return rc; 1409 } 1410 1411 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1412 { 1413 if (rec->evlist->first_sample_time == 0) 1414 rec->evlist->first_sample_time = sample_time; 1415 1416 if (sample_time) 1417 rec->evlist->last_sample_time = sample_time; 1418 } 1419 1420 static int process_sample_event(struct perf_tool *tool, 1421 union perf_event *event, 1422 struct perf_sample *sample, 1423 struct evsel *evsel, 1424 struct machine *machine) 1425 { 1426 struct record *rec = container_of(tool, struct record, tool); 1427 1428 set_timestamp_boundary(rec, sample->time); 1429 1430 if (rec->buildid_all) 1431 return 0; 1432 1433 rec->samples++; 1434 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1435 } 1436 1437 static int process_buildids(struct record *rec) 1438 { 1439 struct perf_session *session = rec->session; 1440 1441 if (perf_data__size(&rec->data) == 0) 1442 return 0; 1443 1444 /* 1445 * During this process, it'll load kernel map and replace the 1446 * dso->long_name to a real pathname it found. In this case 1447 * we prefer the vmlinux path like 1448 * /lib/modules/3.16.4/build/vmlinux 1449 * 1450 * rather than build-id path (in debug directory). 1451 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1452 */ 1453 symbol_conf.ignore_vmlinux_buildid = true; 1454 1455 /* 1456 * If --buildid-all is given, it marks all DSO regardless of hits, 1457 * so no need to process samples. But if timestamp_boundary is enabled, 1458 * it still needs to walk on all samples to get the timestamps of 1459 * first/last samples. 1460 */ 1461 if (rec->buildid_all && !rec->timestamp_boundary) 1462 rec->tool.sample = NULL; 1463 1464 return perf_session__process_events(session); 1465 } 1466 1467 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1468 { 1469 int err; 1470 struct perf_tool *tool = data; 1471 /* 1472 *As for guest kernel when processing subcommand record&report, 1473 *we arrange module mmap prior to guest kernel mmap and trigger 1474 *a preload dso because default guest module symbols are loaded 1475 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1476 *method is used to avoid symbol missing when the first addr is 1477 *in module instead of in guest kernel. 1478 */ 1479 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1480 machine); 1481 if (err < 0) 1482 pr_err("Couldn't record guest kernel [%d]'s reference" 1483 " relocation symbol.\n", machine->pid); 1484 1485 /* 1486 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1487 * have no _text sometimes. 1488 */ 1489 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1490 machine); 1491 if (err < 0) 1492 pr_err("Couldn't record guest kernel [%d]'s reference" 1493 " relocation symbol.\n", machine->pid); 1494 } 1495 1496 static struct perf_event_header finished_round_event = { 1497 .size = sizeof(struct perf_event_header), 1498 .type = PERF_RECORD_FINISHED_ROUND, 1499 }; 1500 1501 static struct perf_event_header finished_init_event = { 1502 .size = sizeof(struct perf_event_header), 1503 .type = PERF_RECORD_FINISHED_INIT, 1504 }; 1505 1506 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1507 { 1508 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1509 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1510 thread->mask->affinity.nbits)) { 1511 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1512 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1513 map->affinity_mask.bits, thread->mask->affinity.nbits); 1514 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1515 (cpu_set_t *)thread->mask->affinity.bits); 1516 if (verbose == 2) { 1517 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1518 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1519 } 1520 } 1521 } 1522 1523 static size_t process_comp_header(void *record, size_t increment) 1524 { 1525 struct perf_record_compressed *event = record; 1526 size_t size = sizeof(*event); 1527 1528 if (increment) { 1529 event->header.size += increment; 1530 return increment; 1531 } 1532 1533 event->header.type = PERF_RECORD_COMPRESSED; 1534 event->header.size = size; 1535 1536 return size; 1537 } 1538 1539 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1540 void *dst, size_t dst_size, void *src, size_t src_size) 1541 { 1542 ssize_t compressed; 1543 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1544 struct zstd_data *zstd_data = &session->zstd_data; 1545 1546 if (map && map->file) 1547 zstd_data = &map->zstd_data; 1548 1549 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1550 max_record_size, process_comp_header); 1551 if (compressed < 0) 1552 return compressed; 1553 1554 if (map && map->file) { 1555 thread->bytes_transferred += src_size; 1556 thread->bytes_compressed += compressed; 1557 } else { 1558 session->bytes_transferred += src_size; 1559 session->bytes_compressed += compressed; 1560 } 1561 1562 return compressed; 1563 } 1564 1565 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1566 bool overwrite, bool synch) 1567 { 1568 u64 bytes_written = rec->bytes_written; 1569 int i; 1570 int rc = 0; 1571 int nr_mmaps; 1572 struct mmap **maps; 1573 int trace_fd = rec->data.file.fd; 1574 off_t off = 0; 1575 1576 if (!evlist) 1577 return 0; 1578 1579 nr_mmaps = thread->nr_mmaps; 1580 maps = overwrite ? thread->overwrite_maps : thread->maps; 1581 1582 if (!maps) 1583 return 0; 1584 1585 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1586 return 0; 1587 1588 if (record__aio_enabled(rec)) 1589 off = record__aio_get_pos(trace_fd); 1590 1591 for (i = 0; i < nr_mmaps; i++) { 1592 u64 flush = 0; 1593 struct mmap *map = maps[i]; 1594 1595 if (map->core.base) { 1596 record__adjust_affinity(rec, map); 1597 if (synch) { 1598 flush = map->core.flush; 1599 map->core.flush = 1; 1600 } 1601 if (!record__aio_enabled(rec)) { 1602 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1603 if (synch) 1604 map->core.flush = flush; 1605 rc = -1; 1606 goto out; 1607 } 1608 } else { 1609 if (record__aio_push(rec, map, &off) < 0) { 1610 record__aio_set_pos(trace_fd, off); 1611 if (synch) 1612 map->core.flush = flush; 1613 rc = -1; 1614 goto out; 1615 } 1616 } 1617 if (synch) 1618 map->core.flush = flush; 1619 } 1620 1621 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1622 !rec->opts.auxtrace_sample_mode && 1623 record__auxtrace_mmap_read(rec, map) != 0) { 1624 rc = -1; 1625 goto out; 1626 } 1627 } 1628 1629 if (record__aio_enabled(rec)) 1630 record__aio_set_pos(trace_fd, off); 1631 1632 /* 1633 * Mark the round finished in case we wrote 1634 * at least one event. 1635 * 1636 * No need for round events in directory mode, 1637 * because per-cpu maps and files have data 1638 * sorted by kernel. 1639 */ 1640 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1641 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1642 1643 if (overwrite) 1644 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1645 out: 1646 return rc; 1647 } 1648 1649 static int record__mmap_read_all(struct record *rec, bool synch) 1650 { 1651 int err; 1652 1653 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1654 if (err) 1655 return err; 1656 1657 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1658 } 1659 1660 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1661 void *arg __maybe_unused) 1662 { 1663 struct perf_mmap *map = fda->priv[fd].ptr; 1664 1665 if (map) 1666 perf_mmap__put(map); 1667 } 1668 1669 static void *record__thread(void *arg) 1670 { 1671 enum thread_msg msg = THREAD_MSG__READY; 1672 bool terminate = false; 1673 struct fdarray *pollfd; 1674 int err, ctlfd_pos; 1675 1676 thread = arg; 1677 thread->tid = gettid(); 1678 1679 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1680 if (err == -1) 1681 pr_warning("threads[%d]: failed to notify on start: %s\n", 1682 thread->tid, strerror(errno)); 1683 1684 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1685 1686 pollfd = &thread->pollfd; 1687 ctlfd_pos = thread->ctlfd_pos; 1688 1689 for (;;) { 1690 unsigned long long hits = thread->samples; 1691 1692 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1693 break; 1694 1695 if (hits == thread->samples) { 1696 1697 err = fdarray__poll(pollfd, -1); 1698 /* 1699 * Propagate error, only if there's any. Ignore positive 1700 * number of returned events and interrupt error. 1701 */ 1702 if (err > 0 || (err < 0 && errno == EINTR)) 1703 err = 0; 1704 thread->waking++; 1705 1706 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1707 record__thread_munmap_filtered, NULL) == 0) 1708 break; 1709 } 1710 1711 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1712 terminate = true; 1713 close(thread->pipes.msg[0]); 1714 thread->pipes.msg[0] = -1; 1715 pollfd->entries[ctlfd_pos].fd = -1; 1716 pollfd->entries[ctlfd_pos].events = 0; 1717 } 1718 1719 pollfd->entries[ctlfd_pos].revents = 0; 1720 } 1721 record__mmap_read_all(thread->rec, true); 1722 1723 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1724 if (err == -1) 1725 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1726 thread->tid, strerror(errno)); 1727 1728 return NULL; 1729 } 1730 1731 static void record__init_features(struct record *rec) 1732 { 1733 struct perf_session *session = rec->session; 1734 int feat; 1735 1736 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1737 perf_header__set_feat(&session->header, feat); 1738 1739 if (rec->no_buildid) 1740 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1741 1742 #ifdef HAVE_LIBTRACEEVENT 1743 if (!have_tracepoints(&rec->evlist->core.entries)) 1744 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1745 #endif 1746 1747 if (!rec->opts.branch_stack) 1748 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1749 1750 if (!rec->opts.full_auxtrace) 1751 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1752 1753 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1754 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1755 1756 if (!rec->opts.use_clockid) 1757 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1758 1759 if (!record__threads_enabled(rec)) 1760 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1761 1762 if (!record__comp_enabled(rec)) 1763 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1764 1765 perf_header__clear_feat(&session->header, HEADER_STAT); 1766 } 1767 1768 static void 1769 record__finish_output(struct record *rec) 1770 { 1771 int i; 1772 struct perf_data *data = &rec->data; 1773 int fd = perf_data__fd(data); 1774 1775 if (data->is_pipe) { 1776 /* Just to display approx. size */ 1777 data->file.size = rec->bytes_written; 1778 return; 1779 } 1780 1781 rec->session->header.data_size += rec->bytes_written; 1782 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1783 if (record__threads_enabled(rec)) { 1784 for (i = 0; i < data->dir.nr; i++) 1785 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1786 } 1787 1788 if (!rec->no_buildid) { 1789 process_buildids(rec); 1790 1791 if (rec->buildid_all) 1792 perf_session__dsos_hit_all(rec->session); 1793 } 1794 perf_session__write_header(rec->session, rec->evlist, fd, true); 1795 1796 return; 1797 } 1798 1799 static int record__synthesize_workload(struct record *rec, bool tail) 1800 { 1801 int err; 1802 struct perf_thread_map *thread_map; 1803 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1804 1805 if (rec->opts.tail_synthesize != tail) 1806 return 0; 1807 1808 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1809 if (thread_map == NULL) 1810 return -1; 1811 1812 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1813 process_synthesized_event, 1814 &rec->session->machines.host, 1815 needs_mmap, 1816 rec->opts.sample_address); 1817 perf_thread_map__put(thread_map); 1818 return err; 1819 } 1820 1821 static int write_finished_init(struct record *rec, bool tail) 1822 { 1823 if (rec->opts.tail_synthesize != tail) 1824 return 0; 1825 1826 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1827 } 1828 1829 static int record__synthesize(struct record *rec, bool tail); 1830 1831 static int 1832 record__switch_output(struct record *rec, bool at_exit) 1833 { 1834 struct perf_data *data = &rec->data; 1835 char *new_filename = NULL; 1836 int fd, err; 1837 1838 /* Same Size: "2015122520103046"*/ 1839 char timestamp[] = "InvalidTimestamp"; 1840 1841 record__aio_mmap_read_sync(rec); 1842 1843 write_finished_init(rec, true); 1844 1845 record__synthesize(rec, true); 1846 if (target__none(&rec->opts.target)) 1847 record__synthesize_workload(rec, true); 1848 1849 rec->samples = 0; 1850 record__finish_output(rec); 1851 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1852 if (err) { 1853 pr_err("Failed to get current timestamp\n"); 1854 return -EINVAL; 1855 } 1856 1857 fd = perf_data__switch(data, timestamp, 1858 rec->session->header.data_offset, 1859 at_exit, &new_filename); 1860 if (fd >= 0 && !at_exit) { 1861 rec->bytes_written = 0; 1862 rec->session->header.data_size = 0; 1863 } 1864 1865 if (!quiet) { 1866 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1867 data->path, timestamp); 1868 } 1869 1870 if (rec->switch_output.num_files) { 1871 int n = rec->switch_output.cur_file + 1; 1872 1873 if (n >= rec->switch_output.num_files) 1874 n = 0; 1875 rec->switch_output.cur_file = n; 1876 if (rec->switch_output.filenames[n]) { 1877 remove(rec->switch_output.filenames[n]); 1878 zfree(&rec->switch_output.filenames[n]); 1879 } 1880 rec->switch_output.filenames[n] = new_filename; 1881 } else { 1882 free(new_filename); 1883 } 1884 1885 /* Output tracking events */ 1886 if (!at_exit) { 1887 record__synthesize(rec, false); 1888 1889 /* 1890 * In 'perf record --switch-output' without -a, 1891 * record__synthesize() in record__switch_output() won't 1892 * generate tracking events because there's no thread_map 1893 * in evlist. Which causes newly created perf.data doesn't 1894 * contain map and comm information. 1895 * Create a fake thread_map and directly call 1896 * perf_event__synthesize_thread_map() for those events. 1897 */ 1898 if (target__none(&rec->opts.target)) 1899 record__synthesize_workload(rec, false); 1900 write_finished_init(rec, false); 1901 } 1902 return fd; 1903 } 1904 1905 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1906 struct perf_record_lost_samples *lost, 1907 int cpu_idx, int thread_idx, u64 lost_count, 1908 u16 misc_flag) 1909 { 1910 struct perf_sample_id *sid; 1911 struct perf_sample sample = {}; 1912 int id_hdr_size; 1913 1914 lost->lost = lost_count; 1915 if (evsel->core.ids) { 1916 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1917 sample.id = sid->id; 1918 } 1919 1920 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1921 evsel->core.attr.sample_type, &sample); 1922 lost->header.size = sizeof(*lost) + id_hdr_size; 1923 lost->header.misc = misc_flag; 1924 record__write(rec, NULL, lost, lost->header.size); 1925 } 1926 1927 static void record__read_lost_samples(struct record *rec) 1928 { 1929 struct perf_session *session = rec->session; 1930 struct perf_record_lost_samples_and_ids lost; 1931 struct evsel *evsel; 1932 1933 /* there was an error during record__open */ 1934 if (session->evlist == NULL) 1935 return; 1936 1937 evlist__for_each_entry(session->evlist, evsel) { 1938 struct xyarray *xy = evsel->core.sample_id; 1939 u64 lost_count; 1940 1941 if (xy == NULL || evsel->core.fd == NULL) 1942 continue; 1943 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1944 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1945 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1946 continue; 1947 } 1948 1949 for (int x = 0; x < xyarray__max_x(xy); x++) { 1950 for (int y = 0; y < xyarray__max_y(xy); y++) { 1951 struct perf_counts_values count; 1952 1953 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 1954 pr_debug("read LOST count failed\n"); 1955 return; 1956 } 1957 1958 if (count.lost) { 1959 memset(&lost, 0, sizeof(lost)); 1960 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1961 __record__save_lost_samples(rec, evsel, &lost.lost, 1962 x, y, count.lost, 0); 1963 } 1964 } 1965 } 1966 1967 lost_count = perf_bpf_filter__lost_count(evsel); 1968 if (lost_count) { 1969 memset(&lost, 0, sizeof(lost)); 1970 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1971 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count, 1972 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 1973 } 1974 } 1975 } 1976 1977 static volatile sig_atomic_t workload_exec_errno; 1978 1979 /* 1980 * evlist__prepare_workload will send a SIGUSR1 1981 * if the fork fails, since we asked by setting its 1982 * want_signal to true. 1983 */ 1984 static void workload_exec_failed_signal(int signo __maybe_unused, 1985 siginfo_t *info, 1986 void *ucontext __maybe_unused) 1987 { 1988 workload_exec_errno = info->si_value.sival_int; 1989 done = 1; 1990 child_finished = 1; 1991 } 1992 1993 static void snapshot_sig_handler(int sig); 1994 static void alarm_sig_handler(int sig); 1995 1996 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1997 { 1998 if (evlist) { 1999 if (evlist->mmap && evlist->mmap[0].core.base) 2000 return evlist->mmap[0].core.base; 2001 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2002 return evlist->overwrite_mmap[0].core.base; 2003 } 2004 return NULL; 2005 } 2006 2007 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2008 { 2009 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2010 if (pc) 2011 return pc; 2012 return NULL; 2013 } 2014 2015 static int record__synthesize(struct record *rec, bool tail) 2016 { 2017 struct perf_session *session = rec->session; 2018 struct machine *machine = &session->machines.host; 2019 struct perf_data *data = &rec->data; 2020 struct record_opts *opts = &rec->opts; 2021 struct perf_tool *tool = &rec->tool; 2022 int err = 0; 2023 event_op f = process_synthesized_event; 2024 2025 if (rec->opts.tail_synthesize != tail) 2026 return 0; 2027 2028 if (data->is_pipe) { 2029 err = perf_event__synthesize_for_pipe(tool, session, data, 2030 process_synthesized_event); 2031 if (err < 0) 2032 goto out; 2033 2034 rec->bytes_written += err; 2035 } 2036 2037 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2038 process_synthesized_event, machine); 2039 if (err) 2040 goto out; 2041 2042 /* Synthesize id_index before auxtrace_info */ 2043 err = perf_event__synthesize_id_index(tool, 2044 process_synthesized_event, 2045 session->evlist, machine); 2046 if (err) 2047 goto out; 2048 2049 if (rec->opts.full_auxtrace) { 2050 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2051 session, process_synthesized_event); 2052 if (err) 2053 goto out; 2054 } 2055 2056 if (!evlist__exclude_kernel(rec->evlist)) { 2057 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2058 machine); 2059 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2060 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2061 "Check /proc/kallsyms permission or run as root.\n"); 2062 2063 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2064 machine); 2065 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2066 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2067 "Check /proc/modules permission or run as root.\n"); 2068 } 2069 2070 if (perf_guest) { 2071 machines__process_guests(&session->machines, 2072 perf_event__synthesize_guest_os, tool); 2073 } 2074 2075 err = perf_event__synthesize_extra_attr(&rec->tool, 2076 rec->evlist, 2077 process_synthesized_event, 2078 data->is_pipe); 2079 if (err) 2080 goto out; 2081 2082 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2083 process_synthesized_event, 2084 NULL); 2085 if (err < 0) { 2086 pr_err("Couldn't synthesize thread map.\n"); 2087 return err; 2088 } 2089 2090 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2091 process_synthesized_event, NULL); 2092 if (err < 0) { 2093 pr_err("Couldn't synthesize cpu map.\n"); 2094 return err; 2095 } 2096 2097 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2098 machine, opts); 2099 if (err < 0) { 2100 pr_warning("Couldn't synthesize bpf events.\n"); 2101 err = 0; 2102 } 2103 2104 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2105 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2106 machine); 2107 if (err < 0) { 2108 pr_warning("Couldn't synthesize cgroup events.\n"); 2109 err = 0; 2110 } 2111 } 2112 2113 if (rec->opts.nr_threads_synthesize > 1) { 2114 mutex_init(&synth_lock); 2115 perf_set_multithreaded(); 2116 f = process_locked_synthesized_event; 2117 } 2118 2119 if (rec->opts.synth & PERF_SYNTH_TASK) { 2120 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2121 2122 err = __machine__synthesize_threads(machine, tool, &opts->target, 2123 rec->evlist->core.threads, 2124 f, needs_mmap, opts->sample_address, 2125 rec->opts.nr_threads_synthesize); 2126 } 2127 2128 if (rec->opts.nr_threads_synthesize > 1) { 2129 perf_set_singlethreaded(); 2130 mutex_destroy(&synth_lock); 2131 } 2132 2133 out: 2134 return err; 2135 } 2136 2137 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2138 { 2139 struct record *rec = data; 2140 pthread_kill(rec->thread_id, SIGUSR2); 2141 return 0; 2142 } 2143 2144 static int record__setup_sb_evlist(struct record *rec) 2145 { 2146 struct record_opts *opts = &rec->opts; 2147 2148 if (rec->sb_evlist != NULL) { 2149 /* 2150 * We get here if --switch-output-event populated the 2151 * sb_evlist, so associate a callback that will send a SIGUSR2 2152 * to the main thread. 2153 */ 2154 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2155 rec->thread_id = pthread_self(); 2156 } 2157 #ifdef HAVE_LIBBPF_SUPPORT 2158 if (!opts->no_bpf_event) { 2159 if (rec->sb_evlist == NULL) { 2160 rec->sb_evlist = evlist__new(); 2161 2162 if (rec->sb_evlist == NULL) { 2163 pr_err("Couldn't create side band evlist.\n."); 2164 return -1; 2165 } 2166 } 2167 2168 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2169 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2170 return -1; 2171 } 2172 } 2173 #endif 2174 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2175 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2176 opts->no_bpf_event = true; 2177 } 2178 2179 return 0; 2180 } 2181 2182 static int record__init_clock(struct record *rec) 2183 { 2184 struct perf_session *session = rec->session; 2185 struct timespec ref_clockid; 2186 struct timeval ref_tod; 2187 u64 ref; 2188 2189 if (!rec->opts.use_clockid) 2190 return 0; 2191 2192 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2193 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2194 2195 session->header.env.clock.clockid = rec->opts.clockid; 2196 2197 if (gettimeofday(&ref_tod, NULL) != 0) { 2198 pr_err("gettimeofday failed, cannot set reference time.\n"); 2199 return -1; 2200 } 2201 2202 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2203 pr_err("clock_gettime failed, cannot set reference time.\n"); 2204 return -1; 2205 } 2206 2207 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2208 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2209 2210 session->header.env.clock.tod_ns = ref; 2211 2212 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2213 (u64) ref_clockid.tv_nsec; 2214 2215 session->header.env.clock.clockid_ns = ref; 2216 return 0; 2217 } 2218 2219 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2220 { 2221 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2222 trigger_hit(&auxtrace_snapshot_trigger); 2223 auxtrace_record__snapshot_started = 1; 2224 if (auxtrace_record__snapshot_start(rec->itr)) 2225 trigger_error(&auxtrace_snapshot_trigger); 2226 } 2227 } 2228 2229 static int record__terminate_thread(struct record_thread *thread_data) 2230 { 2231 int err; 2232 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2233 pid_t tid = thread_data->tid; 2234 2235 close(thread_data->pipes.msg[1]); 2236 thread_data->pipes.msg[1] = -1; 2237 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2238 if (err > 0) 2239 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2240 else 2241 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2242 thread->tid, tid); 2243 2244 return 0; 2245 } 2246 2247 static int record__start_threads(struct record *rec) 2248 { 2249 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2250 struct record_thread *thread_data = rec->thread_data; 2251 sigset_t full, mask; 2252 pthread_t handle; 2253 pthread_attr_t attrs; 2254 2255 thread = &thread_data[0]; 2256 2257 if (!record__threads_enabled(rec)) 2258 return 0; 2259 2260 sigfillset(&full); 2261 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2262 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2263 return -1; 2264 } 2265 2266 pthread_attr_init(&attrs); 2267 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2268 2269 for (t = 1; t < nr_threads; t++) { 2270 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2271 2272 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2273 pthread_attr_setaffinity_np(&attrs, 2274 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2275 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2276 #endif 2277 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2278 for (tt = 1; tt < t; tt++) 2279 record__terminate_thread(&thread_data[t]); 2280 pr_err("Failed to start threads: %s\n", strerror(errno)); 2281 ret = -1; 2282 goto out_err; 2283 } 2284 2285 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2286 if (err > 0) 2287 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2288 thread_msg_tags[msg]); 2289 else 2290 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2291 thread->tid, rec->thread_data[t].tid); 2292 } 2293 2294 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2295 (cpu_set_t *)thread->mask->affinity.bits); 2296 2297 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2298 2299 out_err: 2300 pthread_attr_destroy(&attrs); 2301 2302 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2303 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2304 ret = -1; 2305 } 2306 2307 return ret; 2308 } 2309 2310 static int record__stop_threads(struct record *rec) 2311 { 2312 int t; 2313 struct record_thread *thread_data = rec->thread_data; 2314 2315 for (t = 1; t < rec->nr_threads; t++) 2316 record__terminate_thread(&thread_data[t]); 2317 2318 for (t = 0; t < rec->nr_threads; t++) { 2319 rec->samples += thread_data[t].samples; 2320 if (!record__threads_enabled(rec)) 2321 continue; 2322 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2323 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2324 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2325 thread_data[t].samples, thread_data[t].waking); 2326 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2327 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2328 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2329 else 2330 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2331 } 2332 2333 return 0; 2334 } 2335 2336 static unsigned long record__waking(struct record *rec) 2337 { 2338 int t; 2339 unsigned long waking = 0; 2340 struct record_thread *thread_data = rec->thread_data; 2341 2342 for (t = 0; t < rec->nr_threads; t++) 2343 waking += thread_data[t].waking; 2344 2345 return waking; 2346 } 2347 2348 static int __cmd_record(struct record *rec, int argc, const char **argv) 2349 { 2350 int err; 2351 int status = 0; 2352 const bool forks = argc > 0; 2353 struct perf_tool *tool = &rec->tool; 2354 struct record_opts *opts = &rec->opts; 2355 struct perf_data *data = &rec->data; 2356 struct perf_session *session; 2357 bool disabled = false, draining = false; 2358 int fd; 2359 float ratio = 0; 2360 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2361 2362 atexit(record__sig_exit); 2363 signal(SIGCHLD, sig_handler); 2364 signal(SIGINT, sig_handler); 2365 signal(SIGTERM, sig_handler); 2366 signal(SIGSEGV, sigsegv_handler); 2367 2368 if (rec->opts.record_namespaces) 2369 tool->namespace_events = true; 2370 2371 if (rec->opts.record_cgroup) { 2372 #ifdef HAVE_FILE_HANDLE 2373 tool->cgroup_events = true; 2374 #else 2375 pr_err("cgroup tracking is not supported\n"); 2376 return -1; 2377 #endif 2378 } 2379 2380 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2381 signal(SIGUSR2, snapshot_sig_handler); 2382 if (rec->opts.auxtrace_snapshot_mode) 2383 trigger_on(&auxtrace_snapshot_trigger); 2384 if (rec->switch_output.enabled) 2385 trigger_on(&switch_output_trigger); 2386 } else { 2387 signal(SIGUSR2, SIG_IGN); 2388 } 2389 2390 session = perf_session__new(data, tool); 2391 if (IS_ERR(session)) { 2392 pr_err("Perf session creation failed.\n"); 2393 return PTR_ERR(session); 2394 } 2395 2396 if (record__threads_enabled(rec)) { 2397 if (perf_data__is_pipe(&rec->data)) { 2398 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2399 return -1; 2400 } 2401 if (rec->opts.full_auxtrace) { 2402 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2403 return -1; 2404 } 2405 } 2406 2407 fd = perf_data__fd(data); 2408 rec->session = session; 2409 2410 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2411 pr_err("Compression initialization failed.\n"); 2412 return -1; 2413 } 2414 #ifdef HAVE_EVENTFD_SUPPORT 2415 done_fd = eventfd(0, EFD_NONBLOCK); 2416 if (done_fd < 0) { 2417 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2418 status = -1; 2419 goto out_delete_session; 2420 } 2421 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2422 if (err < 0) { 2423 pr_err("Failed to add wakeup eventfd to poll list\n"); 2424 status = err; 2425 goto out_delete_session; 2426 } 2427 #endif // HAVE_EVENTFD_SUPPORT 2428 2429 session->header.env.comp_type = PERF_COMP_ZSTD; 2430 session->header.env.comp_level = rec->opts.comp_level; 2431 2432 if (rec->opts.kcore && 2433 !record__kcore_readable(&session->machines.host)) { 2434 pr_err("ERROR: kcore is not readable.\n"); 2435 return -1; 2436 } 2437 2438 if (record__init_clock(rec)) 2439 return -1; 2440 2441 record__init_features(rec); 2442 2443 if (forks) { 2444 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2445 workload_exec_failed_signal); 2446 if (err < 0) { 2447 pr_err("Couldn't run the workload!\n"); 2448 status = err; 2449 goto out_delete_session; 2450 } 2451 } 2452 2453 /* 2454 * If we have just single event and are sending data 2455 * through pipe, we need to force the ids allocation, 2456 * because we synthesize event name through the pipe 2457 * and need the id for that. 2458 */ 2459 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2460 rec->opts.sample_id = true; 2461 2462 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2463 rec->timestamp_filename = false; 2464 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2465 } 2466 2467 evlist__uniquify_name(rec->evlist); 2468 2469 evlist__config(rec->evlist, opts, &callchain_param); 2470 2471 /* Debug message used by test scripts */ 2472 pr_debug3("perf record opening and mmapping events\n"); 2473 if (record__open(rec) != 0) { 2474 err = -1; 2475 goto out_free_threads; 2476 } 2477 /* Debug message used by test scripts */ 2478 pr_debug3("perf record done opening and mmapping events\n"); 2479 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2480 2481 if (rec->opts.kcore) { 2482 err = record__kcore_copy(&session->machines.host, data); 2483 if (err) { 2484 pr_err("ERROR: Failed to copy kcore\n"); 2485 goto out_free_threads; 2486 } 2487 } 2488 2489 /* 2490 * Normally perf_session__new would do this, but it doesn't have the 2491 * evlist. 2492 */ 2493 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2494 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2495 rec->tool.ordered_events = false; 2496 } 2497 2498 if (evlist__nr_groups(rec->evlist) == 0) 2499 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2500 2501 if (data->is_pipe) { 2502 err = perf_header__write_pipe(fd); 2503 if (err < 0) 2504 goto out_free_threads; 2505 } else { 2506 err = perf_session__write_header(session, rec->evlist, fd, false); 2507 if (err < 0) 2508 goto out_free_threads; 2509 } 2510 2511 err = -1; 2512 if (!rec->no_buildid 2513 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2514 pr_err("Couldn't generate buildids. " 2515 "Use --no-buildid to profile anyway.\n"); 2516 goto out_free_threads; 2517 } 2518 2519 err = record__setup_sb_evlist(rec); 2520 if (err) 2521 goto out_free_threads; 2522 2523 err = record__synthesize(rec, false); 2524 if (err < 0) 2525 goto out_free_threads; 2526 2527 if (rec->realtime_prio) { 2528 struct sched_param param; 2529 2530 param.sched_priority = rec->realtime_prio; 2531 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 2532 pr_err("Could not set realtime priority.\n"); 2533 err = -1; 2534 goto out_free_threads; 2535 } 2536 } 2537 2538 if (record__start_threads(rec)) 2539 goto out_free_threads; 2540 2541 /* 2542 * When perf is starting the traced process, all the events 2543 * (apart from group members) have enable_on_exec=1 set, 2544 * so don't spoil it by prematurely enabling them. 2545 */ 2546 if (!target__none(&opts->target) && !opts->target.initial_delay) 2547 evlist__enable(rec->evlist); 2548 2549 /* 2550 * Let the child rip 2551 */ 2552 if (forks) { 2553 struct machine *machine = &session->machines.host; 2554 union perf_event *event; 2555 pid_t tgid; 2556 2557 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2558 if (event == NULL) { 2559 err = -ENOMEM; 2560 goto out_child; 2561 } 2562 2563 /* 2564 * Some H/W events are generated before COMM event 2565 * which is emitted during exec(), so perf script 2566 * cannot see a correct process name for those events. 2567 * Synthesize COMM event to prevent it. 2568 */ 2569 tgid = perf_event__synthesize_comm(tool, event, 2570 rec->evlist->workload.pid, 2571 process_synthesized_event, 2572 machine); 2573 free(event); 2574 2575 if (tgid == -1) 2576 goto out_child; 2577 2578 event = malloc(sizeof(event->namespaces) + 2579 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2580 machine->id_hdr_size); 2581 if (event == NULL) { 2582 err = -ENOMEM; 2583 goto out_child; 2584 } 2585 2586 /* 2587 * Synthesize NAMESPACES event for the command specified. 2588 */ 2589 perf_event__synthesize_namespaces(tool, event, 2590 rec->evlist->workload.pid, 2591 tgid, process_synthesized_event, 2592 machine); 2593 free(event); 2594 2595 evlist__start_workload(rec->evlist); 2596 } 2597 2598 if (opts->target.initial_delay) { 2599 pr_info(EVLIST_DISABLED_MSG); 2600 if (opts->target.initial_delay > 0) { 2601 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2602 evlist__enable(rec->evlist); 2603 pr_info(EVLIST_ENABLED_MSG); 2604 } 2605 } 2606 2607 err = event_enable_timer__start(rec->evlist->eet); 2608 if (err) 2609 goto out_child; 2610 2611 /* Debug message used by test scripts */ 2612 pr_debug3("perf record has started\n"); 2613 fflush(stderr); 2614 2615 trigger_ready(&auxtrace_snapshot_trigger); 2616 trigger_ready(&switch_output_trigger); 2617 perf_hooks__invoke_record_start(); 2618 2619 /* 2620 * Must write FINISHED_INIT so it will be seen after all other 2621 * synthesized user events, but before any regular events. 2622 */ 2623 err = write_finished_init(rec, false); 2624 if (err < 0) 2625 goto out_child; 2626 2627 for (;;) { 2628 unsigned long long hits = thread->samples; 2629 2630 /* 2631 * rec->evlist->bkw_mmap_state is possible to be 2632 * BKW_MMAP_EMPTY here: when done == true and 2633 * hits != rec->samples in previous round. 2634 * 2635 * evlist__toggle_bkw_mmap ensure we never 2636 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2637 */ 2638 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2639 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2640 2641 if (record__mmap_read_all(rec, false) < 0) { 2642 trigger_error(&auxtrace_snapshot_trigger); 2643 trigger_error(&switch_output_trigger); 2644 err = -1; 2645 goto out_child; 2646 } 2647 2648 if (auxtrace_record__snapshot_started) { 2649 auxtrace_record__snapshot_started = 0; 2650 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2651 record__read_auxtrace_snapshot(rec, false); 2652 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2653 pr_err("AUX area tracing snapshot failed\n"); 2654 err = -1; 2655 goto out_child; 2656 } 2657 } 2658 2659 if (trigger_is_hit(&switch_output_trigger)) { 2660 /* 2661 * If switch_output_trigger is hit, the data in 2662 * overwritable ring buffer should have been collected, 2663 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2664 * 2665 * If SIGUSR2 raise after or during record__mmap_read_all(), 2666 * record__mmap_read_all() didn't collect data from 2667 * overwritable ring buffer. Read again. 2668 */ 2669 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2670 continue; 2671 trigger_ready(&switch_output_trigger); 2672 2673 /* 2674 * Reenable events in overwrite ring buffer after 2675 * record__mmap_read_all(): we should have collected 2676 * data from it. 2677 */ 2678 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2679 2680 if (!quiet) 2681 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2682 record__waking(rec)); 2683 thread->waking = 0; 2684 fd = record__switch_output(rec, false); 2685 if (fd < 0) { 2686 pr_err("Failed to switch to new file\n"); 2687 trigger_error(&switch_output_trigger); 2688 err = fd; 2689 goto out_child; 2690 } 2691 2692 /* re-arm the alarm */ 2693 if (rec->switch_output.time) 2694 alarm(rec->switch_output.time); 2695 } 2696 2697 if (hits == thread->samples) { 2698 if (done || draining) 2699 break; 2700 err = fdarray__poll(&thread->pollfd, -1); 2701 /* 2702 * Propagate error, only if there's any. Ignore positive 2703 * number of returned events and interrupt error. 2704 */ 2705 if (err > 0 || (err < 0 && errno == EINTR)) 2706 err = 0; 2707 thread->waking++; 2708 2709 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2710 record__thread_munmap_filtered, NULL) == 0) 2711 draining = true; 2712 2713 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2714 if (err) 2715 goto out_child; 2716 } 2717 2718 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2719 switch (cmd) { 2720 case EVLIST_CTL_CMD_SNAPSHOT: 2721 hit_auxtrace_snapshot_trigger(rec); 2722 evlist__ctlfd_ack(rec->evlist); 2723 break; 2724 case EVLIST_CTL_CMD_STOP: 2725 done = 1; 2726 break; 2727 case EVLIST_CTL_CMD_ACK: 2728 case EVLIST_CTL_CMD_UNSUPPORTED: 2729 case EVLIST_CTL_CMD_ENABLE: 2730 case EVLIST_CTL_CMD_DISABLE: 2731 case EVLIST_CTL_CMD_EVLIST: 2732 case EVLIST_CTL_CMD_PING: 2733 default: 2734 break; 2735 } 2736 } 2737 2738 err = event_enable_timer__process(rec->evlist->eet); 2739 if (err < 0) 2740 goto out_child; 2741 if (err) { 2742 err = 0; 2743 done = 1; 2744 } 2745 2746 /* 2747 * When perf is starting the traced process, at the end events 2748 * die with the process and we wait for that. Thus no need to 2749 * disable events in this case. 2750 */ 2751 if (done && !disabled && !target__none(&opts->target)) { 2752 trigger_off(&auxtrace_snapshot_trigger); 2753 evlist__disable(rec->evlist); 2754 disabled = true; 2755 } 2756 } 2757 2758 trigger_off(&auxtrace_snapshot_trigger); 2759 trigger_off(&switch_output_trigger); 2760 2761 if (opts->auxtrace_snapshot_on_exit) 2762 record__auxtrace_snapshot_exit(rec); 2763 2764 if (forks && workload_exec_errno) { 2765 char msg[STRERR_BUFSIZE], strevsels[2048]; 2766 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2767 2768 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2769 2770 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2771 strevsels, argv[0], emsg); 2772 err = -1; 2773 goto out_child; 2774 } 2775 2776 if (!quiet) 2777 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2778 record__waking(rec)); 2779 2780 write_finished_init(rec, true); 2781 2782 if (target__none(&rec->opts.target)) 2783 record__synthesize_workload(rec, true); 2784 2785 out_child: 2786 record__stop_threads(rec); 2787 record__mmap_read_all(rec, true); 2788 out_free_threads: 2789 record__free_thread_data(rec); 2790 evlist__finalize_ctlfd(rec->evlist); 2791 record__aio_mmap_read_sync(rec); 2792 2793 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2794 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2795 session->header.env.comp_ratio = ratio + 0.5; 2796 } 2797 2798 if (forks) { 2799 int exit_status; 2800 2801 if (!child_finished) 2802 kill(rec->evlist->workload.pid, SIGTERM); 2803 2804 wait(&exit_status); 2805 2806 if (err < 0) 2807 status = err; 2808 else if (WIFEXITED(exit_status)) 2809 status = WEXITSTATUS(exit_status); 2810 else if (WIFSIGNALED(exit_status)) 2811 signr = WTERMSIG(exit_status); 2812 } else 2813 status = err; 2814 2815 if (rec->off_cpu) 2816 rec->bytes_written += off_cpu_write(rec->session); 2817 2818 record__read_lost_samples(rec); 2819 record__synthesize(rec, true); 2820 /* this will be recalculated during process_buildids() */ 2821 rec->samples = 0; 2822 2823 if (!err) { 2824 if (!rec->timestamp_filename) { 2825 record__finish_output(rec); 2826 } else { 2827 fd = record__switch_output(rec, true); 2828 if (fd < 0) { 2829 status = fd; 2830 goto out_delete_session; 2831 } 2832 } 2833 } 2834 2835 perf_hooks__invoke_record_end(); 2836 2837 if (!err && !quiet) { 2838 char samples[128]; 2839 const char *postfix = rec->timestamp_filename ? 2840 ".<timestamp>" : ""; 2841 2842 if (rec->samples && !rec->opts.full_auxtrace) 2843 scnprintf(samples, sizeof(samples), 2844 " (%" PRIu64 " samples)", rec->samples); 2845 else 2846 samples[0] = '\0'; 2847 2848 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2849 perf_data__size(data) / 1024.0 / 1024.0, 2850 data->path, postfix, samples); 2851 if (ratio) { 2852 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2853 rec->session->bytes_transferred / 1024.0 / 1024.0, 2854 ratio); 2855 } 2856 fprintf(stderr, " ]\n"); 2857 } 2858 2859 out_delete_session: 2860 #ifdef HAVE_EVENTFD_SUPPORT 2861 if (done_fd >= 0) { 2862 fd = done_fd; 2863 done_fd = -1; 2864 2865 close(fd); 2866 } 2867 #endif 2868 zstd_fini(&session->zstd_data); 2869 if (!opts->no_bpf_event) 2870 evlist__stop_sb_thread(rec->sb_evlist); 2871 2872 perf_session__delete(session); 2873 return status; 2874 } 2875 2876 static void callchain_debug(struct callchain_param *callchain) 2877 { 2878 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2879 2880 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2881 2882 if (callchain->record_mode == CALLCHAIN_DWARF) 2883 pr_debug("callchain: stack dump size %d\n", 2884 callchain->dump_size); 2885 } 2886 2887 int record_opts__parse_callchain(struct record_opts *record, 2888 struct callchain_param *callchain, 2889 const char *arg, bool unset) 2890 { 2891 int ret; 2892 callchain->enabled = !unset; 2893 2894 /* --no-call-graph */ 2895 if (unset) { 2896 callchain->record_mode = CALLCHAIN_NONE; 2897 pr_debug("callchain: disabled\n"); 2898 return 0; 2899 } 2900 2901 ret = parse_callchain_record_opt(arg, callchain); 2902 if (!ret) { 2903 /* Enable data address sampling for DWARF unwind. */ 2904 if (callchain->record_mode == CALLCHAIN_DWARF) 2905 record->sample_address = true; 2906 callchain_debug(callchain); 2907 } 2908 2909 return ret; 2910 } 2911 2912 int record_parse_callchain_opt(const struct option *opt, 2913 const char *arg, 2914 int unset) 2915 { 2916 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2917 } 2918 2919 int record_callchain_opt(const struct option *opt, 2920 const char *arg __maybe_unused, 2921 int unset __maybe_unused) 2922 { 2923 struct callchain_param *callchain = opt->value; 2924 2925 callchain->enabled = true; 2926 2927 if (callchain->record_mode == CALLCHAIN_NONE) 2928 callchain->record_mode = CALLCHAIN_FP; 2929 2930 callchain_debug(callchain); 2931 return 0; 2932 } 2933 2934 static int perf_record_config(const char *var, const char *value, void *cb) 2935 { 2936 struct record *rec = cb; 2937 2938 if (!strcmp(var, "record.build-id")) { 2939 if (!strcmp(value, "cache")) 2940 rec->no_buildid_cache = false; 2941 else if (!strcmp(value, "no-cache")) 2942 rec->no_buildid_cache = true; 2943 else if (!strcmp(value, "skip")) 2944 rec->no_buildid = true; 2945 else if (!strcmp(value, "mmap")) 2946 rec->buildid_mmap = true; 2947 else 2948 return -1; 2949 return 0; 2950 } 2951 if (!strcmp(var, "record.call-graph")) { 2952 var = "call-graph.record-mode"; 2953 return perf_default_config(var, value, cb); 2954 } 2955 #ifdef HAVE_AIO_SUPPORT 2956 if (!strcmp(var, "record.aio")) { 2957 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2958 if (!rec->opts.nr_cblocks) 2959 rec->opts.nr_cblocks = nr_cblocks_default; 2960 } 2961 #endif 2962 if (!strcmp(var, "record.debuginfod")) { 2963 rec->debuginfod.urls = strdup(value); 2964 if (!rec->debuginfod.urls) 2965 return -ENOMEM; 2966 rec->debuginfod.set = true; 2967 } 2968 2969 return 0; 2970 } 2971 2972 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2973 { 2974 struct record *rec = (struct record *)opt->value; 2975 2976 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2977 } 2978 2979 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2980 { 2981 struct record_opts *opts = (struct record_opts *)opt->value; 2982 2983 if (unset || !str) 2984 return 0; 2985 2986 if (!strcasecmp(str, "node")) 2987 opts->affinity = PERF_AFFINITY_NODE; 2988 else if (!strcasecmp(str, "cpu")) 2989 opts->affinity = PERF_AFFINITY_CPU; 2990 2991 return 0; 2992 } 2993 2994 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2995 { 2996 mask->nbits = nr_bits; 2997 mask->bits = bitmap_zalloc(mask->nbits); 2998 if (!mask->bits) 2999 return -ENOMEM; 3000 3001 return 0; 3002 } 3003 3004 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3005 { 3006 bitmap_free(mask->bits); 3007 mask->nbits = 0; 3008 } 3009 3010 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3011 { 3012 int ret; 3013 3014 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3015 if (ret) { 3016 mask->affinity.bits = NULL; 3017 return ret; 3018 } 3019 3020 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3021 if (ret) { 3022 record__mmap_cpu_mask_free(&mask->maps); 3023 mask->maps.bits = NULL; 3024 } 3025 3026 return ret; 3027 } 3028 3029 static void record__thread_mask_free(struct thread_mask *mask) 3030 { 3031 record__mmap_cpu_mask_free(&mask->maps); 3032 record__mmap_cpu_mask_free(&mask->affinity); 3033 } 3034 3035 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3036 { 3037 int s; 3038 struct record_opts *opts = opt->value; 3039 3040 if (unset || !str || !strlen(str)) { 3041 opts->threads_spec = THREAD_SPEC__CPU; 3042 } else { 3043 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3044 if (s == THREAD_SPEC__USER) { 3045 opts->threads_user_spec = strdup(str); 3046 if (!opts->threads_user_spec) 3047 return -ENOMEM; 3048 opts->threads_spec = THREAD_SPEC__USER; 3049 break; 3050 } 3051 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3052 opts->threads_spec = s; 3053 break; 3054 } 3055 } 3056 } 3057 3058 if (opts->threads_spec == THREAD_SPEC__USER) 3059 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3060 else 3061 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3062 3063 return 0; 3064 } 3065 3066 static int parse_output_max_size(const struct option *opt, 3067 const char *str, int unset) 3068 { 3069 unsigned long *s = (unsigned long *)opt->value; 3070 static struct parse_tag tags_size[] = { 3071 { .tag = 'B', .mult = 1 }, 3072 { .tag = 'K', .mult = 1 << 10 }, 3073 { .tag = 'M', .mult = 1 << 20 }, 3074 { .tag = 'G', .mult = 1 << 30 }, 3075 { .tag = 0 }, 3076 }; 3077 unsigned long val; 3078 3079 if (unset) { 3080 *s = 0; 3081 return 0; 3082 } 3083 3084 val = parse_tag_value(str, tags_size); 3085 if (val != (unsigned long) -1) { 3086 *s = val; 3087 return 0; 3088 } 3089 3090 return -1; 3091 } 3092 3093 static int record__parse_mmap_pages(const struct option *opt, 3094 const char *str, 3095 int unset __maybe_unused) 3096 { 3097 struct record_opts *opts = opt->value; 3098 char *s, *p; 3099 unsigned int mmap_pages; 3100 int ret; 3101 3102 if (!str) 3103 return -EINVAL; 3104 3105 s = strdup(str); 3106 if (!s) 3107 return -ENOMEM; 3108 3109 p = strchr(s, ','); 3110 if (p) 3111 *p = '\0'; 3112 3113 if (*s) { 3114 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3115 if (ret) 3116 goto out_free; 3117 opts->mmap_pages = mmap_pages; 3118 } 3119 3120 if (!p) { 3121 ret = 0; 3122 goto out_free; 3123 } 3124 3125 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3126 if (ret) 3127 goto out_free; 3128 3129 opts->auxtrace_mmap_pages = mmap_pages; 3130 3131 out_free: 3132 free(s); 3133 return ret; 3134 } 3135 3136 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3137 { 3138 } 3139 3140 static int parse_control_option(const struct option *opt, 3141 const char *str, 3142 int unset __maybe_unused) 3143 { 3144 struct record_opts *opts = opt->value; 3145 3146 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3147 } 3148 3149 static void switch_output_size_warn(struct record *rec) 3150 { 3151 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3152 struct switch_output *s = &rec->switch_output; 3153 3154 wakeup_size /= 2; 3155 3156 if (s->size < wakeup_size) { 3157 char buf[100]; 3158 3159 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3160 pr_warning("WARNING: switch-output data size lower than " 3161 "wakeup kernel buffer size (%s) " 3162 "expect bigger perf.data sizes\n", buf); 3163 } 3164 } 3165 3166 static int switch_output_setup(struct record *rec) 3167 { 3168 struct switch_output *s = &rec->switch_output; 3169 static struct parse_tag tags_size[] = { 3170 { .tag = 'B', .mult = 1 }, 3171 { .tag = 'K', .mult = 1 << 10 }, 3172 { .tag = 'M', .mult = 1 << 20 }, 3173 { .tag = 'G', .mult = 1 << 30 }, 3174 { .tag = 0 }, 3175 }; 3176 static struct parse_tag tags_time[] = { 3177 { .tag = 's', .mult = 1 }, 3178 { .tag = 'm', .mult = 60 }, 3179 { .tag = 'h', .mult = 60*60 }, 3180 { .tag = 'd', .mult = 60*60*24 }, 3181 { .tag = 0 }, 3182 }; 3183 unsigned long val; 3184 3185 /* 3186 * If we're using --switch-output-events, then we imply its 3187 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3188 * thread to its parent. 3189 */ 3190 if (rec->switch_output_event_set) { 3191 if (record__threads_enabled(rec)) { 3192 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3193 return 0; 3194 } 3195 goto do_signal; 3196 } 3197 3198 if (!s->set) 3199 return 0; 3200 3201 if (record__threads_enabled(rec)) { 3202 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3203 return 0; 3204 } 3205 3206 if (!strcmp(s->str, "signal")) { 3207 do_signal: 3208 s->signal = true; 3209 pr_debug("switch-output with SIGUSR2 signal\n"); 3210 goto enabled; 3211 } 3212 3213 val = parse_tag_value(s->str, tags_size); 3214 if (val != (unsigned long) -1) { 3215 s->size = val; 3216 pr_debug("switch-output with %s size threshold\n", s->str); 3217 goto enabled; 3218 } 3219 3220 val = parse_tag_value(s->str, tags_time); 3221 if (val != (unsigned long) -1) { 3222 s->time = val; 3223 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3224 s->str, s->time); 3225 goto enabled; 3226 } 3227 3228 return -1; 3229 3230 enabled: 3231 rec->timestamp_filename = true; 3232 s->enabled = true; 3233 3234 if (s->size && !rec->opts.no_buffering) 3235 switch_output_size_warn(rec); 3236 3237 return 0; 3238 } 3239 3240 static const char * const __record_usage[] = { 3241 "perf record [<options>] [<command>]", 3242 "perf record [<options>] -- <command> [<options>]", 3243 NULL 3244 }; 3245 const char * const *record_usage = __record_usage; 3246 3247 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3248 struct perf_sample *sample, struct machine *machine) 3249 { 3250 /* 3251 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3252 * no need to add them twice. 3253 */ 3254 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3255 return 0; 3256 return perf_event__process_mmap(tool, event, sample, machine); 3257 } 3258 3259 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3260 struct perf_sample *sample, struct machine *machine) 3261 { 3262 /* 3263 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3264 * no need to add them twice. 3265 */ 3266 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3267 return 0; 3268 3269 return perf_event__process_mmap2(tool, event, sample, machine); 3270 } 3271 3272 static int process_timestamp_boundary(struct perf_tool *tool, 3273 union perf_event *event __maybe_unused, 3274 struct perf_sample *sample, 3275 struct machine *machine __maybe_unused) 3276 { 3277 struct record *rec = container_of(tool, struct record, tool); 3278 3279 set_timestamp_boundary(rec, sample->time); 3280 return 0; 3281 } 3282 3283 static int parse_record_synth_option(const struct option *opt, 3284 const char *str, 3285 int unset __maybe_unused) 3286 { 3287 struct record_opts *opts = opt->value; 3288 char *p = strdup(str); 3289 3290 if (p == NULL) 3291 return -1; 3292 3293 opts->synth = parse_synth_opt(p); 3294 free(p); 3295 3296 if (opts->synth < 0) { 3297 pr_err("Invalid synth option: %s\n", str); 3298 return -1; 3299 } 3300 return 0; 3301 } 3302 3303 /* 3304 * XXX Ideally would be local to cmd_record() and passed to a record__new 3305 * because we need to have access to it in record__exit, that is called 3306 * after cmd_record() exits, but since record_options need to be accessible to 3307 * builtin-script, leave it here. 3308 * 3309 * At least we don't ouch it in all the other functions here directly. 3310 * 3311 * Just say no to tons of global variables, sigh. 3312 */ 3313 static struct record record = { 3314 .opts = { 3315 .sample_time = true, 3316 .mmap_pages = UINT_MAX, 3317 .user_freq = UINT_MAX, 3318 .user_interval = ULLONG_MAX, 3319 .freq = 4000, 3320 .target = { 3321 .uses_mmap = true, 3322 .default_per_cpu = true, 3323 }, 3324 .mmap_flush = MMAP_FLUSH_DEFAULT, 3325 .nr_threads_synthesize = 1, 3326 .ctl_fd = -1, 3327 .ctl_fd_ack = -1, 3328 .synth = PERF_SYNTH_ALL, 3329 }, 3330 .tool = { 3331 .sample = process_sample_event, 3332 .fork = perf_event__process_fork, 3333 .exit = perf_event__process_exit, 3334 .comm = perf_event__process_comm, 3335 .namespaces = perf_event__process_namespaces, 3336 .mmap = build_id__process_mmap, 3337 .mmap2 = build_id__process_mmap2, 3338 .itrace_start = process_timestamp_boundary, 3339 .aux = process_timestamp_boundary, 3340 .ordered_events = true, 3341 }, 3342 }; 3343 3344 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3345 "\n\t\t\t\tDefault: fp"; 3346 3347 static bool dry_run; 3348 3349 static struct parse_events_option_args parse_events_option_args = { 3350 .evlistp = &record.evlist, 3351 }; 3352 3353 static struct parse_events_option_args switch_output_parse_events_option_args = { 3354 .evlistp = &record.sb_evlist, 3355 }; 3356 3357 /* 3358 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3359 * with it and switch to use the library functions in perf_evlist that came 3360 * from builtin-record.c, i.e. use record_opts, 3361 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3362 * using pipes, etc. 3363 */ 3364 static struct option __record_options[] = { 3365 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3366 "event selector. use 'perf list' to list available events", 3367 parse_events_option), 3368 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3369 "event filter", parse_filter), 3370 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3371 NULL, "don't record events from perf itself", 3372 exclude_perf), 3373 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3374 "record events on existing process id"), 3375 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3376 "record events on existing thread id"), 3377 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3378 "collect data with this RT SCHED_FIFO priority"), 3379 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3380 "collect data without buffering"), 3381 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3382 "collect raw sample records from all opened counters"), 3383 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3384 "system-wide collection from all CPUs"), 3385 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3386 "list of cpus to monitor"), 3387 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3388 OPT_STRING('o', "output", &record.data.path, "file", 3389 "output file name"), 3390 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3391 &record.opts.no_inherit_set, 3392 "child tasks do not inherit counters"), 3393 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3394 "synthesize non-sample events at the end of output"), 3395 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3396 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3397 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3398 "Fail if the specified frequency can't be used"), 3399 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3400 "profile at this frequency", 3401 record__parse_freq), 3402 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3403 "number of mmap data pages and AUX area tracing mmap pages", 3404 record__parse_mmap_pages), 3405 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3406 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3407 record__mmap_flush_parse), 3408 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3409 NULL, "enables call-graph recording" , 3410 &record_callchain_opt), 3411 OPT_CALLBACK(0, "call-graph", &record.opts, 3412 "record_mode[,record_size]", record_callchain_help, 3413 &record_parse_callchain_opt), 3414 OPT_INCR('v', "verbose", &verbose, 3415 "be more verbose (show counter open errors, etc)"), 3416 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3417 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3418 "per thread counts"), 3419 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3420 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3421 "Record the sample physical addresses"), 3422 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3423 "Record the sampled data address data page size"), 3424 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3425 "Record the sampled code address (ip) page size"), 3426 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3427 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3428 "Record the sample identifier"), 3429 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3430 &record.opts.sample_time_set, 3431 "Record the sample timestamps"), 3432 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3433 "Record the sample period"), 3434 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3435 "don't sample"), 3436 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3437 &record.no_buildid_cache_set, 3438 "do not update the buildid cache"), 3439 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3440 &record.no_buildid_set, 3441 "do not collect buildids in perf.data"), 3442 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3443 "monitor event in cgroup name only", 3444 parse_cgroups), 3445 OPT_CALLBACK('D', "delay", &record, "ms", 3446 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3447 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3448 record__parse_event_enable_time), 3449 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3450 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3451 "user to profile"), 3452 3453 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3454 "branch any", "sample any taken branches", 3455 parse_branch_stack), 3456 3457 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3458 "branch filter mask", "branch stack filter modes", 3459 parse_branch_stack), 3460 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3461 "sample by weight (on special events only)"), 3462 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3463 "sample transaction flags (special events only)"), 3464 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3465 "use per-thread mmaps"), 3466 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3467 "sample selected machine registers on interrupt," 3468 " use '-I?' to list register names", parse_intr_regs), 3469 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3470 "sample selected machine registers on interrupt," 3471 " use '--user-regs=?' to list register names", parse_user_regs), 3472 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3473 "Record running/enabled time of read (:S) events"), 3474 OPT_CALLBACK('k', "clockid", &record.opts, 3475 "clockid", "clockid to use for events, see clock_gettime()", 3476 parse_clockid), 3477 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3478 "opts", "AUX area tracing Snapshot Mode", ""), 3479 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3480 "opts", "sample AUX area", ""), 3481 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3482 "per thread proc mmap processing timeout in ms"), 3483 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3484 "Record namespaces events"), 3485 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3486 "Record cgroup events"), 3487 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3488 &record.opts.record_switch_events_set, 3489 "Record context switch events"), 3490 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3491 "Configure all used events to run in kernel space.", 3492 PARSE_OPT_EXCLUSIVE), 3493 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3494 "Configure all used events to run in user space.", 3495 PARSE_OPT_EXCLUSIVE), 3496 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3497 "collect kernel callchains"), 3498 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3499 "collect user callchains"), 3500 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3501 "file", "vmlinux pathname"), 3502 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3503 "Record build-id of all DSOs regardless of hits"), 3504 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3505 "Record build-id in map events"), 3506 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3507 "append timestamp to output filename"), 3508 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3509 "Record timestamp boundary (time of first/last samples)"), 3510 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3511 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3512 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3513 "signal"), 3514 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3515 &record.switch_output_event_set, "switch output event", 3516 "switch output event selector. use 'perf list' to list available events", 3517 parse_events_option_new_evlist), 3518 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3519 "Limit number of switch output generated files"), 3520 OPT_BOOLEAN(0, "dry-run", &dry_run, 3521 "Parse options then exit"), 3522 #ifdef HAVE_AIO_SUPPORT 3523 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3524 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3525 record__aio_parse), 3526 #endif 3527 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3528 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3529 record__parse_affinity), 3530 #ifdef HAVE_ZSTD_SUPPORT 3531 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3532 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3533 record__parse_comp_level), 3534 #endif 3535 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3536 "size", "Limit the maximum size of the output file", parse_output_max_size), 3537 OPT_UINTEGER(0, "num-thread-synthesize", 3538 &record.opts.nr_threads_synthesize, 3539 "number of threads to run for event synthesis"), 3540 #ifdef HAVE_LIBPFM 3541 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3542 "libpfm4 event selector. use 'perf list' to list available events", 3543 parse_libpfm_events_option), 3544 #endif 3545 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3546 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3547 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3548 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3549 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3550 parse_control_option), 3551 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3552 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3553 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3554 &record.debuginfod.set, "debuginfod urls", 3555 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3556 "system"), 3557 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3558 "write collected trace data into several data files using parallel threads", 3559 record__parse_threads), 3560 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3561 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3562 "BPF filter action"), 3563 OPT_END() 3564 }; 3565 3566 struct option *record_options = __record_options; 3567 3568 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3569 { 3570 struct perf_cpu cpu; 3571 int idx; 3572 3573 if (cpu_map__is_dummy(cpus)) 3574 return 0; 3575 3576 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3577 /* Return ENODEV is input cpu is greater than max cpu */ 3578 if ((unsigned long)cpu.cpu > mask->nbits) 3579 return -ENODEV; 3580 __set_bit(cpu.cpu, mask->bits); 3581 } 3582 3583 return 0; 3584 } 3585 3586 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3587 { 3588 struct perf_cpu_map *cpus; 3589 3590 cpus = perf_cpu_map__new(mask_spec); 3591 if (!cpus) 3592 return -ENOMEM; 3593 3594 bitmap_zero(mask->bits, mask->nbits); 3595 if (record__mmap_cpu_mask_init(mask, cpus)) 3596 return -ENODEV; 3597 3598 perf_cpu_map__put(cpus); 3599 3600 return 0; 3601 } 3602 3603 static void record__free_thread_masks(struct record *rec, int nr_threads) 3604 { 3605 int t; 3606 3607 if (rec->thread_masks) 3608 for (t = 0; t < nr_threads; t++) 3609 record__thread_mask_free(&rec->thread_masks[t]); 3610 3611 zfree(&rec->thread_masks); 3612 } 3613 3614 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3615 { 3616 int t, ret; 3617 3618 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3619 if (!rec->thread_masks) { 3620 pr_err("Failed to allocate thread masks\n"); 3621 return -ENOMEM; 3622 } 3623 3624 for (t = 0; t < nr_threads; t++) { 3625 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3626 if (ret) { 3627 pr_err("Failed to allocate thread masks[%d]\n", t); 3628 goto out_free; 3629 } 3630 } 3631 3632 return 0; 3633 3634 out_free: 3635 record__free_thread_masks(rec, nr_threads); 3636 3637 return ret; 3638 } 3639 3640 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3641 { 3642 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3643 3644 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3645 if (ret) 3646 return ret; 3647 3648 rec->nr_threads = nr_cpus; 3649 pr_debug("nr_threads: %d\n", rec->nr_threads); 3650 3651 for (t = 0; t < rec->nr_threads; t++) { 3652 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3653 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3654 if (verbose > 0) { 3655 pr_debug("thread_masks[%d]: ", t); 3656 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3657 pr_debug("thread_masks[%d]: ", t); 3658 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3659 } 3660 } 3661 3662 return 0; 3663 } 3664 3665 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3666 const char **maps_spec, const char **affinity_spec, 3667 u32 nr_spec) 3668 { 3669 u32 s; 3670 int ret = 0, t = 0; 3671 struct mmap_cpu_mask cpus_mask; 3672 struct thread_mask thread_mask, full_mask, *thread_masks; 3673 3674 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3675 if (ret) { 3676 pr_err("Failed to allocate CPUs mask\n"); 3677 return ret; 3678 } 3679 3680 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3681 if (ret) { 3682 pr_err("Failed to init cpu mask\n"); 3683 goto out_free_cpu_mask; 3684 } 3685 3686 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3687 if (ret) { 3688 pr_err("Failed to allocate full mask\n"); 3689 goto out_free_cpu_mask; 3690 } 3691 3692 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3693 if (ret) { 3694 pr_err("Failed to allocate thread mask\n"); 3695 goto out_free_full_and_cpu_masks; 3696 } 3697 3698 for (s = 0; s < nr_spec; s++) { 3699 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3700 if (ret) { 3701 pr_err("Failed to initialize maps thread mask\n"); 3702 goto out_free; 3703 } 3704 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3705 if (ret) { 3706 pr_err("Failed to initialize affinity thread mask\n"); 3707 goto out_free; 3708 } 3709 3710 /* ignore invalid CPUs but do not allow empty masks */ 3711 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3712 cpus_mask.bits, thread_mask.maps.nbits)) { 3713 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3714 ret = -EINVAL; 3715 goto out_free; 3716 } 3717 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3718 cpus_mask.bits, thread_mask.affinity.nbits)) { 3719 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3720 ret = -EINVAL; 3721 goto out_free; 3722 } 3723 3724 /* do not allow intersection with other masks (full_mask) */ 3725 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3726 thread_mask.maps.nbits)) { 3727 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3728 ret = -EINVAL; 3729 goto out_free; 3730 } 3731 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3732 thread_mask.affinity.nbits)) { 3733 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3734 ret = -EINVAL; 3735 goto out_free; 3736 } 3737 3738 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3739 thread_mask.maps.bits, full_mask.maps.nbits); 3740 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3741 thread_mask.affinity.bits, full_mask.maps.nbits); 3742 3743 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3744 if (!thread_masks) { 3745 pr_err("Failed to reallocate thread masks\n"); 3746 ret = -ENOMEM; 3747 goto out_free; 3748 } 3749 rec->thread_masks = thread_masks; 3750 rec->thread_masks[t] = thread_mask; 3751 if (verbose > 0) { 3752 pr_debug("thread_masks[%d]: ", t); 3753 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3754 pr_debug("thread_masks[%d]: ", t); 3755 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3756 } 3757 t++; 3758 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3759 if (ret) { 3760 pr_err("Failed to allocate thread mask\n"); 3761 goto out_free_full_and_cpu_masks; 3762 } 3763 } 3764 rec->nr_threads = t; 3765 pr_debug("nr_threads: %d\n", rec->nr_threads); 3766 if (!rec->nr_threads) 3767 ret = -EINVAL; 3768 3769 out_free: 3770 record__thread_mask_free(&thread_mask); 3771 out_free_full_and_cpu_masks: 3772 record__thread_mask_free(&full_mask); 3773 out_free_cpu_mask: 3774 record__mmap_cpu_mask_free(&cpus_mask); 3775 3776 return ret; 3777 } 3778 3779 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3780 { 3781 int ret; 3782 struct cpu_topology *topo; 3783 3784 topo = cpu_topology__new(); 3785 if (!topo) { 3786 pr_err("Failed to allocate CPU topology\n"); 3787 return -ENOMEM; 3788 } 3789 3790 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3791 topo->core_cpus_list, topo->core_cpus_lists); 3792 cpu_topology__delete(topo); 3793 3794 return ret; 3795 } 3796 3797 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3798 { 3799 int ret; 3800 struct cpu_topology *topo; 3801 3802 topo = cpu_topology__new(); 3803 if (!topo) { 3804 pr_err("Failed to allocate CPU topology\n"); 3805 return -ENOMEM; 3806 } 3807 3808 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3809 topo->package_cpus_list, topo->package_cpus_lists); 3810 cpu_topology__delete(topo); 3811 3812 return ret; 3813 } 3814 3815 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3816 { 3817 u32 s; 3818 int ret; 3819 const char **spec; 3820 struct numa_topology *topo; 3821 3822 topo = numa_topology__new(); 3823 if (!topo) { 3824 pr_err("Failed to allocate NUMA topology\n"); 3825 return -ENOMEM; 3826 } 3827 3828 spec = zalloc(topo->nr * sizeof(char *)); 3829 if (!spec) { 3830 pr_err("Failed to allocate NUMA spec\n"); 3831 ret = -ENOMEM; 3832 goto out_delete_topo; 3833 } 3834 for (s = 0; s < topo->nr; s++) 3835 spec[s] = topo->nodes[s].cpus; 3836 3837 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3838 3839 zfree(&spec); 3840 3841 out_delete_topo: 3842 numa_topology__delete(topo); 3843 3844 return ret; 3845 } 3846 3847 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3848 { 3849 int t, ret; 3850 u32 s, nr_spec = 0; 3851 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3852 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3853 3854 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3855 spec = strtok_r(user_spec, ":", &spec_ptr); 3856 if (spec == NULL) 3857 break; 3858 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3859 mask = strtok_r(spec, "/", &mask_ptr); 3860 if (mask == NULL) 3861 break; 3862 pr_debug2(" maps mask: %s\n", mask); 3863 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3864 if (!tmp_spec) { 3865 pr_err("Failed to reallocate maps spec\n"); 3866 ret = -ENOMEM; 3867 goto out_free; 3868 } 3869 maps_spec = tmp_spec; 3870 maps_spec[nr_spec] = dup_mask = strdup(mask); 3871 if (!maps_spec[nr_spec]) { 3872 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3873 ret = -ENOMEM; 3874 goto out_free; 3875 } 3876 mask = strtok_r(NULL, "/", &mask_ptr); 3877 if (mask == NULL) { 3878 pr_err("Invalid thread maps or affinity specs\n"); 3879 ret = -EINVAL; 3880 goto out_free; 3881 } 3882 pr_debug2(" affinity mask: %s\n", mask); 3883 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3884 if (!tmp_spec) { 3885 pr_err("Failed to reallocate affinity spec\n"); 3886 ret = -ENOMEM; 3887 goto out_free; 3888 } 3889 affinity_spec = tmp_spec; 3890 affinity_spec[nr_spec] = strdup(mask); 3891 if (!affinity_spec[nr_spec]) { 3892 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3893 ret = -ENOMEM; 3894 goto out_free; 3895 } 3896 dup_mask = NULL; 3897 nr_spec++; 3898 } 3899 3900 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3901 (const char **)affinity_spec, nr_spec); 3902 3903 out_free: 3904 free(dup_mask); 3905 for (s = 0; s < nr_spec; s++) { 3906 if (maps_spec) 3907 free(maps_spec[s]); 3908 if (affinity_spec) 3909 free(affinity_spec[s]); 3910 } 3911 free(affinity_spec); 3912 free(maps_spec); 3913 3914 return ret; 3915 } 3916 3917 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3918 { 3919 int ret; 3920 3921 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3922 if (ret) 3923 return ret; 3924 3925 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3926 return -ENODEV; 3927 3928 rec->nr_threads = 1; 3929 3930 return 0; 3931 } 3932 3933 static int record__init_thread_masks(struct record *rec) 3934 { 3935 int ret = 0; 3936 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3937 3938 if (!record__threads_enabled(rec)) 3939 return record__init_thread_default_masks(rec, cpus); 3940 3941 if (evlist__per_thread(rec->evlist)) { 3942 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3943 return -EINVAL; 3944 } 3945 3946 switch (rec->opts.threads_spec) { 3947 case THREAD_SPEC__CPU: 3948 ret = record__init_thread_cpu_masks(rec, cpus); 3949 break; 3950 case THREAD_SPEC__CORE: 3951 ret = record__init_thread_core_masks(rec, cpus); 3952 break; 3953 case THREAD_SPEC__PACKAGE: 3954 ret = record__init_thread_package_masks(rec, cpus); 3955 break; 3956 case THREAD_SPEC__NUMA: 3957 ret = record__init_thread_numa_masks(rec, cpus); 3958 break; 3959 case THREAD_SPEC__USER: 3960 ret = record__init_thread_user_masks(rec, cpus); 3961 break; 3962 default: 3963 break; 3964 } 3965 3966 return ret; 3967 } 3968 3969 int cmd_record(int argc, const char **argv) 3970 { 3971 int err; 3972 struct record *rec = &record; 3973 char errbuf[BUFSIZ]; 3974 3975 setlocale(LC_ALL, ""); 3976 3977 #ifndef HAVE_BPF_SKEL 3978 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 3979 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 3980 # undef set_nobuild 3981 #endif 3982 3983 /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 3984 symbol_conf.lazy_load_kernel_maps = true; 3985 rec->opts.affinity = PERF_AFFINITY_SYS; 3986 3987 rec->evlist = evlist__new(); 3988 if (rec->evlist == NULL) 3989 return -ENOMEM; 3990 3991 err = perf_config(perf_record_config, rec); 3992 if (err) 3993 return err; 3994 3995 argc = parse_options(argc, argv, record_options, record_usage, 3996 PARSE_OPT_STOP_AT_NON_OPTION); 3997 if (quiet) 3998 perf_quiet_option(); 3999 4000 err = symbol__validate_sym_arguments(); 4001 if (err) 4002 return err; 4003 4004 perf_debuginfod_setup(&record.debuginfod); 4005 4006 /* Make system wide (-a) the default target. */ 4007 if (!argc && target__none(&rec->opts.target)) 4008 rec->opts.target.system_wide = true; 4009 4010 if (nr_cgroups && !rec->opts.target.system_wide) { 4011 usage_with_options_msg(record_usage, record_options, 4012 "cgroup monitoring only available in system-wide mode"); 4013 4014 } 4015 4016 if (rec->buildid_mmap) { 4017 if (!perf_can_record_build_id()) { 4018 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 4019 err = -EINVAL; 4020 goto out_opts; 4021 } 4022 pr_debug("Enabling build id in mmap2 events.\n"); 4023 /* Enable mmap build id synthesizing. */ 4024 symbol_conf.buildid_mmap2 = true; 4025 /* Enable perf_event_attr::build_id bit. */ 4026 rec->opts.build_id = true; 4027 /* Disable build id cache. */ 4028 rec->no_buildid = true; 4029 } 4030 4031 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 4032 pr_err("Kernel has no cgroup sampling support.\n"); 4033 err = -EINVAL; 4034 goto out_opts; 4035 } 4036 4037 if (rec->opts.kcore) 4038 rec->opts.text_poke = true; 4039 4040 if (rec->opts.kcore || record__threads_enabled(rec)) 4041 rec->data.is_dir = true; 4042 4043 if (record__threads_enabled(rec)) { 4044 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 4045 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 4046 goto out_opts; 4047 } 4048 if (record__aio_enabled(rec)) { 4049 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 4050 goto out_opts; 4051 } 4052 } 4053 4054 if (rec->opts.comp_level != 0) { 4055 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 4056 rec->no_buildid = true; 4057 } 4058 4059 if (rec->opts.record_switch_events && 4060 !perf_can_record_switch_events()) { 4061 ui__error("kernel does not support recording context switch events\n"); 4062 parse_options_usage(record_usage, record_options, "switch-events", 0); 4063 err = -EINVAL; 4064 goto out_opts; 4065 } 4066 4067 if (switch_output_setup(rec)) { 4068 parse_options_usage(record_usage, record_options, "switch-output", 0); 4069 err = -EINVAL; 4070 goto out_opts; 4071 } 4072 4073 if (rec->switch_output.time) { 4074 signal(SIGALRM, alarm_sig_handler); 4075 alarm(rec->switch_output.time); 4076 } 4077 4078 if (rec->switch_output.num_files) { 4079 rec->switch_output.filenames = calloc(rec->switch_output.num_files, 4080 sizeof(char *)); 4081 if (!rec->switch_output.filenames) { 4082 err = -EINVAL; 4083 goto out_opts; 4084 } 4085 } 4086 4087 if (rec->timestamp_filename && record__threads_enabled(rec)) { 4088 rec->timestamp_filename = false; 4089 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 4090 } 4091 4092 if (rec->filter_action) { 4093 if (!strcmp(rec->filter_action, "pin")) 4094 err = perf_bpf_filter__pin(); 4095 else if (!strcmp(rec->filter_action, "unpin")) 4096 err = perf_bpf_filter__unpin(); 4097 else { 4098 pr_warning("Unknown BPF filter action: %s\n", rec->filter_action); 4099 err = -EINVAL; 4100 } 4101 goto out_opts; 4102 } 4103 4104 /* 4105 * Allow aliases to facilitate the lookup of symbols for address 4106 * filters. Refer to auxtrace_parse_filters(). 4107 */ 4108 symbol_conf.allow_aliases = true; 4109 4110 symbol__init(NULL); 4111 4112 err = record__auxtrace_init(rec); 4113 if (err) 4114 goto out; 4115 4116 if (dry_run) 4117 goto out; 4118 4119 err = -ENOMEM; 4120 4121 if (rec->no_buildid_cache || rec->no_buildid) { 4122 disable_buildid_cache(); 4123 } else if (rec->switch_output.enabled) { 4124 /* 4125 * In 'perf record --switch-output', disable buildid 4126 * generation by default to reduce data file switching 4127 * overhead. Still generate buildid if they are required 4128 * explicitly using 4129 * 4130 * perf record --switch-output --no-no-buildid \ 4131 * --no-no-buildid-cache 4132 * 4133 * Following code equals to: 4134 * 4135 * if ((rec->no_buildid || !rec->no_buildid_set) && 4136 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 4137 * disable_buildid_cache(); 4138 */ 4139 bool disable = true; 4140 4141 if (rec->no_buildid_set && !rec->no_buildid) 4142 disable = false; 4143 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 4144 disable = false; 4145 if (disable) { 4146 rec->no_buildid = true; 4147 rec->no_buildid_cache = true; 4148 disable_buildid_cache(); 4149 } 4150 } 4151 4152 if (record.opts.overwrite) 4153 record.opts.tail_synthesize = true; 4154 4155 if (rec->evlist->core.nr_entries == 0) { 4156 bool can_profile_kernel = perf_event_paranoid_check(1); 4157 4158 err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); 4159 if (err) 4160 goto out; 4161 } 4162 4163 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4164 rec->opts.no_inherit = true; 4165 4166 err = target__validate(&rec->opts.target); 4167 if (err) { 4168 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4169 ui__warning("%s\n", errbuf); 4170 } 4171 4172 err = target__parse_uid(&rec->opts.target); 4173 if (err) { 4174 int saved_errno = errno; 4175 4176 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4177 ui__error("%s", errbuf); 4178 4179 err = -saved_errno; 4180 goto out; 4181 } 4182 4183 /* Enable ignoring missing threads when -u/-p option is defined. */ 4184 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4185 4186 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4187 4188 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4189 arch__add_leaf_frame_record_opts(&rec->opts); 4190 4191 err = -ENOMEM; 4192 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4193 if (rec->opts.target.pid != NULL) { 4194 pr_err("Couldn't create thread/CPU maps: %s\n", 4195 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4196 goto out; 4197 } 4198 else 4199 usage_with_options(record_usage, record_options); 4200 } 4201 4202 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4203 if (err) 4204 goto out; 4205 4206 /* 4207 * We take all buildids when the file contains 4208 * AUX area tracing data because we do not decode the 4209 * trace because it would take too long. 4210 */ 4211 if (rec->opts.full_auxtrace) 4212 rec->buildid_all = true; 4213 4214 if (rec->opts.text_poke) { 4215 err = record__config_text_poke(rec->evlist); 4216 if (err) { 4217 pr_err("record__config_text_poke failed, error %d\n", err); 4218 goto out; 4219 } 4220 } 4221 4222 if (rec->off_cpu) { 4223 err = record__config_off_cpu(rec); 4224 if (err) { 4225 pr_err("record__config_off_cpu failed, error %d\n", err); 4226 goto out; 4227 } 4228 } 4229 4230 if (record_opts__config(&rec->opts)) { 4231 err = -EINVAL; 4232 goto out; 4233 } 4234 4235 err = record__config_tracking_events(rec); 4236 if (err) { 4237 pr_err("record__config_tracking_events failed, error %d\n", err); 4238 goto out; 4239 } 4240 4241 err = record__init_thread_masks(rec); 4242 if (err) { 4243 pr_err("Failed to initialize parallel data streaming masks\n"); 4244 goto out; 4245 } 4246 4247 if (rec->opts.nr_cblocks > nr_cblocks_max) 4248 rec->opts.nr_cblocks = nr_cblocks_max; 4249 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4250 4251 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4252 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4253 4254 if (rec->opts.comp_level > comp_level_max) 4255 rec->opts.comp_level = comp_level_max; 4256 pr_debug("comp level: %d\n", rec->opts.comp_level); 4257 4258 err = __cmd_record(&record, argc, argv); 4259 out: 4260 record__free_thread_masks(rec, rec->nr_threads); 4261 rec->nr_threads = 0; 4262 symbol__exit(); 4263 auxtrace_record__free(rec->itr); 4264 out_opts: 4265 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4266 evlist__delete(rec->evlist); 4267 return err; 4268 } 4269 4270 static void snapshot_sig_handler(int sig __maybe_unused) 4271 { 4272 struct record *rec = &record; 4273 4274 hit_auxtrace_snapshot_trigger(rec); 4275 4276 if (switch_output_signal(rec)) 4277 trigger_hit(&switch_output_trigger); 4278 } 4279 4280 static void alarm_sig_handler(int sig __maybe_unused) 4281 { 4282 struct record *rec = &record; 4283 4284 if (switch_output_time(rec)) 4285 trigger_hit(&switch_output_trigger); 4286 } 4287