1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/arm64-frame-pointer-unwind-support.h" 18 #include "util/callchain.h" 19 #include "util/cgroup.h" 20 #include "util/header.h" 21 #include "util/event.h" 22 #include "util/evlist.h" 23 #include "util/evsel.h" 24 #include "util/debug.h" 25 #include "util/mmap.h" 26 #include "util/mutex.h" 27 #include "util/target.h" 28 #include "util/session.h" 29 #include "util/tool.h" 30 #include "util/stat.h" 31 #include "util/symbol.h" 32 #include "util/record.h" 33 #include "util/cpumap.h" 34 #include "util/thread_map.h" 35 #include "util/data.h" 36 #include "util/perf_regs.h" 37 #include "util/auxtrace.h" 38 #include "util/tsc.h" 39 #include "util/parse-branch-options.h" 40 #include "util/parse-regs-options.h" 41 #include "util/perf_api_probe.h" 42 #include "util/trigger.h" 43 #include "util/perf-hooks.h" 44 #include "util/synthetic-events.h" 45 #include "util/time-utils.h" 46 #include "util/units.h" 47 #include "util/bpf-event.h" 48 #include "util/util.h" 49 #include "util/pfm.h" 50 #include "util/pmu.h" 51 #include "util/pmus.h" 52 #include "util/clockid.h" 53 #include "util/off_cpu.h" 54 #include "util/bpf-filter.h" 55 #include "util/strbuf.h" 56 #include "asm/bug.h" 57 #include "perf.h" 58 #include "cputopo.h" 59 #include "dwarf-regs.h" 60 61 #include <errno.h> 62 #include <inttypes.h> 63 #include <locale.h> 64 #include <poll.h> 65 #include <pthread.h> 66 #include <unistd.h> 67 #ifndef HAVE_GETTID 68 #include <syscall.h> 69 #endif 70 #include <sched.h> 71 #include <signal.h> 72 #ifdef HAVE_EVENTFD_SUPPORT 73 #include <sys/eventfd.h> 74 #endif 
75 #include <sys/mman.h> 76 #include <sys/wait.h> 77 #include <sys/types.h> 78 #include <sys/stat.h> 79 #include <fcntl.h> 80 #include <linux/err.h> 81 #include <linux/string.h> 82 #include <linux/time64.h> 83 #include <linux/zalloc.h> 84 #include <linux/bitmap.h> 85 #include <sys/time.h> 86 87 struct switch_output { 88 bool enabled; 89 bool signal; 90 unsigned long size; 91 unsigned long time; 92 const char *str; 93 bool set; 94 char **filenames; 95 int num_files; 96 int cur_file; 97 }; 98 99 struct thread_mask { 100 struct mmap_cpu_mask maps; 101 struct mmap_cpu_mask affinity; 102 }; 103 104 struct record_thread { 105 pid_t tid; 106 struct thread_mask *mask; 107 struct { 108 int msg[2]; 109 int ack[2]; 110 } pipes; 111 struct fdarray pollfd; 112 int ctlfd_pos; 113 int nr_mmaps; 114 struct mmap **maps; 115 struct mmap **overwrite_maps; 116 struct record *rec; 117 unsigned long long samples; 118 unsigned long waking; 119 u64 bytes_written; 120 u64 bytes_transferred; 121 u64 bytes_compressed; 122 }; 123 124 static __thread struct record_thread *thread; 125 126 enum thread_msg { 127 THREAD_MSG__UNDEFINED = 0, 128 THREAD_MSG__READY, 129 THREAD_MSG__MAX, 130 }; 131 132 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 133 "UNDEFINED", "READY" 134 }; 135 136 enum thread_spec { 137 THREAD_SPEC__UNDEFINED = 0, 138 THREAD_SPEC__CPU, 139 THREAD_SPEC__CORE, 140 THREAD_SPEC__PACKAGE, 141 THREAD_SPEC__NUMA, 142 THREAD_SPEC__USER, 143 THREAD_SPEC__MAX, 144 }; 145 146 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 147 "undefined", "cpu", "core", "package", "numa", "user" 148 }; 149 150 struct pollfd_index_map { 151 int evlist_pollfd_index; 152 int thread_pollfd_index; 153 }; 154 155 struct record { 156 struct perf_tool tool; 157 struct record_opts opts; 158 u64 bytes_written; 159 u64 thread_bytes_written; 160 struct perf_data data; 161 struct auxtrace_record *itr; 162 struct evlist *evlist; 163 struct perf_session *session; 164 struct evlist *sb_evlist; 165 
pthread_t thread_id; 166 int realtime_prio; 167 bool latency; 168 bool switch_output_event_set; 169 bool no_buildid; 170 bool no_buildid_set; 171 bool no_buildid_cache; 172 bool no_buildid_cache_set; 173 bool buildid_all; 174 bool buildid_mmap; 175 bool buildid_mmap_set; 176 bool timestamp_filename; 177 bool timestamp_boundary; 178 bool off_cpu; 179 const char *filter_action; 180 const char *uid_str; 181 struct switch_output switch_output; 182 unsigned long long samples; 183 unsigned long output_max_size; /* = 0: unlimited */ 184 struct perf_debuginfod debuginfod; 185 int nr_threads; 186 struct thread_mask *thread_masks; 187 struct record_thread *thread_data; 188 struct pollfd_index_map *index_map; 189 size_t index_map_sz; 190 size_t index_map_cnt; 191 }; 192 193 static volatile int done; 194 195 static volatile int auxtrace_record__snapshot_started; 196 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 197 static DEFINE_TRIGGER(switch_output_trigger); 198 199 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 200 "SYS", "NODE", "CPU" 201 }; 202 203 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 204 struct perf_sample *sample, struct machine *machine); 205 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 206 struct perf_sample *sample, struct machine *machine); 207 static int process_timestamp_boundary(const struct perf_tool *tool, 208 union perf_event *event, 209 struct perf_sample *sample, 210 struct machine *machine); 211 212 #ifndef HAVE_GETTID 213 static inline pid_t gettid(void) 214 { 215 return (pid_t)syscall(__NR_gettid); 216 } 217 #endif 218 219 static int record__threads_enabled(struct record *rec) 220 { 221 return rec->opts.threads_spec; 222 } 223 224 static bool switch_output_signal(struct record *rec) 225 { 226 return rec->switch_output.signal && 227 trigger_is_ready(&switch_output_trigger); 228 } 229 230 static bool switch_output_size(struct record *rec) 231 { 232 
return rec->switch_output.size && 233 trigger_is_ready(&switch_output_trigger) && 234 (rec->bytes_written >= rec->switch_output.size); 235 } 236 237 static bool switch_output_time(struct record *rec) 238 { 239 return rec->switch_output.time && 240 trigger_is_ready(&switch_output_trigger); 241 } 242 243 static u64 record__bytes_written(struct record *rec) 244 { 245 return rec->bytes_written + rec->thread_bytes_written; 246 } 247 248 static bool record__output_max_size_exceeded(struct record *rec) 249 { 250 return rec->output_max_size && 251 (record__bytes_written(rec) >= rec->output_max_size); 252 } 253 254 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 255 void *bf, size_t size) 256 { 257 struct perf_data_file *file = &rec->session->data->file; 258 259 if (map && map->file) 260 file = map->file; 261 262 if (perf_data_file__write(file, bf, size) < 0) { 263 pr_err("failed to write perf data, error: %m\n"); 264 return -1; 265 } 266 267 if (map && map->file) { 268 thread->bytes_written += size; 269 rec->thread_bytes_written += size; 270 } else { 271 rec->bytes_written += size; 272 } 273 274 if (record__output_max_size_exceeded(rec) && !done) { 275 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 276 " stopping session ]\n", 277 record__bytes_written(rec) >> 10); 278 done = 1; 279 } 280 281 if (switch_output_size(rec)) 282 trigger_hit(&switch_output_trigger); 283 284 return 0; 285 } 286 287 static int record__aio_enabled(struct record *rec); 288 static int record__comp_enabled(struct record *rec); 289 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 290 void *dst, size_t dst_size, void *src, size_t src_size); 291 292 #ifdef HAVE_AIO_SUPPORT 293 static int record__aio_write(struct aiocb *cblock, int trace_fd, 294 void *buf, size_t size, off_t off) 295 { 296 int rc; 297 298 cblock->aio_fildes = trace_fd; 299 cblock->aio_buf = buf; 300 cblock->aio_nbytes = size; 301 cblock->aio_offset = 
off; 302 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 303 304 do { 305 rc = aio_write(cblock); 306 if (rc == 0) { 307 break; 308 } else if (errno != EAGAIN) { 309 cblock->aio_fildes = -1; 310 pr_err("failed to queue perf data, error: %m\n"); 311 break; 312 } 313 } while (1); 314 315 return rc; 316 } 317 318 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 319 { 320 void *rem_buf; 321 off_t rem_off; 322 size_t rem_size; 323 int rc, aio_errno; 324 ssize_t aio_ret, written; 325 326 aio_errno = aio_error(cblock); 327 if (aio_errno == EINPROGRESS) 328 return 0; 329 330 written = aio_ret = aio_return(cblock); 331 if (aio_ret < 0) { 332 if (aio_errno != EINTR) 333 pr_err("failed to write perf data, error: %m\n"); 334 written = 0; 335 } 336 337 rem_size = cblock->aio_nbytes - written; 338 339 if (rem_size == 0) { 340 cblock->aio_fildes = -1; 341 /* 342 * md->refcount is incremented in record__aio_pushfn() for 343 * every aio write request started in record__aio_push() so 344 * decrement it because the request is now complete. 345 */ 346 perf_mmap__put(&md->core); 347 rc = 1; 348 } else { 349 /* 350 * aio write request may require restart with the 351 * remainder if the kernel didn't write whole 352 * chunk at once. 
353 */ 354 rem_off = cblock->aio_offset + written; 355 rem_buf = (void *)(cblock->aio_buf + written); 356 record__aio_write(cblock, cblock->aio_fildes, 357 rem_buf, rem_size, rem_off); 358 rc = 0; 359 } 360 361 return rc; 362 } 363 364 static int record__aio_sync(struct mmap *md, bool sync_all) 365 { 366 struct aiocb **aiocb = md->aio.aiocb; 367 struct aiocb *cblocks = md->aio.cblocks; 368 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 369 int i, do_suspend; 370 371 do { 372 do_suspend = 0; 373 for (i = 0; i < md->aio.nr_cblocks; ++i) { 374 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 375 if (sync_all) 376 aiocb[i] = NULL; 377 else 378 return i; 379 } else { 380 /* 381 * Started aio write is not complete yet 382 * so it has to be waited before the 383 * next allocation. 384 */ 385 aiocb[i] = &cblocks[i]; 386 do_suspend = 1; 387 } 388 } 389 if (!do_suspend) 390 return -1; 391 392 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 393 if (!(errno == EAGAIN || errno == EINTR)) 394 pr_err("failed to sync perf data, error: %m\n"); 395 } 396 } while (1); 397 } 398 399 struct record_aio { 400 struct record *rec; 401 void *data; 402 size_t size; 403 }; 404 405 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 406 { 407 struct record_aio *aio = to; 408 409 /* 410 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 411 * to release space in the kernel buffer as fast as possible, calling 412 * perf_mmap__consume() from perf_mmap__push() function. 413 * 414 * That lets the kernel to proceed with storing more profiling data into 415 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 416 * 417 * Coping can be done in two steps in case the chunk of profiling data 418 * crosses the upper bound of the kernel buffer. 
In this case we first move 419 * part of data from map->start till the upper bound and then the remainder 420 * from the beginning of the kernel buffer till the end of the data chunk. 421 */ 422 423 if (record__comp_enabled(aio->rec)) { 424 ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 425 mmap__mmap_len(map) - aio->size, 426 buf, size); 427 if (compressed < 0) 428 return (int)compressed; 429 430 size = compressed; 431 } else { 432 memcpy(aio->data + aio->size, buf, size); 433 } 434 435 if (!aio->size) { 436 /* 437 * Increment map->refcount to guard map->aio.data[] buffer 438 * from premature deallocation because map object can be 439 * released earlier than aio write request started on 440 * map->aio.data[] buffer is complete. 441 * 442 * perf_mmap__put() is done at record__aio_complete() 443 * after started aio request completion or at record__aio_push() 444 * if the request failed to start. 445 */ 446 perf_mmap__get(&map->core); 447 } 448 449 aio->size += size; 450 451 return size; 452 } 453 454 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 455 { 456 int ret, idx; 457 int trace_fd = perf_data__fd(rec->session->data); 458 struct record_aio aio = { .rec = rec, .size = 0 }; 459 460 /* 461 * Call record__aio_sync() to wait till map->aio.data[] buffer 462 * becomes available after previous aio write operation. 
463 */ 464 465 idx = record__aio_sync(map, false); 466 aio.data = map->aio.data[idx]; 467 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 468 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 469 return ret; 470 471 rec->samples++; 472 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 473 if (!ret) { 474 *off += aio.size; 475 rec->bytes_written += aio.size; 476 if (switch_output_size(rec)) 477 trigger_hit(&switch_output_trigger); 478 } else { 479 /* 480 * Decrement map->refcount incremented in record__aio_pushfn() 481 * back if record__aio_write() operation failed to start, otherwise 482 * map->refcount is decremented in record__aio_complete() after 483 * aio write operation finishes successfully. 484 */ 485 perf_mmap__put(&map->core); 486 } 487 488 return ret; 489 } 490 491 static off_t record__aio_get_pos(int trace_fd) 492 { 493 return lseek(trace_fd, 0, SEEK_CUR); 494 } 495 496 static void record__aio_set_pos(int trace_fd, off_t pos) 497 { 498 lseek(trace_fd, pos, SEEK_SET); 499 } 500 501 static void record__aio_mmap_read_sync(struct record *rec) 502 { 503 int i; 504 struct evlist *evlist = rec->evlist; 505 struct mmap *maps = evlist->mmap; 506 507 if (!record__aio_enabled(rec)) 508 return; 509 510 for (i = 0; i < evlist->core.nr_mmaps; i++) { 511 struct mmap *map = &maps[i]; 512 513 if (map->core.base) 514 record__aio_sync(map, true); 515 } 516 } 517 518 static int nr_cblocks_default = 1; 519 static int nr_cblocks_max = 4; 520 521 static int record__aio_parse(const struct option *opt, 522 const char *str, 523 int unset) 524 { 525 struct record_opts *opts = (struct record_opts *)opt->value; 526 527 if (unset) { 528 opts->nr_cblocks = 0; 529 } else { 530 if (str) 531 opts->nr_cblocks = strtol(str, NULL, 0); 532 if (!opts->nr_cblocks) 533 opts->nr_cblocks = nr_cblocks_default; 534 } 535 536 return 0; 537 } 538 #else /* HAVE_AIO_SUPPORT */ 539 static int nr_cblocks_max = 0; 540 541 static int record__aio_push(struct 
record *rec __maybe_unused, struct mmap *map __maybe_unused, 542 off_t *off __maybe_unused) 543 { 544 return -1; 545 } 546 547 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 548 { 549 return -1; 550 } 551 552 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 553 { 554 } 555 556 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 557 { 558 } 559 #endif 560 561 static int record__aio_enabled(struct record *rec) 562 { 563 return rec->opts.nr_cblocks > 0; 564 } 565 566 #define MMAP_FLUSH_DEFAULT 1 567 static int record__mmap_flush_parse(const struct option *opt, 568 const char *str, 569 int unset) 570 { 571 int flush_max; 572 struct record_opts *opts = (struct record_opts *)opt->value; 573 static struct parse_tag tags[] = { 574 { .tag = 'B', .mult = 1 }, 575 { .tag = 'K', .mult = 1 << 10 }, 576 { .tag = 'M', .mult = 1 << 20 }, 577 { .tag = 'G', .mult = 1 << 30 }, 578 { .tag = 0 }, 579 }; 580 581 if (unset) 582 return 0; 583 584 if (str) { 585 opts->mmap_flush = parse_tag_value(str, tags); 586 if (opts->mmap_flush == (int)-1) 587 opts->mmap_flush = strtol(str, NULL, 0); 588 } 589 590 if (!opts->mmap_flush) 591 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 592 593 flush_max = evlist__mmap_size(opts->mmap_pages); 594 flush_max /= 4; 595 if (opts->mmap_flush > flush_max) 596 opts->mmap_flush = flush_max; 597 598 return 0; 599 } 600 601 #ifdef HAVE_ZSTD_SUPPORT 602 static unsigned int comp_level_default = 1; 603 604 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 605 { 606 struct record_opts *opts = opt->value; 607 608 if (unset) { 609 opts->comp_level = 0; 610 } else { 611 if (str) 612 opts->comp_level = strtol(str, NULL, 0); 613 if (!opts->comp_level) 614 opts->comp_level = comp_level_default; 615 } 616 617 return 0; 618 } 619 #endif 620 static unsigned int comp_level_max = 22; 621 622 static int record__comp_enabled(struct record *rec) 623 { 624 return 
rec->opts.comp_level > 0; 625 } 626 627 static int process_synthesized_event(const struct perf_tool *tool, 628 union perf_event *event, 629 struct perf_sample *sample __maybe_unused, 630 struct machine *machine __maybe_unused) 631 { 632 struct record *rec = container_of(tool, struct record, tool); 633 return record__write(rec, NULL, event, event->header.size); 634 } 635 636 static struct mutex synth_lock; 637 638 static int process_locked_synthesized_event(const struct perf_tool *tool, 639 union perf_event *event, 640 struct perf_sample *sample __maybe_unused, 641 struct machine *machine __maybe_unused) 642 { 643 int ret; 644 645 mutex_lock(&synth_lock); 646 ret = process_synthesized_event(tool, event, sample, machine); 647 mutex_unlock(&synth_lock); 648 return ret; 649 } 650 651 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 652 { 653 struct record *rec = to; 654 655 if (record__comp_enabled(rec)) { 656 struct perf_record_compressed2 *event = map->data; 657 size_t padding = 0; 658 u8 pad[8] = {0}; 659 ssize_t compressed = zstd_compress(rec->session, map, map->data, 660 mmap__mmap_len(map), bf, size); 661 662 if (compressed < 0) 663 return (int)compressed; 664 665 bf = event; 666 thread->samples++; 667 668 /* 669 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan 670 * error. We make it aligned here. 
671 */ 672 event->data_size = compressed - sizeof(struct perf_record_compressed2); 673 event->header.size = PERF_ALIGN(compressed, sizeof(u64)); 674 padding = event->header.size - compressed; 675 return record__write(rec, map, bf, compressed) || 676 record__write(rec, map, &pad, padding); 677 } 678 679 thread->samples++; 680 return record__write(rec, map, bf, size); 681 } 682 683 static volatile sig_atomic_t signr = -1; 684 static volatile sig_atomic_t child_finished; 685 #ifdef HAVE_EVENTFD_SUPPORT 686 static volatile sig_atomic_t done_fd = -1; 687 #endif 688 689 static void sig_handler(int sig) 690 { 691 if (sig == SIGCHLD) 692 child_finished = 1; 693 else 694 signr = sig; 695 696 done = 1; 697 #ifdef HAVE_EVENTFD_SUPPORT 698 if (done_fd >= 0) { 699 u64 tmp = 1; 700 int orig_errno = errno; 701 702 /* 703 * It is possible for this signal handler to run after done is 704 * checked in the main loop, but before the perf counter fds are 705 * polled. If this happens, the poll() will continue to wait 706 * even though done is set, and will only break out if either 707 * another signal is received, or the counters are ready for 708 * read. To ensure the poll() doesn't sleep when done is set, 709 * use an eventfd (done_fd) to wake up the poll(). 
710 */ 711 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 712 pr_err("failed to signal wakeup fd, error: %m\n"); 713 714 errno = orig_errno; 715 } 716 #endif // HAVE_EVENTFD_SUPPORT 717 } 718 719 static void sigsegv_handler(int sig) 720 { 721 perf_hooks__recover(); 722 sighandler_dump_stack(sig); 723 } 724 725 static void record__sig_exit(void) 726 { 727 if (signr == -1) 728 return; 729 730 signal(signr, SIG_DFL); 731 raise(signr); 732 } 733 734 static int record__process_auxtrace(const struct perf_tool *tool, 735 struct mmap *map, 736 union perf_event *event, void *data1, 737 size_t len1, void *data2, size_t len2) 738 { 739 struct record *rec = container_of(tool, struct record, tool); 740 struct perf_data *data = &rec->data; 741 size_t padding; 742 u8 pad[8] = {0}; 743 744 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 745 off_t file_offset; 746 int fd = perf_data__fd(data); 747 int err; 748 749 file_offset = lseek(fd, 0, SEEK_CUR); 750 if (file_offset == -1) 751 return -1; 752 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 753 event, file_offset); 754 if (err) 755 return err; 756 } 757 758 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 759 padding = (len1 + len2) & 7; 760 if (padding) 761 padding = 8 - padding; 762 763 record__write(rec, map, event, event->header.size); 764 record__write(rec, map, data1, len1); 765 if (len2) 766 record__write(rec, map, data2, len2); 767 record__write(rec, map, &pad, padding); 768 769 return 0; 770 } 771 772 static int record__auxtrace_mmap_read(struct record *rec, 773 struct mmap *map) 774 { 775 int ret; 776 777 ret = auxtrace_mmap__read(map, rec->itr, 778 perf_session__env(rec->session), 779 &rec->tool, 780 record__process_auxtrace); 781 if (ret < 0) 782 return ret; 783 784 if (ret) 785 rec->samples++; 786 787 return 0; 788 } 789 790 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 791 struct mmap *map) 792 { 793 int ret; 794 795 ret = 
auxtrace_mmap__read_snapshot(map, rec->itr, 796 perf_session__env(rec->session), 797 &rec->tool, 798 record__process_auxtrace, 799 rec->opts.auxtrace_snapshot_size); 800 if (ret < 0) 801 return ret; 802 803 if (ret) 804 rec->samples++; 805 806 return 0; 807 } 808 809 static int record__auxtrace_read_snapshot_all(struct record *rec) 810 { 811 int i; 812 int rc = 0; 813 814 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 815 struct mmap *map = &rec->evlist->mmap[i]; 816 817 if (!map->auxtrace_mmap.base) 818 continue; 819 820 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 821 rc = -1; 822 goto out; 823 } 824 } 825 out: 826 return rc; 827 } 828 829 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 830 { 831 pr_debug("Recording AUX area tracing snapshot\n"); 832 if (record__auxtrace_read_snapshot_all(rec) < 0) { 833 trigger_error(&auxtrace_snapshot_trigger); 834 } else { 835 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 836 trigger_error(&auxtrace_snapshot_trigger); 837 else 838 trigger_ready(&auxtrace_snapshot_trigger); 839 } 840 } 841 842 static int record__auxtrace_snapshot_exit(struct record *rec) 843 { 844 if (trigger_is_error(&auxtrace_snapshot_trigger)) 845 return 0; 846 847 if (!auxtrace_record__snapshot_started && 848 auxtrace_record__snapshot_start(rec->itr)) 849 return -1; 850 851 record__read_auxtrace_snapshot(rec, true); 852 if (trigger_is_error(&auxtrace_snapshot_trigger)) 853 return -1; 854 855 return 0; 856 } 857 858 static int record__auxtrace_init(struct record *rec) 859 { 860 int err; 861 862 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 863 && record__threads_enabled(rec)) { 864 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 865 return -EINVAL; 866 } 867 868 if (!rec->itr) { 869 err = -EINVAL; 870 rec->itr = auxtrace_record__init(rec->evlist, &err); 871 if (err) 872 return err; 873 } 874 875 err = 
auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 876 rec->opts.auxtrace_snapshot_opts); 877 if (err) 878 return err; 879 880 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 881 rec->opts.auxtrace_sample_opts); 882 if (err) 883 return err; 884 885 err = auxtrace_parse_aux_action(rec->evlist); 886 if (err) 887 return err; 888 889 return auxtrace_parse_filters(rec->evlist); 890 } 891 892 static int record__config_text_poke(struct evlist *evlist) 893 { 894 struct evsel *evsel; 895 896 /* Nothing to do if text poke is already configured */ 897 evlist__for_each_entry(evlist, evsel) { 898 if (evsel->core.attr.text_poke) 899 return 0; 900 } 901 902 evsel = evlist__add_dummy_on_all_cpus(evlist); 903 if (!evsel) 904 return -ENOMEM; 905 906 evsel->core.attr.text_poke = 1; 907 evsel->core.attr.ksymbol = 1; 908 evsel->immediate = true; 909 evsel__set_sample_bit(evsel, TIME); 910 911 return 0; 912 } 913 914 static int record__config_off_cpu(struct record *rec) 915 { 916 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 917 } 918 919 static bool record__tracking_system_wide(struct record *rec) 920 { 921 struct evlist *evlist = rec->evlist; 922 struct evsel *evsel; 923 924 /* 925 * If non-dummy evsel exists, system_wide sideband is need to 926 * help parse sample information. 927 * For example, PERF_EVENT_MMAP event to help parse symbol, 928 * and PERF_EVENT_COMM event to help parse task executable name. 
929 */ 930 evlist__for_each_entry(evlist, evsel) { 931 if (!evsel__is_dummy_event(evsel)) 932 return true; 933 } 934 935 return false; 936 } 937 938 static int record__config_tracking_events(struct record *rec) 939 { 940 struct record_opts *opts = &rec->opts; 941 struct evlist *evlist = rec->evlist; 942 bool system_wide = false; 943 struct evsel *evsel; 944 945 /* 946 * For initial_delay, system wide or a hybrid system, we need to add 947 * tracking event so that we can track PERF_RECORD_MMAP to cover the 948 * delay of waiting or event synthesis. 949 */ 950 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 951 perf_pmus__num_core_pmus() > 1) { 952 /* 953 * User space tasks can migrate between CPUs, so when tracing 954 * selected CPUs, sideband for all CPUs is still needed. 955 */ 956 if (!!opts->target.cpu_list && record__tracking_system_wide(rec)) 957 system_wide = true; 958 959 evsel = evlist__findnew_tracking_event(evlist, system_wide); 960 if (!evsel) 961 return -ENOMEM; 962 963 /* 964 * Enable the tracking event when the process is forked for 965 * initial_delay, immediately for system wide. 
966 */ 967 if (opts->target.initial_delay && !evsel->immediate && 968 !target__has_cpu(&opts->target)) 969 evsel->core.attr.enable_on_exec = 1; 970 else 971 evsel->immediate = 1; 972 } 973 974 return 0; 975 } 976 977 static bool record__kcore_readable(struct machine *machine) 978 { 979 char kcore[PATH_MAX]; 980 int fd; 981 982 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 983 984 fd = open(kcore, O_RDONLY); 985 if (fd < 0) 986 return false; 987 988 close(fd); 989 990 return true; 991 } 992 993 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 994 { 995 char from_dir[PATH_MAX]; 996 char kcore_dir[PATH_MAX]; 997 int ret; 998 999 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 1000 1001 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 1002 if (ret) 1003 return ret; 1004 1005 return kcore_copy(from_dir, kcore_dir); 1006 } 1007 1008 static void record__thread_data_init_pipes(struct record_thread *thread_data) 1009 { 1010 thread_data->pipes.msg[0] = -1; 1011 thread_data->pipes.msg[1] = -1; 1012 thread_data->pipes.ack[0] = -1; 1013 thread_data->pipes.ack[1] = -1; 1014 } 1015 1016 static int record__thread_data_open_pipes(struct record_thread *thread_data) 1017 { 1018 if (pipe(thread_data->pipes.msg)) 1019 return -EINVAL; 1020 1021 if (pipe(thread_data->pipes.ack)) { 1022 close(thread_data->pipes.msg[0]); 1023 thread_data->pipes.msg[0] = -1; 1024 close(thread_data->pipes.msg[1]); 1025 thread_data->pipes.msg[1] = -1; 1026 return -EINVAL; 1027 } 1028 1029 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 1030 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 1031 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 1032 1033 return 0; 1034 } 1035 1036 static void record__thread_data_close_pipes(struct record_thread *thread_data) 1037 { 1038 if (thread_data->pipes.msg[0] != -1) { 1039 close(thread_data->pipes.msg[0]); 1040 thread_data->pipes.msg[0] = -1; 1041 
} 1042 if (thread_data->pipes.msg[1] != -1) { 1043 close(thread_data->pipes.msg[1]); 1044 thread_data->pipes.msg[1] = -1; 1045 } 1046 if (thread_data->pipes.ack[0] != -1) { 1047 close(thread_data->pipes.ack[0]); 1048 thread_data->pipes.ack[0] = -1; 1049 } 1050 if (thread_data->pipes.ack[1] != -1) { 1051 close(thread_data->pipes.ack[1]); 1052 thread_data->pipes.ack[1] = -1; 1053 } 1054 } 1055 1056 static bool evlist__per_thread(struct evlist *evlist) 1057 { 1058 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 1059 } 1060 1061 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 1062 { 1063 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 1064 struct mmap *mmap = evlist->mmap; 1065 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 1066 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1067 bool per_thread = evlist__per_thread(evlist); 1068 1069 if (per_thread) 1070 thread_data->nr_mmaps = nr_mmaps; 1071 else 1072 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1073 thread_data->mask->maps.nbits); 1074 if (mmap) { 1075 thread_data->maps = calloc(thread_data->nr_mmaps, sizeof(struct mmap *)); 1076 if (!thread_data->maps) 1077 return -ENOMEM; 1078 } 1079 if (overwrite_mmap) { 1080 thread_data->overwrite_maps = calloc(thread_data->nr_mmaps, sizeof(struct mmap *)); 1081 if (!thread_data->overwrite_maps) { 1082 zfree(&thread_data->maps); 1083 return -ENOMEM; 1084 } 1085 } 1086 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1087 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1088 1089 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1090 if (per_thread || 1091 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1092 if (thread_data->maps) { 1093 thread_data->maps[tm] = &mmap[m]; 1094 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1095 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, 
m); 1096 } 1097 if (thread_data->overwrite_maps) { 1098 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1099 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1100 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1101 } 1102 tm++; 1103 } 1104 } 1105 1106 return 0; 1107 } 1108 1109 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1110 { 1111 int f, tm, pos; 1112 struct mmap *map, *overwrite_map; 1113 1114 fdarray__init(&thread_data->pollfd, 64); 1115 1116 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1117 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1118 overwrite_map = thread_data->overwrite_maps ? 1119 thread_data->overwrite_maps[tm] : NULL; 1120 1121 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1122 void *ptr = evlist->core.pollfd.priv[f].ptr; 1123 1124 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1125 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1126 &evlist->core.pollfd); 1127 if (pos < 0) 1128 return pos; 1129 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1130 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1131 } 1132 } 1133 } 1134 1135 return 0; 1136 } 1137 1138 static void record__free_thread_data(struct record *rec) 1139 { 1140 int t; 1141 struct record_thread *thread_data = rec->thread_data; 1142 1143 if (thread_data == NULL) 1144 return; 1145 1146 for (t = 0; t < rec->nr_threads; t++) { 1147 record__thread_data_close_pipes(&thread_data[t]); 1148 zfree(&thread_data[t].maps); 1149 zfree(&thread_data[t].overwrite_maps); 1150 fdarray__exit(&thread_data[t].pollfd); 1151 } 1152 1153 zfree(&rec->thread_data); 1154 } 1155 1156 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1157 int evlist_pollfd_index, 1158 int thread_pollfd_index) 1159 { 1160 size_t x = rec->index_map_cnt; 1161 1162 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1163 return -ENOMEM; 1164 
rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1165 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1166 rec->index_map_cnt += 1; 1167 return 0; 1168 } 1169 1170 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1171 struct evlist *evlist, 1172 struct record_thread *thread_data) 1173 { 1174 struct pollfd *e_entries = evlist->core.pollfd.entries; 1175 struct pollfd *t_entries = thread_data->pollfd.entries; 1176 int err = 0; 1177 size_t i; 1178 1179 for (i = 0; i < rec->index_map_cnt; i++) { 1180 int e_pos = rec->index_map[i].evlist_pollfd_index; 1181 int t_pos = rec->index_map[i].thread_pollfd_index; 1182 1183 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1184 e_entries[e_pos].events != t_entries[t_pos].events) { 1185 pr_err("Thread and evlist pollfd index mismatch\n"); 1186 err = -EINVAL; 1187 continue; 1188 } 1189 e_entries[e_pos].revents = t_entries[t_pos].revents; 1190 } 1191 return err; 1192 } 1193 1194 static int record__dup_non_perf_events(struct record *rec, 1195 struct evlist *evlist, 1196 struct record_thread *thread_data) 1197 { 1198 struct fdarray *fda = &evlist->core.pollfd; 1199 int i, ret; 1200 1201 for (i = 0; i < fda->nr; i++) { 1202 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1203 continue; 1204 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1205 if (ret < 0) { 1206 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1207 return ret; 1208 } 1209 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1210 thread_data, ret, fda->entries[i].fd); 1211 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1212 if (ret < 0) { 1213 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1214 return ret; 1215 } 1216 } 1217 return 0; 1218 } 1219 1220 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1221 { 1222 int t, ret; 1223 struct record_thread *thread_data; 1224 1225 rec->thread_data = 
calloc(rec->nr_threads, sizeof(*(rec->thread_data))); 1226 if (!rec->thread_data) { 1227 pr_err("Failed to allocate thread data\n"); 1228 return -ENOMEM; 1229 } 1230 thread_data = rec->thread_data; 1231 1232 for (t = 0; t < rec->nr_threads; t++) 1233 record__thread_data_init_pipes(&thread_data[t]); 1234 1235 for (t = 0; t < rec->nr_threads; t++) { 1236 thread_data[t].rec = rec; 1237 thread_data[t].mask = &rec->thread_masks[t]; 1238 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1239 if (ret) { 1240 pr_err("Failed to initialize thread[%d] maps\n", t); 1241 goto out_free; 1242 } 1243 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1244 if (ret) { 1245 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1246 goto out_free; 1247 } 1248 if (t) { 1249 thread_data[t].tid = -1; 1250 ret = record__thread_data_open_pipes(&thread_data[t]); 1251 if (ret) { 1252 pr_err("Failed to open thread[%d] communication pipes\n", t); 1253 goto out_free; 1254 } 1255 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1256 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1257 if (ret < 0) { 1258 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1259 goto out_free; 1260 } 1261 thread_data[t].ctlfd_pos = ret; 1262 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1263 thread_data, thread_data[t].ctlfd_pos, 1264 thread_data[t].pipes.msg[0]); 1265 } else { 1266 thread_data[t].tid = gettid(); 1267 1268 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1269 if (ret < 0) 1270 goto out_free; 1271 1272 thread_data[t].ctlfd_pos = -1; /* Not used */ 1273 } 1274 } 1275 1276 return 0; 1277 1278 out_free: 1279 record__free_thread_data(rec); 1280 1281 return ret; 1282 } 1283 1284 static int record__mmap_evlist(struct record *rec, 1285 struct evlist *evlist) 1286 { 1287 int i, ret; 1288 struct record_opts *opts = &rec->opts; 1289 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1290 
opts->auxtrace_sample_mode; 1291 1292 if (opts->affinity != PERF_AFFINITY_SYS) 1293 cpu__setup_cpunode_map(); 1294 1295 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1296 opts->auxtrace_mmap_pages, 1297 auxtrace_overwrite, 1298 opts->nr_cblocks, opts->affinity, 1299 opts->mmap_flush, opts->comp_level) < 0) { 1300 if (errno == EPERM) { 1301 pr_err("Permission error mapping pages.\n" 1302 "Consider increasing " 1303 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1304 "or try again with a smaller value of -m/--mmap_pages.\n" 1305 "(current value: %u,%u)\n", 1306 opts->mmap_pages, opts->auxtrace_mmap_pages); 1307 return -errno; 1308 } else { 1309 pr_err("failed to mmap: %m\n"); 1310 if (errno) 1311 return -errno; 1312 else 1313 return -EINVAL; 1314 } 1315 } 1316 1317 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1318 return -1; 1319 1320 ret = record__alloc_thread_data(rec, evlist); 1321 if (ret) 1322 return ret; 1323 1324 if (record__threads_enabled(rec)) { 1325 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1326 if (ret) { 1327 errno = -ret; 1328 pr_err("Failed to create data directory: %m\n"); 1329 return ret; 1330 } 1331 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1332 if (evlist->mmap) 1333 evlist->mmap[i].file = &rec->data.dir.files[i]; 1334 if (evlist->overwrite_mmap) 1335 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1336 } 1337 } 1338 1339 return 0; 1340 } 1341 1342 static int record__mmap(struct record *rec) 1343 { 1344 return record__mmap_evlist(rec, rec->evlist); 1345 } 1346 1347 static int record__open(struct record *rec) 1348 { 1349 char msg[BUFSIZ]; 1350 struct evsel *pos; 1351 struct evlist *evlist = rec->evlist; 1352 struct perf_session *session = rec->session; 1353 struct record_opts *opts = &rec->opts; 1354 int rc = 0; 1355 bool skipped = false; 1356 bool removed_tracking = false; 1357 1358 evlist__for_each_entry(evlist, pos) { 1359 if (removed_tracking) { 1360 /* 1361 * Normally the head of the list 
has tracking enabled 1362 * for sideband data like mmaps. If this event is 1363 * removed, make sure to add tracking to the next 1364 * processed event. 1365 */ 1366 if (!pos->tracking) { 1367 pos->tracking = true; 1368 evsel__config(pos, opts, &callchain_param); 1369 } 1370 removed_tracking = false; 1371 } 1372 try_again: 1373 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1374 bool report_error = true; 1375 1376 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1377 if (verbose > 0) 1378 ui__warning("%s\n", msg); 1379 goto try_again; 1380 } 1381 if ((errno == EINVAL || errno == EBADF) && 1382 pos->core.leader != &pos->core && 1383 pos->weak_group) { 1384 pos = evlist__reset_weak_group(evlist, pos, true); 1385 goto try_again; 1386 } 1387 #if defined(__aarch64__) || defined(__arm__) 1388 if (strstr(evsel__name(pos), "cycles")) { 1389 struct evsel *pos2; 1390 /* 1391 * Unfortunately ARM has many events named 1392 * "cycles" on PMUs like the system-level (L3) 1393 * cache which don't support sampling. Only 1394 * display such failures to open when there is 1395 * only 1 cycles event or verbose is enabled. 1396 */ 1397 evlist__for_each_entry(evlist, pos2) { 1398 if (pos2 == pos) 1399 continue; 1400 if (strstr(evsel__name(pos2), "cycles")) { 1401 report_error = false; 1402 break; 1403 } 1404 } 1405 } 1406 #endif 1407 if (report_error || verbose > 0) { 1408 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1409 ui__error("Failure to open event '%s' on PMU '%s' which will be " 1410 "removed.\n%s\n", 1411 evsel__name(pos), evsel__pmu_name(pos), msg); 1412 } 1413 if (pos->tracking) 1414 removed_tracking = true; 1415 pos->skippable = true; 1416 skipped = true; 1417 } 1418 } 1419 1420 if (skipped) { 1421 struct evsel *tmp; 1422 int idx = 0; 1423 bool evlist_empty = true; 1424 1425 /* Remove evsels that failed to open and update indices. 
*/ 1426 evlist__for_each_entry_safe(evlist, tmp, pos) { 1427 if (pos->skippable) { 1428 evlist__remove(evlist, pos); 1429 continue; 1430 } 1431 1432 /* 1433 * Note, dummy events may be command line parsed or 1434 * added by the tool. We care about supporting `perf 1435 * record -e dummy` which may be used as a permission 1436 * check. Dummy events that are added to the command 1437 * line and opened along with other events that fail, 1438 * will still fail as if the dummy events were tool 1439 * added events for the sake of code simplicity. 1440 */ 1441 if (!evsel__is_dummy_event(pos)) 1442 evlist_empty = false; 1443 } 1444 evlist__for_each_entry(evlist, pos) { 1445 pos->core.idx = idx++; 1446 } 1447 /* If list is empty then fail. */ 1448 if (evlist_empty) { 1449 ui__error("Failure to open any events for recording.\n"); 1450 rc = -1; 1451 goto out; 1452 } 1453 } 1454 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1455 pr_warning( 1456 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1457 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1458 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1459 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1460 "Samples in kernel modules won't be resolved at all.\n\n" 1461 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 1462 "even with a suitable vmlinux or kallsyms file.\n\n"); 1463 } 1464 1465 if (evlist__apply_filters(evlist, &pos, &opts->target)) { 1466 pr_err("failed to set filter \"%s\" on event %s: %m\n", 1467 pos->filter ?: "BPF", evsel__name(pos)); 1468 rc = -1; 1469 goto out; 1470 } 1471 1472 rc = record__mmap(rec); 1473 if (rc) 1474 goto out; 1475 1476 session->evlist = evlist; 1477 perf_session__set_id_hdr_size(session); 1478 out: 1479 return rc; 1480 } 1481 1482 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1483 { 1484 if (rec->evlist->first_sample_time == 0) 1485 rec->evlist->first_sample_time = sample_time; 1486 1487 if (sample_time) 1488 rec->evlist->last_sample_time = sample_time; 1489 } 1490 1491 static int process_sample_event(const struct perf_tool *tool, 1492 union perf_event *event, 1493 struct perf_sample *sample, 1494 struct machine *machine) 1495 { 1496 struct record *rec = container_of(tool, struct record, tool); 1497 1498 set_timestamp_boundary(rec, sample->time); 1499 1500 if (rec->buildid_all) 1501 return 0; 1502 1503 rec->samples++; 1504 return build_id__mark_dso_hit(tool, event, sample, machine); 1505 } 1506 1507 static int process_buildids(struct record *rec) 1508 { 1509 struct perf_session *session = rec->session; 1510 1511 if (perf_data__size(&rec->data) == 0) 1512 return 0; 1513 1514 /* A single DSO is needed and not all inline frames. */ 1515 symbol_conf.inline_name = false; 1516 /* 1517 * During this process, it'll load kernel map and replace the 1518 * dso->long_name to a real pathname it found. In this case 1519 * we prefer the vmlinux path like 1520 * /lib/modules/3.16.4/build/vmlinux 1521 * 1522 * rather than build-id path (in debug directory). 
1523 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1524 */ 1525 symbol_conf.ignore_vmlinux_buildid = true; 1526 /* 1527 * If --buildid-all is given, it marks all DSO regardless of hits, 1528 * so no need to process samples. But if timestamp_boundary is enabled, 1529 * it still needs to walk on all samples to get the timestamps of 1530 * first/last samples. 1531 */ 1532 if (rec->buildid_all && !rec->timestamp_boundary) 1533 rec->tool.sample = process_event_sample_stub; 1534 1535 return perf_session__process_events(session); 1536 } 1537 1538 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1539 { 1540 int err; 1541 struct perf_tool *tool = data; 1542 /* 1543 *As for guest kernel when processing subcommand record&report, 1544 *we arrange module mmap prior to guest kernel mmap and trigger 1545 *a preload dso because default guest module symbols are loaded 1546 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1547 *method is used to avoid symbol missing when the first addr is 1548 *in module instead of in guest kernel. 1549 */ 1550 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1551 machine); 1552 if (err < 0) 1553 pr_err("Couldn't record guest kernel [%d]'s reference" 1554 " relocation symbol.\n", machine->pid); 1555 1556 /* 1557 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1558 * have no _text sometimes. 
1559 */ 1560 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1561 machine); 1562 if (err < 0) 1563 pr_err("Couldn't record guest kernel [%d]'s reference" 1564 " relocation symbol.\n", machine->pid); 1565 } 1566 1567 static struct perf_event_header finished_round_event = { 1568 .size = sizeof(struct perf_event_header), 1569 .type = PERF_RECORD_FINISHED_ROUND, 1570 }; 1571 1572 static struct perf_event_header finished_init_event = { 1573 .size = sizeof(struct perf_event_header), 1574 .type = PERF_RECORD_FINISHED_INIT, 1575 }; 1576 1577 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1578 { 1579 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1580 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1581 thread->mask->affinity.nbits)) { 1582 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1583 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1584 map->affinity_mask.bits, thread->mask->affinity.nbits); 1585 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1586 (cpu_set_t *)thread->mask->affinity.bits); 1587 if (verbose == 2) { 1588 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1589 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1590 } 1591 } 1592 } 1593 1594 static size_t process_comp_header(void *record, size_t increment) 1595 { 1596 struct perf_record_compressed2 *event = record; 1597 size_t size = sizeof(*event); 1598 1599 if (increment) { 1600 event->header.size += increment; 1601 return increment; 1602 } 1603 1604 event->header.type = PERF_RECORD_COMPRESSED2; 1605 event->header.size = size; 1606 1607 return size; 1608 } 1609 1610 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1611 void *dst, size_t dst_size, void *src, size_t src_size) 1612 { 1613 ssize_t compressed; 1614 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1; 1615 
struct zstd_data *zstd_data = &session->zstd_data; 1616 1617 if (map && map->file) 1618 zstd_data = &map->zstd_data; 1619 1620 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1621 max_record_size, process_comp_header); 1622 if (compressed < 0) 1623 return compressed; 1624 1625 if (map && map->file) { 1626 thread->bytes_transferred += src_size; 1627 thread->bytes_compressed += compressed; 1628 } else { 1629 session->bytes_transferred += src_size; 1630 session->bytes_compressed += compressed; 1631 } 1632 1633 return compressed; 1634 } 1635 1636 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1637 bool overwrite, bool synch) 1638 { 1639 u64 bytes_written = rec->bytes_written; 1640 int i; 1641 int rc = 0; 1642 int nr_mmaps; 1643 struct mmap **maps; 1644 int trace_fd = perf_data__fd(&rec->data); 1645 off_t off = 0; 1646 1647 if (!evlist) 1648 return 0; 1649 1650 nr_mmaps = thread->nr_mmaps; 1651 maps = overwrite ? thread->overwrite_maps : thread->maps; 1652 1653 if (!maps) 1654 return 0; 1655 1656 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1657 return 0; 1658 1659 if (record__aio_enabled(rec)) 1660 off = record__aio_get_pos(trace_fd); 1661 1662 for (i = 0; i < nr_mmaps; i++) { 1663 u64 flush = 0; 1664 struct mmap *map = maps[i]; 1665 1666 if (map->core.base) { 1667 record__adjust_affinity(rec, map); 1668 if (synch) { 1669 flush = map->core.flush; 1670 map->core.flush = 1; 1671 } 1672 if (!record__aio_enabled(rec)) { 1673 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1674 if (synch) 1675 map->core.flush = flush; 1676 rc = -1; 1677 goto out; 1678 } 1679 } else { 1680 if (record__aio_push(rec, map, &off) < 0) { 1681 record__aio_set_pos(trace_fd, off); 1682 if (synch) 1683 map->core.flush = flush; 1684 rc = -1; 1685 goto out; 1686 } 1687 } 1688 if (synch) 1689 map->core.flush = flush; 1690 } 1691 1692 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1693 
!rec->opts.auxtrace_sample_mode && 1694 record__auxtrace_mmap_read(rec, map) != 0) { 1695 rc = -1; 1696 goto out; 1697 } 1698 } 1699 1700 if (record__aio_enabled(rec)) 1701 record__aio_set_pos(trace_fd, off); 1702 1703 /* 1704 * Mark the round finished in case we wrote 1705 * at least one event. 1706 * 1707 * No need for round events in directory mode, 1708 * because per-cpu maps and files have data 1709 * sorted by kernel. 1710 */ 1711 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1712 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1713 1714 if (overwrite) 1715 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1716 out: 1717 return rc; 1718 } 1719 1720 static int record__mmap_read_all(struct record *rec, bool synch) 1721 { 1722 int err; 1723 1724 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1725 if (err) 1726 return err; 1727 1728 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1729 } 1730 1731 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1732 void *arg __maybe_unused) 1733 { 1734 struct perf_mmap *map = fda->priv[fd].ptr; 1735 1736 if (map) 1737 perf_mmap__put(map); 1738 } 1739 1740 static void *record__thread(void *arg) 1741 { 1742 enum thread_msg msg = THREAD_MSG__READY; 1743 bool terminate = false; 1744 struct fdarray *pollfd; 1745 int err, ctlfd_pos; 1746 1747 thread = arg; 1748 thread->tid = gettid(); 1749 1750 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1751 if (err == -1) 1752 pr_warning("threads[%d]: failed to notify on start: %m\n", thread->tid); 1753 1754 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1755 1756 pollfd = &thread->pollfd; 1757 ctlfd_pos = thread->ctlfd_pos; 1758 1759 for (;;) { 1760 unsigned long long hits = thread->samples; 1761 1762 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1763 break; 1764 1765 if (hits == thread->samples) { 1766 1767 err = 
fdarray__poll(pollfd, -1); 1768 /* 1769 * Propagate error, only if there's any. Ignore positive 1770 * number of returned events and interrupt error. 1771 */ 1772 if (err > 0 || (err < 0 && errno == EINTR)) 1773 err = 0; 1774 thread->waking++; 1775 1776 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1777 record__thread_munmap_filtered, NULL) == 0) 1778 break; 1779 } 1780 1781 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1782 terminate = true; 1783 close(thread->pipes.msg[0]); 1784 thread->pipes.msg[0] = -1; 1785 pollfd->entries[ctlfd_pos].fd = -1; 1786 pollfd->entries[ctlfd_pos].events = 0; 1787 } 1788 1789 pollfd->entries[ctlfd_pos].revents = 0; 1790 } 1791 record__mmap_read_all(thread->rec, true); 1792 1793 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1794 if (err == -1) 1795 pr_warning("threads[%d]: failed to notify on termination: %m\n", thread->tid); 1796 1797 return NULL; 1798 } 1799 1800 static void record__init_features(struct record *rec) 1801 { 1802 struct perf_session *session = rec->session; 1803 int feat; 1804 1805 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1806 perf_header__set_feat(&session->header, feat); 1807 1808 if (rec->no_buildid) 1809 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1810 1811 if (!have_tracepoints(&rec->evlist->core.entries)) 1812 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1813 1814 if (!rec->opts.branch_stack) 1815 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1816 1817 if (!rec->opts.full_auxtrace) 1818 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1819 1820 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1821 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1822 1823 if (!rec->opts.use_clockid) 1824 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1825 1826 if (!record__threads_enabled(rec)) 1827 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1828 1829 if 
(!record__comp_enabled(rec)) 1830 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1831 1832 perf_header__clear_feat(&session->header, HEADER_STAT); 1833 } 1834 1835 static void 1836 record__finish_output(struct record *rec) 1837 { 1838 int i; 1839 struct perf_data *data = &rec->data; 1840 int fd = perf_data__fd(data); 1841 1842 if (data->is_pipe) { 1843 /* Just to display approx. size */ 1844 data->file.size = rec->bytes_written; 1845 return; 1846 } 1847 1848 rec->session->header.data_size += rec->bytes_written; 1849 data->file.size = perf_data__seek(data, 0, SEEK_CUR); 1850 if (record__threads_enabled(rec)) { 1851 for (i = 0; i < data->dir.nr; i++) { 1852 data->dir.files[i].size = 1853 perf_data_file__seek(&data->dir.files[i], 0, SEEK_CUR); 1854 } 1855 } 1856 1857 /* Buildid scanning disabled or build ID in kernel and synthesized map events. */ 1858 if (!rec->no_buildid || !rec->no_buildid_cache) { 1859 process_buildids(rec); 1860 1861 if (rec->buildid_all) 1862 perf_session__dsos_hit_all(rec->session); 1863 } 1864 perf_session__write_header(rec->session, rec->evlist, fd, true); 1865 perf_session__cache_build_ids(rec->session); 1866 } 1867 1868 static int record__synthesize_workload(struct record *rec, bool tail) 1869 { 1870 int err; 1871 struct perf_thread_map *thread_map; 1872 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1873 1874 if (rec->opts.tail_synthesize != tail) 1875 return 0; 1876 1877 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1878 if (thread_map == NULL) 1879 return -1; 1880 1881 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1882 process_synthesized_event, 1883 &rec->session->machines.host, 1884 needs_mmap, 1885 rec->opts.record_data_mmap); 1886 perf_thread_map__put(thread_map); 1887 return err; 1888 } 1889 1890 static int write_finished_init(struct record *rec, bool tail) 1891 { 1892 if (rec->opts.tail_synthesize != tail) 1893 return 0; 1894 1895 return record__write(rec, NULL, 
&finished_init_event, sizeof(finished_init_event)); 1896 } 1897 1898 static int record__synthesize(struct record *rec, bool tail); 1899 1900 static int 1901 record__switch_output(struct record *rec, bool at_exit) 1902 { 1903 struct perf_data *data = &rec->data; 1904 char *new_filename = NULL; 1905 int fd, err; 1906 1907 /* Same Size: "2015122520103046"*/ 1908 char timestamp[] = "InvalidTimestamp"; 1909 1910 record__aio_mmap_read_sync(rec); 1911 1912 write_finished_init(rec, true); 1913 1914 record__synthesize(rec, true); 1915 if (target__none(&rec->opts.target)) 1916 record__synthesize_workload(rec, true); 1917 1918 rec->samples = 0; 1919 record__finish_output(rec); 1920 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1921 if (err) { 1922 pr_err("Failed to get current timestamp\n"); 1923 return -EINVAL; 1924 } 1925 1926 fd = perf_data__switch(data, timestamp, 1927 rec->session->header.data_offset, 1928 at_exit, &new_filename); 1929 if (fd >= 0 && !at_exit) { 1930 rec->bytes_written = 0; 1931 rec->session->header.data_size = 0; 1932 } 1933 1934 if (!quiet) { 1935 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1936 data->path, timestamp); 1937 } 1938 1939 if (rec->switch_output.num_files) { 1940 int n = rec->switch_output.cur_file + 1; 1941 1942 if (n >= rec->switch_output.num_files) 1943 n = 0; 1944 rec->switch_output.cur_file = n; 1945 if (rec->switch_output.filenames[n]) { 1946 remove(rec->switch_output.filenames[n]); 1947 zfree(&rec->switch_output.filenames[n]); 1948 } 1949 rec->switch_output.filenames[n] = new_filename; 1950 } else { 1951 free(new_filename); 1952 } 1953 1954 /* Output tracking events */ 1955 if (!at_exit) { 1956 record__synthesize(rec, false); 1957 1958 /* 1959 * In 'perf record --switch-output' without -a, 1960 * record__synthesize() in record__switch_output() won't 1961 * generate tracking events because there's no thread_map 1962 * in evlist. Which causes newly created perf.data doesn't 1963 * contain map and comm information. 
1964 * Create a fake thread_map and directly call 1965 * perf_event__synthesize_thread_map() for those events. 1966 */ 1967 if (target__none(&rec->opts.target)) 1968 record__synthesize_workload(rec, false); 1969 write_finished_init(rec, false); 1970 } 1971 return fd; 1972 } 1973 1974 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1975 struct perf_record_lost_samples *lost, 1976 int cpu_idx, int thread_idx, u64 lost_count, 1977 u16 misc_flag) 1978 { 1979 struct perf_sample_id *sid; 1980 struct perf_sample sample; 1981 int id_hdr_size; 1982 1983 perf_sample__init(&sample, /*all=*/true); 1984 lost->lost = lost_count; 1985 if (evsel->core.ids) { 1986 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1987 sample.id = sid->id; 1988 } 1989 1990 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1991 evsel->core.attr.sample_type, &sample); 1992 lost->header.size = sizeof(*lost) + id_hdr_size; 1993 lost->header.misc = misc_flag; 1994 record__write(rec, NULL, lost, lost->header.size); 1995 perf_sample__exit(&sample); 1996 } 1997 1998 static void record__read_lost_samples(struct record *rec) 1999 { 2000 struct perf_session *session = rec->session; 2001 struct perf_record_lost_samples_and_ids lost; 2002 struct evsel *evsel; 2003 2004 /* there was an error during record__open */ 2005 if (session->evlist == NULL) 2006 return; 2007 2008 evlist__for_each_entry(session->evlist, evsel) { 2009 struct xyarray *xy = evsel->core.sample_id; 2010 u64 lost_count; 2011 2012 if (xy == NULL || evsel->core.fd == NULL) 2013 continue; 2014 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 2015 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 2016 pr_debug("Unmatched FD vs. 
sample ID: skip reading LOST count\n"); 2017 continue; 2018 } 2019 2020 for (int x = 0; x < xyarray__max_x(xy); x++) { 2021 for (int y = 0; y < xyarray__max_y(xy); y++) { 2022 struct perf_counts_values count; 2023 2024 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 2025 pr_debug("read LOST count failed\n"); 2026 return; 2027 } 2028 2029 if (count.lost) { 2030 memset(&lost, 0, sizeof(lost)); 2031 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 2032 __record__save_lost_samples(rec, evsel, &lost.lost, 2033 x, y, count.lost, 0); 2034 } 2035 } 2036 } 2037 2038 lost_count = perf_bpf_filter__lost_count(evsel); 2039 if (lost_count) { 2040 memset(&lost, 0, sizeof(lost)); 2041 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 2042 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count, 2043 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 2044 } 2045 } 2046 } 2047 2048 static volatile sig_atomic_t workload_exec_errno; 2049 2050 /* 2051 * evlist__prepare_workload will send a SIGUSR1 2052 * if the fork fails, since we asked by setting its 2053 * want_signal to true. 
2054 */ 2055 static void workload_exec_failed_signal(int signo __maybe_unused, 2056 siginfo_t *info, 2057 void *ucontext __maybe_unused) 2058 { 2059 workload_exec_errno = info->si_value.sival_int; 2060 done = 1; 2061 child_finished = 1; 2062 } 2063 2064 static void snapshot_sig_handler(int sig); 2065 static void alarm_sig_handler(int sig); 2066 2067 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2068 { 2069 if (evlist) { 2070 if (evlist->mmap && evlist->mmap[0].core.base) 2071 return evlist->mmap[0].core.base; 2072 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2073 return evlist->overwrite_mmap[0].core.base; 2074 } 2075 return NULL; 2076 } 2077 2078 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2079 { 2080 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2081 if (pc) 2082 return pc; 2083 return NULL; 2084 } 2085 2086 static int record__synthesize(struct record *rec, bool tail) 2087 { 2088 struct perf_session *session = rec->session; 2089 struct machine *machine = &session->machines.host; 2090 struct perf_data *data = &rec->data; 2091 struct record_opts *opts = &rec->opts; 2092 struct perf_tool *tool = &rec->tool; 2093 int err = 0; 2094 event_op f = process_synthesized_event; 2095 2096 if (rec->opts.tail_synthesize != tail) 2097 return 0; 2098 2099 if (data->is_pipe) { 2100 err = perf_event__synthesize_for_pipe(tool, session, data, 2101 process_synthesized_event); 2102 if (err < 0) 2103 goto out; 2104 2105 rec->bytes_written += err; 2106 } 2107 2108 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2109 process_synthesized_event, machine); 2110 if (err) 2111 goto out; 2112 2113 /* Synthesize id_index before auxtrace_info */ 2114 err = perf_event__synthesize_id_index(tool, 2115 process_synthesized_event, 2116 session->evlist, machine); 2117 if (err) 2118 goto out; 2119 2120 if (rec->opts.full_auxtrace) { 2121 err = 
perf_event__synthesize_auxtrace_info(rec->itr, tool, 2122 session, process_synthesized_event); 2123 if (err) 2124 goto out; 2125 } 2126 2127 if (!evlist__exclude_kernel(rec->evlist)) { 2128 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2129 machine); 2130 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2131 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2132 "Check /proc/kallsyms permission or run as root.\n"); 2133 2134 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2135 machine); 2136 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2137 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2138 "Check /proc/modules permission or run as root.\n"); 2139 } 2140 2141 if (perf_guest) { 2142 machines__process_guests(&session->machines, 2143 perf_event__synthesize_guest_os, tool); 2144 } 2145 2146 err = perf_event__synthesize_extra_attr(&rec->tool, 2147 rec->evlist, 2148 process_synthesized_event, 2149 data->is_pipe); 2150 if (err) 2151 goto out; 2152 2153 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2154 process_synthesized_event, 2155 NULL); 2156 if (err < 0) { 2157 pr_err("Couldn't synthesize thread map.\n"); 2158 return err; 2159 } 2160 2161 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2162 process_synthesized_event, NULL); 2163 if (err < 0) { 2164 pr_err("Couldn't synthesize cpu map.\n"); 2165 return err; 2166 } 2167 2168 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2169 machine, opts); 2170 if (err < 0) { 2171 pr_warning("Couldn't synthesize bpf events.\n"); 2172 err = 0; 2173 } 2174 2175 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2176 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2177 machine); 2178 if (err < 0) { 2179 pr_warning("Couldn't synthesize cgroup events.\n"); 2180 err = 0; 2181 } 2182 } 2183 
2184 if (rec->opts.nr_threads_synthesize > 1) { 2185 mutex_init(&synth_lock); 2186 perf_set_multithreaded(); 2187 f = process_locked_synthesized_event; 2188 } 2189 2190 if (rec->opts.synth & PERF_SYNTH_TASK) { 2191 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2192 2193 err = __machine__synthesize_threads(machine, tool, &opts->target, 2194 rec->evlist->core.threads, 2195 f, needs_mmap, opts->record_data_mmap, 2196 rec->opts.nr_threads_synthesize); 2197 } 2198 2199 if (rec->opts.nr_threads_synthesize > 1) { 2200 perf_set_singlethreaded(); 2201 mutex_destroy(&synth_lock); 2202 } 2203 2204 out: 2205 return err; 2206 } 2207 2208 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused) 2209 { 2210 #ifdef HAVE_LIBBPF_SUPPORT 2211 perf_event__synthesize_final_bpf_metadata(rec->session, 2212 process_synthesized_event); 2213 #endif 2214 } 2215 2216 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2217 { 2218 struct record *rec = data; 2219 pthread_kill(rec->thread_id, SIGUSR2); 2220 return 0; 2221 } 2222 2223 static int record__setup_sb_evlist(struct record *rec) 2224 { 2225 struct record_opts *opts = &rec->opts; 2226 2227 if (rec->sb_evlist != NULL) { 2228 /* 2229 * We get here if --switch-output-event populated the 2230 * sb_evlist, so associate a callback that will send a SIGUSR2 2231 * to the main thread. 
2232 */ 2233 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2234 rec->thread_id = pthread_self(); 2235 } 2236 #ifdef HAVE_LIBBPF_SUPPORT 2237 if (!opts->no_bpf_event) { 2238 if (rec->sb_evlist == NULL) { 2239 rec->sb_evlist = evlist__new(); 2240 2241 if (rec->sb_evlist == NULL) { 2242 pr_err("Couldn't create side band evlist.\n."); 2243 return -1; 2244 } 2245 } 2246 2247 if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) { 2248 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2249 return -1; 2250 } 2251 } 2252 #endif 2253 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2254 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2255 opts->no_bpf_event = true; 2256 } 2257 2258 return 0; 2259 } 2260 2261 static int record__init_clock(struct record *rec) 2262 { 2263 struct perf_session *session = rec->session; 2264 struct timespec ref_clockid; 2265 struct timeval ref_tod; 2266 struct perf_env *env = perf_session__env(session); 2267 u64 ref; 2268 2269 if (!rec->opts.use_clockid) 2270 return 0; 2271 2272 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2273 env->clock.clockid_res_ns = rec->opts.clockid_res_ns; 2274 2275 env->clock.clockid = rec->opts.clockid; 2276 2277 if (gettimeofday(&ref_tod, NULL) != 0) { 2278 pr_err("gettimeofday failed, cannot set reference time.\n"); 2279 return -1; 2280 } 2281 2282 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2283 pr_err("clock_gettime failed, cannot set reference time.\n"); 2284 return -1; 2285 } 2286 2287 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2288 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2289 2290 env->clock.tod_ns = ref; 2291 2292 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2293 (u64) ref_clockid.tv_nsec; 2294 2295 env->clock.clockid_ns = ref; 2296 return 0; 2297 } 2298 2299 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2300 { 2301 if 
(trigger_is_ready(&auxtrace_snapshot_trigger)) { 2302 trigger_hit(&auxtrace_snapshot_trigger); 2303 auxtrace_record__snapshot_started = 1; 2304 if (auxtrace_record__snapshot_start(rec->itr)) 2305 trigger_error(&auxtrace_snapshot_trigger); 2306 } 2307 } 2308 2309 static int record__terminate_thread(struct record_thread *thread_data) 2310 { 2311 int err; 2312 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2313 pid_t tid = thread_data->tid; 2314 2315 close(thread_data->pipes.msg[1]); 2316 thread_data->pipes.msg[1] = -1; 2317 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2318 if (err > 0) 2319 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2320 else 2321 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2322 thread->tid, tid); 2323 2324 return 0; 2325 } 2326 2327 static int record__start_threads(struct record *rec) 2328 { 2329 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2330 struct record_thread *thread_data = rec->thread_data; 2331 sigset_t full, mask; 2332 pthread_t handle; 2333 pthread_attr_t attrs; 2334 2335 thread = &thread_data[0]; 2336 2337 if (!record__threads_enabled(rec)) 2338 return 0; 2339 2340 sigfillset(&full); 2341 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2342 pr_err("Failed to block signals on threads start: %m\n"); 2343 return -1; 2344 } 2345 2346 pthread_attr_init(&attrs); 2347 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2348 2349 for (t = 1; t < nr_threads; t++) { 2350 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2351 2352 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2353 pthread_attr_setaffinity_np(&attrs, 2354 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2355 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2356 #endif 2357 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2358 for (tt = 1; tt < t; tt++) 2359 record__terminate_thread(&thread_data[t]); 2360 pr_err("Failed to start threads: %m\n"); 2361 ret = -1; 2362 goto out_err; 
2363 } 2364 2365 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2366 if (err > 0) 2367 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2368 thread_msg_tags[msg]); 2369 else 2370 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2371 thread->tid, rec->thread_data[t].tid); 2372 } 2373 2374 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2375 (cpu_set_t *)thread->mask->affinity.bits); 2376 2377 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2378 2379 out_err: 2380 pthread_attr_destroy(&attrs); 2381 2382 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2383 pr_err("Failed to unblock signals on threads start: %m\n"); 2384 ret = -1; 2385 } 2386 2387 return ret; 2388 } 2389 2390 static int record__stop_threads(struct record *rec) 2391 { 2392 int t; 2393 struct record_thread *thread_data = rec->thread_data; 2394 2395 for (t = 1; t < rec->nr_threads; t++) 2396 record__terminate_thread(&thread_data[t]); 2397 2398 for (t = 0; t < rec->nr_threads; t++) { 2399 rec->samples += thread_data[t].samples; 2400 if (!record__threads_enabled(rec)) 2401 continue; 2402 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2403 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2404 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2405 thread_data[t].samples, thread_data[t].waking); 2406 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2407 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2408 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2409 else 2410 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2411 } 2412 2413 return 0; 2414 } 2415 2416 static unsigned long record__waking(struct record *rec) 2417 { 2418 int t; 2419 unsigned long waking = 0; 2420 struct record_thread *thread_data = rec->thread_data; 2421 2422 for (t = 0; t < rec->nr_threads; t++) 2423 waking += 
thread_data[t].waking; 2424 2425 return waking; 2426 } 2427 2428 static int __cmd_record(struct record *rec, int argc, const char **argv) 2429 { 2430 int err; 2431 int status = 0; 2432 const bool forks = argc > 0; 2433 struct perf_tool *tool = &rec->tool; 2434 struct record_opts *opts = &rec->opts; 2435 struct perf_data *data = &rec->data; 2436 struct perf_session *session; 2437 bool disabled = false, draining = false; 2438 int fd; 2439 float ratio = 0; 2440 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2441 struct perf_env *env; 2442 2443 atexit(record__sig_exit); 2444 signal(SIGCHLD, sig_handler); 2445 signal(SIGINT, sig_handler); 2446 signal(SIGTERM, sig_handler); 2447 signal(SIGSEGV, sigsegv_handler); 2448 2449 if (rec->opts.record_cgroup) { 2450 #ifndef HAVE_FILE_HANDLE 2451 pr_err("cgroup tracking is not supported\n"); 2452 return -1; 2453 #endif 2454 } 2455 2456 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2457 signal(SIGUSR2, snapshot_sig_handler); 2458 if (rec->opts.auxtrace_snapshot_mode) 2459 trigger_on(&auxtrace_snapshot_trigger); 2460 if (rec->switch_output.enabled) 2461 trigger_on(&switch_output_trigger); 2462 } else { 2463 signal(SIGUSR2, SIG_IGN); 2464 } 2465 2466 perf_tool__init(tool, /*ordered_events=*/true); 2467 tool->sample = process_sample_event; 2468 tool->fork = perf_event__process_fork; 2469 tool->exit = perf_event__process_exit; 2470 tool->comm = perf_event__process_comm; 2471 tool->namespaces = perf_event__process_namespaces; 2472 tool->mmap = build_id__process_mmap; 2473 tool->mmap2 = build_id__process_mmap2; 2474 tool->itrace_start = process_timestamp_boundary; 2475 tool->aux = process_timestamp_boundary; 2476 tool->namespace_events = rec->opts.record_namespaces; 2477 tool->cgroup_events = rec->opts.record_cgroup; 2478 session = perf_session__new(data, tool); 2479 if (IS_ERR(session)) { 2480 pr_err("Perf session creation failed.\n"); 2481 return PTR_ERR(session); 2482 } 2483 env = 
perf_session__env(session); 2484 if (record__threads_enabled(rec)) { 2485 if (perf_data__is_pipe(&rec->data)) { 2486 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2487 return -1; 2488 } 2489 if (rec->opts.full_auxtrace) { 2490 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2491 return -1; 2492 } 2493 } 2494 2495 fd = perf_data__fd(data); 2496 rec->session = session; 2497 2498 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2499 pr_err("Compression initialization failed.\n"); 2500 return -1; 2501 } 2502 #ifdef HAVE_EVENTFD_SUPPORT 2503 done_fd = eventfd(0, EFD_NONBLOCK); 2504 if (done_fd < 0) { 2505 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2506 status = -1; 2507 goto out_delete_session; 2508 } 2509 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2510 if (err < 0) { 2511 pr_err("Failed to add wakeup eventfd to poll list\n"); 2512 status = err; 2513 goto out_delete_session; 2514 } 2515 #endif // HAVE_EVENTFD_SUPPORT 2516 2517 env->comp_type = PERF_COMP_ZSTD; 2518 env->comp_level = rec->opts.comp_level; 2519 2520 if (rec->opts.kcore && 2521 !record__kcore_readable(&session->machines.host)) { 2522 pr_err("ERROR: kcore is not readable.\n"); 2523 return -1; 2524 } 2525 2526 if (record__init_clock(rec)) 2527 return -1; 2528 2529 record__init_features(rec); 2530 2531 if (forks) { 2532 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2533 workload_exec_failed_signal); 2534 if (err < 0) { 2535 pr_err("Couldn't run the workload!\n"); 2536 status = err; 2537 goto out_delete_session; 2538 } 2539 } 2540 2541 /* 2542 * If we have just single event and are sending data 2543 * through pipe, we need to force the ids allocation, 2544 * because we synthesize event name through the pipe 2545 * and need the id for that. 
2546 */ 2547 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2548 rec->opts.sample_id = true; 2549 2550 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2551 rec->timestamp_filename = false; 2552 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2553 } 2554 2555 /* 2556 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE 2557 * and hybrid_merge is false. 2558 */ 2559 evlist__uniquify_evsel_names(rec->evlist, &stat_config); 2560 2561 evlist__config(rec->evlist, opts, &callchain_param); 2562 2563 /* Debug message used by test scripts */ 2564 pr_debug3("perf record opening and mmapping events\n"); 2565 if (record__open(rec) != 0) { 2566 err = -1; 2567 goto out_free_threads; 2568 } 2569 /* Debug message used by test scripts */ 2570 pr_debug3("perf record done opening and mmapping events\n"); 2571 env->comp_mmap_len = session->evlist->core.mmap_len; 2572 2573 if (rec->opts.kcore) { 2574 err = record__kcore_copy(&session->machines.host, data); 2575 if (err) { 2576 pr_err("ERROR: Failed to copy kcore\n"); 2577 goto out_free_threads; 2578 } 2579 } 2580 2581 /* 2582 * Normally perf_session__new would do this, but it doesn't have the 2583 * evlist. 2584 */ 2585 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2586 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2587 rec->tool.ordered_events = false; 2588 } 2589 2590 if (evlist__nr_groups(rec->evlist) == 0) 2591 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2592 2593 if (data->is_pipe) { 2594 err = perf_header__write_pipe(fd); 2595 if (err < 0) 2596 goto out_free_threads; 2597 } else { 2598 err = perf_session__write_header(session, rec->evlist, fd, false); 2599 if (err < 0) 2600 goto out_free_threads; 2601 } 2602 2603 err = -1; 2604 if (!rec->no_buildid 2605 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2606 pr_err("Couldn't generate buildids. 
" 2607 "Use --no-buildid to profile anyway.\n"); 2608 goto out_free_threads; 2609 } 2610 2611 if (!evlist__needs_bpf_sb_event(rec->evlist)) 2612 opts->no_bpf_event = true; 2613 2614 err = record__setup_sb_evlist(rec); 2615 if (err) 2616 goto out_free_threads; 2617 2618 err = record__synthesize(rec, false); 2619 if (err < 0) 2620 goto out_free_threads; 2621 2622 if (rec->realtime_prio) { 2623 struct sched_param param; 2624 2625 param.sched_priority = rec->realtime_prio; 2626 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 2627 pr_err("Could not set realtime priority.\n"); 2628 err = -1; 2629 goto out_free_threads; 2630 } 2631 } 2632 2633 if (record__start_threads(rec)) 2634 goto out_free_threads; 2635 2636 /* 2637 * When perf is starting the traced process, all the events 2638 * (apart from group members) have enable_on_exec=1 set, 2639 * so don't spoil it by prematurely enabling them. 2640 */ 2641 if (!target__none(&opts->target) && !opts->target.initial_delay) 2642 evlist__enable(rec->evlist); 2643 2644 /* 2645 * offcpu-time does not call execve, so enable_on_exe wouldn't work 2646 * when recording a workload, do it manually 2647 */ 2648 if (rec->off_cpu) 2649 evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT); 2650 2651 /* 2652 * Let the child rip 2653 */ 2654 if (forks) { 2655 struct machine *machine = &session->machines.host; 2656 union perf_event *event; 2657 pid_t tgid; 2658 2659 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2660 if (event == NULL) { 2661 err = -ENOMEM; 2662 goto out_child; 2663 } 2664 2665 /* 2666 * Some H/W events are generated before COMM event 2667 * which is emitted during exec(), so perf script 2668 * cannot see a correct process name for those events. 2669 * Synthesize COMM event to prevent it. 
2670 */ 2671 tgid = perf_event__synthesize_comm(tool, event, 2672 rec->evlist->workload.pid, 2673 process_synthesized_event, 2674 machine); 2675 free(event); 2676 2677 if (tgid == -1) 2678 goto out_child; 2679 2680 event = malloc(sizeof(event->namespaces) + 2681 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2682 machine->id_hdr_size); 2683 if (event == NULL) { 2684 err = -ENOMEM; 2685 goto out_child; 2686 } 2687 2688 /* 2689 * Synthesize NAMESPACES event for the command specified. 2690 */ 2691 perf_event__synthesize_namespaces(tool, event, 2692 rec->evlist->workload.pid, 2693 tgid, process_synthesized_event, 2694 machine); 2695 free(event); 2696 2697 evlist__start_workload(rec->evlist); 2698 } 2699 2700 if (opts->target.initial_delay) { 2701 pr_info(EVLIST_DISABLED_MSG); 2702 if (opts->target.initial_delay > 0) { 2703 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2704 evlist__enable(rec->evlist); 2705 pr_info(EVLIST_ENABLED_MSG); 2706 } 2707 } 2708 2709 err = event_enable_timer__start(rec->evlist->eet); 2710 if (err) 2711 goto out_child; 2712 2713 /* Debug message used by test scripts */ 2714 pr_debug3("perf record has started\n"); 2715 fflush(stderr); 2716 2717 trigger_ready(&auxtrace_snapshot_trigger); 2718 trigger_ready(&switch_output_trigger); 2719 perf_hooks__invoke_record_start(); 2720 2721 /* 2722 * Must write FINISHED_INIT so it will be seen after all other 2723 * synthesized user events, but before any regular events. 2724 */ 2725 err = write_finished_init(rec, false); 2726 if (err < 0) 2727 goto out_child; 2728 2729 for (;;) { 2730 unsigned long long hits = thread->samples; 2731 2732 /* 2733 * rec->evlist->bkw_mmap_state is possible to be 2734 * BKW_MMAP_EMPTY here: when done == true and 2735 * hits != rec->samples in previous round. 2736 * 2737 * evlist__toggle_bkw_mmap ensure we never 2738 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 
2739 */ 2740 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2741 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2742 2743 if (record__mmap_read_all(rec, false) < 0) { 2744 trigger_error(&auxtrace_snapshot_trigger); 2745 trigger_error(&switch_output_trigger); 2746 err = -1; 2747 goto out_child_no_flush; 2748 } 2749 2750 if (auxtrace_record__snapshot_started) { 2751 auxtrace_record__snapshot_started = 0; 2752 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2753 record__read_auxtrace_snapshot(rec, false); 2754 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2755 pr_err("AUX area tracing snapshot failed\n"); 2756 err = -1; 2757 goto out_child; 2758 } 2759 } 2760 2761 if (trigger_is_hit(&switch_output_trigger)) { 2762 /* 2763 * If switch_output_trigger is hit, the data in 2764 * overwritable ring buffer should have been collected, 2765 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2766 * 2767 * If SIGUSR2 raise after or during record__mmap_read_all(), 2768 * record__mmap_read_all() didn't collect data from 2769 * overwritable ring buffer. Read again. 2770 */ 2771 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2772 continue; 2773 trigger_ready(&switch_output_trigger); 2774 2775 /* 2776 * Reenable events in overwrite ring buffer after 2777 * record__mmap_read_all(): we should have collected 2778 * data from it. 
2779 */ 2780 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2781 2782 if (!quiet) 2783 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2784 record__waking(rec)); 2785 thread->waking = 0; 2786 fd = record__switch_output(rec, false); 2787 if (fd < 0) { 2788 pr_err("Failed to switch to new file\n"); 2789 trigger_error(&switch_output_trigger); 2790 err = fd; 2791 goto out_child; 2792 } 2793 2794 /* re-arm the alarm */ 2795 if (rec->switch_output.time) 2796 alarm(rec->switch_output.time); 2797 } 2798 2799 if (hits == thread->samples) { 2800 if (done || draining) 2801 break; 2802 err = fdarray__poll(&thread->pollfd, -1); 2803 /* 2804 * Propagate error, only if there's any. Ignore positive 2805 * number of returned events and interrupt error. 2806 */ 2807 if (err > 0 || (err < 0 && errno == EINTR)) 2808 err = 0; 2809 thread->waking++; 2810 2811 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2812 record__thread_munmap_filtered, NULL) == 0) 2813 draining = true; 2814 2815 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2816 if (err) 2817 goto out_child; 2818 } 2819 2820 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2821 switch (cmd) { 2822 case EVLIST_CTL_CMD_SNAPSHOT: 2823 hit_auxtrace_snapshot_trigger(rec); 2824 evlist__ctlfd_ack(rec->evlist); 2825 break; 2826 case EVLIST_CTL_CMD_STOP: 2827 done = 1; 2828 break; 2829 case EVLIST_CTL_CMD_ACK: 2830 case EVLIST_CTL_CMD_UNSUPPORTED: 2831 case EVLIST_CTL_CMD_ENABLE: 2832 case EVLIST_CTL_CMD_DISABLE: 2833 case EVLIST_CTL_CMD_EVLIST: 2834 case EVLIST_CTL_CMD_PING: 2835 default: 2836 break; 2837 } 2838 } 2839 2840 err = event_enable_timer__process(rec->evlist->eet); 2841 if (err < 0) 2842 goto out_child; 2843 if (err) { 2844 err = 0; 2845 done = 1; 2846 } 2847 2848 /* 2849 * When perf is starting the traced process, at the end events 2850 * die with the process and we wait for that. Thus no need to 2851 * disable events in this case. 
2852 */ 2853 if (done && !disabled && !target__none(&opts->target)) { 2854 trigger_off(&auxtrace_snapshot_trigger); 2855 evlist__disable(rec->evlist); 2856 disabled = true; 2857 } 2858 } 2859 2860 trigger_off(&auxtrace_snapshot_trigger); 2861 trigger_off(&switch_output_trigger); 2862 2863 record__synthesize_final_bpf_metadata(rec); 2864 2865 if (opts->auxtrace_snapshot_on_exit) 2866 record__auxtrace_snapshot_exit(rec); 2867 2868 if (forks && workload_exec_errno) { 2869 char msg[STRERR_BUFSIZE]; 2870 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2871 struct strbuf sb = STRBUF_INIT; 2872 2873 evlist__format_evsels(rec->evlist, &sb, 2048); 2874 2875 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2876 sb.buf, argv[0], emsg); 2877 strbuf_release(&sb); 2878 err = -1; 2879 goto out_child; 2880 } 2881 2882 if (!quiet) 2883 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2884 record__waking(rec)); 2885 2886 write_finished_init(rec, true); 2887 2888 if (target__none(&rec->opts.target)) 2889 record__synthesize_workload(rec, true); 2890 2891 out_child: 2892 record__stop_threads(rec); 2893 record__mmap_read_all(rec, true); 2894 goto out_free_threads; 2895 out_child_no_flush: 2896 /* mmap read already failed — retrying would just fail again */ 2897 record__stop_threads(rec); 2898 out_free_threads: 2899 record__free_thread_data(rec); 2900 evlist__finalize_ctlfd(rec->evlist); 2901 record__aio_mmap_read_sync(rec); 2902 2903 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2904 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2905 env->comp_ratio = ratio + 0.5; 2906 } 2907 2908 if (forks) { 2909 int exit_status; 2910 2911 if (!child_finished) 2912 kill(rec->evlist->workload.pid, SIGTERM); 2913 2914 wait(&exit_status); 2915 2916 if (err < 0) 2917 status = err; 2918 else if (WIFEXITED(exit_status)) 2919 status = WEXITSTATUS(exit_status); 2920 else if 
(WIFSIGNALED(exit_status)) 2921 signr = WTERMSIG(exit_status); 2922 } else 2923 status = err; 2924 2925 if (rec->off_cpu) 2926 rec->bytes_written += off_cpu_write(rec->session); 2927 2928 record__read_lost_samples(rec); 2929 /* this will be recalculated during process_buildids() */ 2930 rec->samples = 0; 2931 2932 if (!err) { 2933 record__synthesize(rec, true); 2934 if (!rec->timestamp_filename) { 2935 record__finish_output(rec); 2936 } else { 2937 fd = record__switch_output(rec, true); 2938 if (fd < 0) { 2939 status = fd; 2940 goto out_delete_session; 2941 } 2942 } 2943 } 2944 2945 perf_hooks__invoke_record_end(); 2946 2947 if (!err && !quiet) { 2948 char samples[128]; 2949 const char *postfix = rec->timestamp_filename ? 2950 ".<timestamp>" : ""; 2951 2952 if (rec->samples && !rec->opts.full_auxtrace) 2953 scnprintf(samples, sizeof(samples), 2954 " (%" PRIu64 " samples)", rec->samples); 2955 else 2956 samples[0] = '\0'; 2957 2958 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2959 perf_data__size(data) / 1024.0 / 1024.0, 2960 data->path, postfix, samples); 2961 if (ratio) { 2962 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2963 rec->session->bytes_transferred / 1024.0 / 1024.0, 2964 ratio); 2965 } 2966 fprintf(stderr, " ]\n"); 2967 } 2968 2969 out_delete_session: 2970 #ifdef HAVE_EVENTFD_SUPPORT 2971 if (done_fd >= 0) { 2972 fd = done_fd; 2973 done_fd = -1; 2974 2975 close(fd); 2976 } 2977 #endif 2978 zstd_fini(&session->zstd_data); 2979 if (!opts->no_bpf_event) 2980 evlist__stop_sb_thread(rec->sb_evlist); 2981 2982 perf_session__delete(session); 2983 return status; 2984 } 2985 2986 static int record_parse_callchain_opt(const struct option *opt, 2987 const char *arg, 2988 int unset) 2989 { 2990 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2991 } 2992 2993 static int record_callchain_opt(const struct option *opt, 2994 const char *arg __maybe_unused, 2995 int unset) 2996 { 2997 /* 2998 * The 
-g option only sets the callchain if not already configured by 2999 * .perfconfig. It does, however, enable it. 3000 */ 3001 if (callchain_param.record_mode != CALLCHAIN_NONE) { 3002 callchain_param.enabled = true; 3003 return 0; 3004 } 3005 3006 return record_opts__parse_callchain(opt->value, &callchain_param, 3007 EM_HOST != EM_S390 ? "fp" : "dwarf", 3008 unset); 3009 } 3010 3011 3012 static int perf_record_config(const char *var, const char *value, void *cb) 3013 { 3014 struct record *rec = cb; 3015 3016 if (!strcmp(var, "record.build-id")) { 3017 if (!strcmp(value, "cache")) 3018 rec->no_buildid_cache = false; 3019 else if (!strcmp(value, "no-cache")) 3020 rec->no_buildid_cache = true; 3021 else if (!strcmp(value, "skip")) 3022 rec->no_buildid = rec->no_buildid_cache = true; 3023 else if (!strcmp(value, "mmap")) 3024 rec->buildid_mmap = true; 3025 else if (!strcmp(value, "no-mmap")) 3026 rec->buildid_mmap = false; 3027 else 3028 return -1; 3029 return 0; 3030 } 3031 if (!strcmp(var, "record.call-graph")) { 3032 var = "call-graph.record-mode"; 3033 return perf_default_config(var, value, cb); 3034 } 3035 #ifdef HAVE_AIO_SUPPORT 3036 if (!strcmp(var, "record.aio")) { 3037 rec->opts.nr_cblocks = strtol(value, NULL, 0); 3038 if (!rec->opts.nr_cblocks) 3039 rec->opts.nr_cblocks = nr_cblocks_default; 3040 } 3041 #endif 3042 if (!strcmp(var, "record.debuginfod")) { 3043 rec->debuginfod.urls = strdup(value); 3044 if (!rec->debuginfod.urls) 3045 return -ENOMEM; 3046 rec->debuginfod.set = true; 3047 } 3048 3049 return 0; 3050 } 3051 3052 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 3053 { 3054 struct record *rec = (struct record *)opt->value; 3055 3056 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 3057 } 3058 3059 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 3060 { 3061 struct record_opts *opts = (struct record_opts *)opt->value; 3062 3063 if 
(unset || !str) 3064 return 0; 3065 3066 if (!strcasecmp(str, "node")) 3067 opts->affinity = PERF_AFFINITY_NODE; 3068 else if (!strcasecmp(str, "cpu")) 3069 opts->affinity = PERF_AFFINITY_CPU; 3070 3071 return 0; 3072 } 3073 3074 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3075 { 3076 mask->nbits = nr_bits; 3077 mask->bits = bitmap_zalloc(mask->nbits); 3078 if (!mask->bits) 3079 return -ENOMEM; 3080 3081 return 0; 3082 } 3083 3084 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3085 { 3086 bitmap_free(mask->bits); 3087 mask->bits = NULL; 3088 mask->nbits = 0; 3089 } 3090 3091 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3092 { 3093 int ret; 3094 3095 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3096 if (ret) { 3097 mask->affinity.bits = NULL; 3098 return ret; 3099 } 3100 3101 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3102 if (ret) { 3103 record__mmap_cpu_mask_free(&mask->maps); 3104 mask->maps.bits = NULL; 3105 } 3106 3107 return ret; 3108 } 3109 3110 static void record__thread_mask_free(struct thread_mask *mask) 3111 { 3112 record__mmap_cpu_mask_free(&mask->maps); 3113 record__mmap_cpu_mask_free(&mask->affinity); 3114 } 3115 3116 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3117 { 3118 int s; 3119 struct record_opts *opts = opt->value; 3120 3121 if (unset || !str || !strlen(str)) { 3122 opts->threads_spec = THREAD_SPEC__CPU; 3123 } else { 3124 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3125 if (s == THREAD_SPEC__USER) { 3126 opts->threads_user_spec = strdup(str); 3127 if (!opts->threads_user_spec) 3128 return -ENOMEM; 3129 opts->threads_spec = THREAD_SPEC__USER; 3130 break; 3131 } 3132 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3133 opts->threads_spec = s; 3134 break; 3135 } 3136 } 3137 } 3138 3139 if (opts->threads_spec == THREAD_SPEC__USER) 3140 pr_debug("threads_spec: %s\n", 
opts->threads_user_spec); 3141 else 3142 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3143 3144 return 0; 3145 } 3146 3147 static int parse_output_max_size(const struct option *opt, 3148 const char *str, int unset) 3149 { 3150 unsigned long *s = (unsigned long *)opt->value; 3151 static struct parse_tag tags_size[] = { 3152 { .tag = 'B', .mult = 1 }, 3153 { .tag = 'K', .mult = 1 << 10 }, 3154 { .tag = 'M', .mult = 1 << 20 }, 3155 { .tag = 'G', .mult = 1 << 30 }, 3156 { .tag = 0 }, 3157 }; 3158 unsigned long val; 3159 3160 if (unset) { 3161 *s = 0; 3162 return 0; 3163 } 3164 3165 val = parse_tag_value(str, tags_size); 3166 if (val != (unsigned long) -1) { 3167 *s = val; 3168 return 0; 3169 } 3170 3171 return -1; 3172 } 3173 3174 static int record__parse_mmap_pages(const struct option *opt, 3175 const char *str, 3176 int unset __maybe_unused) 3177 { 3178 struct record_opts *opts = opt->value; 3179 char *s, *p; 3180 unsigned int mmap_pages; 3181 int ret; 3182 3183 if (!str) 3184 return -EINVAL; 3185 3186 s = strdup(str); 3187 if (!s) 3188 return -ENOMEM; 3189 3190 p = strchr(s, ','); 3191 if (p) 3192 *p = '\0'; 3193 3194 if (*s) { 3195 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3196 if (ret) 3197 goto out_free; 3198 opts->mmap_pages = mmap_pages; 3199 } 3200 3201 if (!p) { 3202 ret = 0; 3203 goto out_free; 3204 } 3205 3206 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3207 if (ret) 3208 goto out_free; 3209 3210 opts->auxtrace_mmap_pages = mmap_pages; 3211 3212 out_free: 3213 free(s); 3214 return ret; 3215 } 3216 3217 static int record__parse_off_cpu_thresh(const struct option *opt, 3218 const char *str, 3219 int unset __maybe_unused) 3220 { 3221 struct record_opts *opts = opt->value; 3222 char *endptr; 3223 u64 off_cpu_thresh_ms; 3224 3225 if (!str) 3226 return -EINVAL; 3227 3228 off_cpu_thresh_ms = strtoull(str, &endptr, 10); 3229 3230 /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */ 3231 if 
(*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0"))) 3232 return -EINVAL; 3233 else 3234 opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC; 3235 3236 return 0; 3237 } 3238 3239 static int parse_control_option(const struct option *opt, 3240 const char *str, 3241 int unset __maybe_unused) 3242 { 3243 struct record_opts *opts = opt->value; 3244 3245 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3246 } 3247 3248 static void switch_output_size_warn(struct record *rec) 3249 { 3250 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3251 struct switch_output *s = &rec->switch_output; 3252 3253 wakeup_size /= 2; 3254 3255 if (s->size < wakeup_size) { 3256 char buf[100]; 3257 3258 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3259 pr_warning("WARNING: switch-output data size lower than " 3260 "wakeup kernel buffer size (%s) " 3261 "expect bigger perf.data sizes\n", buf); 3262 } 3263 } 3264 3265 static int switch_output_setup(struct record *rec) 3266 { 3267 struct switch_output *s = &rec->switch_output; 3268 static struct parse_tag tags_size[] = { 3269 { .tag = 'B', .mult = 1 }, 3270 { .tag = 'K', .mult = 1 << 10 }, 3271 { .tag = 'M', .mult = 1 << 20 }, 3272 { .tag = 'G', .mult = 1 << 30 }, 3273 { .tag = 0 }, 3274 }; 3275 static struct parse_tag tags_time[] = { 3276 { .tag = 's', .mult = 1 }, 3277 { .tag = 'm', .mult = 60 }, 3278 { .tag = 'h', .mult = 60*60 }, 3279 { .tag = 'd', .mult = 60*60*24 }, 3280 { .tag = 0 }, 3281 }; 3282 unsigned long val; 3283 3284 /* 3285 * If we're using --switch-output-events, then we imply its 3286 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3287 * thread to its parent. 
3288 */ 3289 if (rec->switch_output_event_set) { 3290 if (record__threads_enabled(rec)) { 3291 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3292 return 0; 3293 } 3294 goto do_signal; 3295 } 3296 3297 if (!s->set) 3298 return 0; 3299 3300 if (record__threads_enabled(rec)) { 3301 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3302 return 0; 3303 } 3304 3305 if (!strcmp(s->str, "signal")) { 3306 do_signal: 3307 s->signal = true; 3308 pr_debug("switch-output with SIGUSR2 signal\n"); 3309 goto enabled; 3310 } 3311 3312 val = parse_tag_value(s->str, tags_size); 3313 if (val != (unsigned long) -1) { 3314 s->size = val; 3315 pr_debug("switch-output with %s size threshold\n", s->str); 3316 goto enabled; 3317 } 3318 3319 val = parse_tag_value(s->str, tags_time); 3320 if (val != (unsigned long) -1) { 3321 s->time = val; 3322 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3323 s->str, s->time); 3324 goto enabled; 3325 } 3326 3327 return -1; 3328 3329 enabled: 3330 rec->timestamp_filename = true; 3331 s->enabled = true; 3332 3333 if (s->size && !rec->opts.no_buffering) 3334 switch_output_size_warn(rec); 3335 3336 return 0; 3337 } 3338 3339 static const char * const __record_usage[] = { 3340 "perf record [<options>] [<command>]", 3341 "perf record [<options>] -- <command> [<options>]", 3342 NULL 3343 }; 3344 const char * const *record_usage = __record_usage; 3345 3346 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 3347 struct perf_sample *sample, struct machine *machine) 3348 { 3349 /* 3350 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3351 * no need to add them twice. 
3352 */ 3353 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3354 return 0; 3355 return perf_event__process_mmap(tool, event, sample, machine); 3356 } 3357 3358 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 3359 struct perf_sample *sample, struct machine *machine) 3360 { 3361 /* 3362 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3363 * no need to add them twice. 3364 */ 3365 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3366 return 0; 3367 3368 return perf_event__process_mmap2(tool, event, sample, machine); 3369 } 3370 3371 static int process_timestamp_boundary(const struct perf_tool *tool, 3372 union perf_event *event __maybe_unused, 3373 struct perf_sample *sample, 3374 struct machine *machine __maybe_unused) 3375 { 3376 struct record *rec = container_of(tool, struct record, tool); 3377 3378 set_timestamp_boundary(rec, sample->time); 3379 return 0; 3380 } 3381 3382 static int parse_record_synth_option(const struct option *opt, 3383 const char *str, 3384 int unset __maybe_unused) 3385 { 3386 struct record_opts *opts = opt->value; 3387 char *p = strdup(str); 3388 3389 if (p == NULL) 3390 return -1; 3391 3392 opts->synth = parse_synth_opt(p); 3393 free(p); 3394 3395 if (opts->synth < 0) { 3396 pr_err("Invalid synth option: %s\n", str); 3397 return -1; 3398 } 3399 return 0; 3400 } 3401 3402 /* 3403 * XXX Ideally would be local to cmd_record() and passed to a record__new 3404 * because we need to have access to it in record__exit, that is called 3405 * after cmd_record() exits, but since record_options need to be accessible to 3406 * builtin-script, leave it here. 3407 * 3408 * At least we don't ouch it in all the other functions here directly. 3409 * 3410 * Just say no to tons of global variables, sigh. 
*/
/*
 * Initial values of the recording state. The entries of __record_options
 * below store the parsed command line values directly into these fields.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
		/* -1: no control fds configured. NOTE(review): inferred from the value, confirm. */
		.ctl_fd              = -1,
		.ctl_fd_ack          = -1,
		.synth               = PERF_SYNTH_ALL,
		.off_cpu_thresh_ns   = OFFCPU_THRESH,
	},
	.buildid_mmap = true,
};

/* Help text for the call graph option: the shared text plus the default. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/* Callback argument of the -e/--event option: events go to record.evlist. */
static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/*
 * Same, but targeting the side band evlist.
 * NOTE(review): presumably used by --switch-output-event, whose option entry
 * is outside this part of the file - confirm.
 */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "latency", &record.latency,
		    "Enable data collection for latency profiling.\n"
		    "\t\t\t  Use perf report --latency for latency-centric profile."),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts,
"freq or 'max'", 3492 "profile at this frequency", 3493 record__parse_freq), 3494 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3495 "number of mmap data pages and AUX area tracing mmap pages", 3496 record__parse_mmap_pages), 3497 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3498 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3499 record__mmap_flush_parse), 3500 OPT_CALLBACK_NOOPT('g', NULL, &record.opts, 3501 NULL, "enables call-graph recording" , 3502 &record_callchain_opt), 3503 OPT_CALLBACK(0, "call-graph", &record.opts, 3504 "record_mode[,record_size]", record_callchain_help, 3505 &record_parse_callchain_opt), 3506 OPT_INCR('v', "verbose", &verbose, 3507 "be more verbose (show counter open errors, etc)"), 3508 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3509 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3510 "per thread counts"), 3511 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3512 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3513 "Record the sample physical addresses"), 3514 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3515 "Record the sampled data address data page size"), 3516 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3517 "Record the sampled code address (ip) page size"), 3518 OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, 3519 "Record the data source for memory operations"), 3520 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3521 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3522 "Record the sample identifier"), 3523 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3524 &record.opts.sample_time_set, 3525 "Record the sample timestamps"), 3526 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3527 "Record the sample period"), 3528 OPT_BOOLEAN('n', 
"no-samples", &record.opts.no_samples, 3529 "don't sample"), 3530 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3531 &record.no_buildid_cache_set, 3532 "do not update the buildid cache"), 3533 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3534 &record.no_buildid_set, 3535 "do not collect buildids in perf.data"), 3536 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3537 "monitor event in cgroup name only", 3538 parse_cgroups), 3539 OPT_CALLBACK('D', "delay", &record, "ms", 3540 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3541 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3542 record__parse_event_enable_time), 3543 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3544 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), 3545 3546 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3547 "branch any", "sample any taken branches", 3548 parse_branch_stack), 3549 3550 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3551 "branch filter mask", "branch stack filter modes", 3552 parse_branch_stack), 3553 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3554 "sample by weight (on special events only)"), 3555 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3556 "sample transaction flags (special events only)"), 3557 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3558 "use per-thread mmaps"), 3559 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3560 "sample selected machine registers on interrupt," 3561 " use '-I?' to list register names", parse_intr_regs), 3562 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3563 "sample selected machine registers in user space," 3564 " use '--user-regs=?' 
to list register names", parse_user_regs), 3565 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3566 "Record running/enabled time of read (:S) events"), 3567 OPT_CALLBACK('k', "clockid", &record.opts, 3568 "clockid", "clockid to use for events, see clock_gettime()", 3569 parse_clockid), 3570 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3571 "opts", "AUX area tracing Snapshot Mode", ""), 3572 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3573 "opts", "sample AUX area", ""), 3574 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3575 "per thread proc mmap processing timeout in ms"), 3576 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3577 "Record namespaces events"), 3578 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3579 "Record cgroup events"), 3580 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3581 &record.opts.record_switch_events_set, 3582 "Record context switch events"), 3583 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3584 "Configure all used events to run in kernel space.", 3585 PARSE_OPT_EXCLUSIVE), 3586 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3587 "Configure all used events to run in user space.", 3588 PARSE_OPT_EXCLUSIVE), 3589 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3590 "collect kernel callchains"), 3591 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3592 "collect user callchains"), 3593 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3594 "file", "vmlinux pathname"), 3595 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3596 "Record build-id of all DSOs regardless of hits"), 3597 OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set, 3598 "Record build-id in mmap events and skip build-id processing."), 3599 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3600 "append timestamp to output filename"), 3601 
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3602 "Record timestamp boundary (time of first/last samples)"), 3603 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3604 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3605 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3606 "signal"), 3607 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3608 &record.switch_output_event_set, "switch output event", 3609 "switch output event selector. use 'perf list' to list available events", 3610 parse_events_option_new_evlist), 3611 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3612 "Limit number of switch output generated files"), 3613 OPT_BOOLEAN(0, "dry-run", &dry_run, 3614 "Parse options then exit"), 3615 #ifdef HAVE_AIO_SUPPORT 3616 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3617 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3618 record__aio_parse), 3619 #endif 3620 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3621 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3622 record__parse_affinity), 3623 #ifdef HAVE_ZSTD_SUPPORT 3624 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3625 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3626 record__parse_comp_level), 3627 #endif 3628 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3629 "size", "Limit the maximum size of the output file", parse_output_max_size), 3630 OPT_UINTEGER(0, "num-thread-synthesize", 3631 &record.opts.nr_threads_synthesize, 3632 "number of threads to run for event synthesis"), 3633 #ifdef HAVE_LIBPFM 3634 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3635 "libpfm4 event selector. 
use 'perf list' to list available events", 3636 parse_libpfm_events_option), 3637 #endif 3638 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3639 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3640 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3641 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3642 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3643 parse_control_option), 3644 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3645 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3646 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3647 &record.debuginfod.set, "debuginfod urls", 3648 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3649 "system"), 3650 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3651 "write collected trace data into several data files using parallel threads", 3652 record__parse_threads), 3653 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3654 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3655 "BPF filter action"), 3656 OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", 3657 "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", 3658 record__parse_off_cpu_thresh), 3659 OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap, 3660 &record.opts.record_data_mmap_set, 3661 "Record mmap events for non-executable mappings"), 3662 OPT_END() 3663 }; 3664 3665 struct option *record_options = __record_options; 3666 3667 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3668 { 3669 struct perf_cpu cpu; 3670 unsigned int idx; 3671 3672 if (cpu_map__is_dummy(cpus)) 3673 return 0; 3674 3675 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3676 /* Return ENODEV is input cpu is greater than max cpu */ 3677 if ((unsigned long)cpu.cpu > mask->nbits) 3678 return -ENODEV; 3679 __set_bit(cpu.cpu, mask->bits); 3680 } 3681 3682 return 0; 3683 } 3684 3685 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3686 { 3687 struct perf_cpu_map *cpus; 3688 3689 cpus = perf_cpu_map__new(mask_spec); 3690 if (!cpus) 3691 return -ENOMEM; 3692 3693 bitmap_zero(mask->bits, mask->nbits); 3694 if (record__mmap_cpu_mask_init(mask, cpus)) 3695 return -ENODEV; 3696 3697 perf_cpu_map__put(cpus); 3698 3699 return 0; 3700 } 3701 3702 static void record__free_thread_masks(struct record *rec, int nr_threads) 3703 { 3704 int t; 3705 3706 if (rec->thread_masks) 3707 for (t = 0; t < nr_threads; t++) 3708 record__thread_mask_free(&rec->thread_masks[t]); 3709 3710 zfree(&rec->thread_masks); 3711 } 3712 3713 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3714 { 3715 int t, ret; 3716 3717 rec->thread_masks = calloc(nr_threads, sizeof(*(rec->thread_masks))); 3718 if (!rec->thread_masks) { 3719 pr_err("Failed to allocate thread masks\n"); 3720 return -ENOMEM; 3721 } 3722 3723 for (t = 0; t < nr_threads; t++) { 3724 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3725 if (ret) { 3726 pr_err("Failed to allocate thread masks[%d]\n", t); 3727 goto out_free; 3728 } 3729 } 3730 
3731 return 0; 3732 3733 out_free: 3734 record__free_thread_masks(rec, nr_threads); 3735 3736 return ret; 3737 } 3738 3739 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3740 { 3741 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3742 3743 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3744 if (ret) 3745 return ret; 3746 3747 rec->nr_threads = nr_cpus; 3748 pr_debug("nr_threads: %d\n", rec->nr_threads); 3749 3750 for (t = 0; t < rec->nr_threads; t++) { 3751 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3752 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3753 if (verbose > 0) { 3754 pr_debug("thread_masks[%d]: ", t); 3755 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3756 pr_debug("thread_masks[%d]: ", t); 3757 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3758 } 3759 } 3760 3761 return 0; 3762 } 3763 3764 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3765 const char **maps_spec, const char **affinity_spec, 3766 u32 nr_spec) 3767 { 3768 u32 s; 3769 int ret = 0, t = 0; 3770 struct mmap_cpu_mask cpus_mask; 3771 struct thread_mask thread_mask, full_mask, *thread_masks; 3772 3773 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3774 if (ret) { 3775 pr_err("Failed to allocate CPUs mask\n"); 3776 return ret; 3777 } 3778 3779 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3780 if (ret) { 3781 pr_err("Failed to init cpu mask\n"); 3782 goto out_free_cpu_mask; 3783 } 3784 3785 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3786 if (ret) { 3787 pr_err("Failed to allocate full mask\n"); 3788 goto out_free_cpu_mask; 3789 } 3790 3791 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3792 if (ret) { 3793 pr_err("Failed to allocate thread mask\n"); 3794 goto out_free_full_and_cpu_masks; 3795 } 3796 3797 for (s = 0; s < 
nr_spec; s++) { 3798 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3799 if (ret) { 3800 pr_err("Failed to initialize maps thread mask\n"); 3801 goto out_free; 3802 } 3803 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3804 if (ret) { 3805 pr_err("Failed to initialize affinity thread mask\n"); 3806 goto out_free; 3807 } 3808 3809 /* ignore invalid CPUs but do not allow empty masks */ 3810 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3811 cpus_mask.bits, thread_mask.maps.nbits)) { 3812 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3813 ret = -EINVAL; 3814 goto out_free; 3815 } 3816 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3817 cpus_mask.bits, thread_mask.affinity.nbits)) { 3818 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3819 ret = -EINVAL; 3820 goto out_free; 3821 } 3822 3823 /* do not allow intersection with other masks (full_mask) */ 3824 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3825 thread_mask.maps.nbits)) { 3826 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3827 ret = -EINVAL; 3828 goto out_free; 3829 } 3830 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3831 thread_mask.affinity.nbits)) { 3832 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3833 ret = -EINVAL; 3834 goto out_free; 3835 } 3836 3837 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3838 thread_mask.maps.bits, full_mask.maps.nbits); 3839 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3840 thread_mask.affinity.bits, full_mask.maps.nbits); 3841 3842 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3843 if (!thread_masks) { 3844 pr_err("Failed to reallocate thread masks\n"); 3845 ret = -ENOMEM; 3846 goto out_free; 3847 } 3848 rec->thread_masks = thread_masks; 3849 rec->thread_masks[t] = thread_mask; 3850 if (verbose > 0) { 3851 pr_debug("thread_masks[%d]: ", t); 3852 
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3853 pr_debug("thread_masks[%d]: ", t); 3854 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3855 } 3856 t++; 3857 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3858 if (ret) { 3859 pr_err("Failed to allocate thread mask\n"); 3860 goto out_free_full_and_cpu_masks; 3861 } 3862 } 3863 rec->nr_threads = t; 3864 pr_debug("nr_threads: %d\n", rec->nr_threads); 3865 if (!rec->nr_threads) 3866 ret = -EINVAL; 3867 3868 out_free: 3869 record__thread_mask_free(&thread_mask); 3870 out_free_full_and_cpu_masks: 3871 record__thread_mask_free(&full_mask); 3872 out_free_cpu_mask: 3873 record__mmap_cpu_mask_free(&cpus_mask); 3874 3875 return ret; 3876 } 3877 3878 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3879 { 3880 int ret; 3881 struct cpu_topology *topo; 3882 3883 topo = cpu_topology__new(); 3884 if (!topo) { 3885 pr_err("Failed to allocate CPU topology\n"); 3886 return -ENOMEM; 3887 } 3888 3889 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3890 topo->core_cpus_list, topo->core_cpus_lists); 3891 cpu_topology__delete(topo); 3892 3893 return ret; 3894 } 3895 3896 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3897 { 3898 int ret; 3899 struct cpu_topology *topo; 3900 3901 topo = cpu_topology__new(); 3902 if (!topo) { 3903 pr_err("Failed to allocate CPU topology\n"); 3904 return -ENOMEM; 3905 } 3906 3907 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3908 topo->package_cpus_list, topo->package_cpus_lists); 3909 cpu_topology__delete(topo); 3910 3911 return ret; 3912 } 3913 3914 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3915 { 3916 u32 s; 3917 int ret; 3918 const char **spec; 3919 struct numa_topology *topo; 3920 3921 topo = numa_topology__new(); 3922 if (!topo) { 3923 pr_err("Failed to allocate 
NUMA topology\n"); 3924 return -ENOMEM; 3925 } 3926 3927 spec = calloc(topo->nr, sizeof(char *)); 3928 if (!spec) { 3929 pr_err("Failed to allocate NUMA spec\n"); 3930 ret = -ENOMEM; 3931 goto out_delete_topo; 3932 } 3933 for (s = 0; s < topo->nr; s++) 3934 spec[s] = topo->nodes[s].cpus; 3935 3936 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3937 3938 zfree(&spec); 3939 3940 out_delete_topo: 3941 numa_topology__delete(topo); 3942 3943 return ret; 3944 } 3945 3946 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3947 { 3948 int t, ret; 3949 u32 s, nr_spec = 0; 3950 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3951 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3952 3953 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3954 spec = strtok_r(user_spec, ":", &spec_ptr); 3955 if (spec == NULL) 3956 break; 3957 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3958 mask = strtok_r(spec, "/", &mask_ptr); 3959 if (mask == NULL) 3960 break; 3961 pr_debug2(" maps mask: %s\n", mask); 3962 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3963 if (!tmp_spec) { 3964 pr_err("Failed to reallocate maps spec\n"); 3965 ret = -ENOMEM; 3966 goto out_free; 3967 } 3968 maps_spec = tmp_spec; 3969 maps_spec[nr_spec] = dup_mask = strdup(mask); 3970 if (!maps_spec[nr_spec]) { 3971 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3972 ret = -ENOMEM; 3973 goto out_free; 3974 } 3975 mask = strtok_r(NULL, "/", &mask_ptr); 3976 if (mask == NULL) { 3977 pr_err("Invalid thread maps or affinity specs\n"); 3978 ret = -EINVAL; 3979 goto out_free; 3980 } 3981 pr_debug2(" affinity mask: %s\n", mask); 3982 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3983 if (!tmp_spec) { 3984 pr_err("Failed to reallocate affinity spec\n"); 3985 ret = -ENOMEM; 3986 goto out_free; 3987 } 3988 affinity_spec = tmp_spec; 3989 
affinity_spec[nr_spec] = strdup(mask); 3990 if (!affinity_spec[nr_spec]) { 3991 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3992 ret = -ENOMEM; 3993 goto out_free; 3994 } 3995 dup_mask = NULL; 3996 nr_spec++; 3997 } 3998 3999 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 4000 (const char **)affinity_spec, nr_spec); 4001 4002 out_free: 4003 free(dup_mask); 4004 for (s = 0; s < nr_spec; s++) { 4005 if (maps_spec) 4006 free(maps_spec[s]); 4007 if (affinity_spec) 4008 free(affinity_spec[s]); 4009 } 4010 free(affinity_spec); 4011 free(maps_spec); 4012 4013 return ret; 4014 } 4015 4016 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 4017 { 4018 int ret; 4019 4020 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 4021 if (ret) 4022 return ret; 4023 4024 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 4025 return -ENODEV; 4026 4027 rec->nr_threads = 1; 4028 4029 return 0; 4030 } 4031 4032 static int record__init_thread_masks(struct record *rec) 4033 { 4034 int ret = 0; 4035 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 4036 4037 if (!record__threads_enabled(rec)) 4038 return record__init_thread_default_masks(rec, cpus); 4039 4040 if (evlist__per_thread(rec->evlist)) { 4041 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 4042 return -EINVAL; 4043 } 4044 4045 switch (rec->opts.threads_spec) { 4046 case THREAD_SPEC__CPU: 4047 ret = record__init_thread_cpu_masks(rec, cpus); 4048 break; 4049 case THREAD_SPEC__CORE: 4050 ret = record__init_thread_core_masks(rec, cpus); 4051 break; 4052 case THREAD_SPEC__PACKAGE: 4053 ret = record__init_thread_package_masks(rec, cpus); 4054 break; 4055 case THREAD_SPEC__NUMA: 4056 ret = record__init_thread_numa_masks(rec, cpus); 4057 break; 4058 case THREAD_SPEC__USER: 4059 ret = record__init_thread_user_masks(rec, cpus); 4060 break; 4061 default: 4062 break; 4063 } 4064 4065 return ret; 
4066 } 4067 4068 int cmd_record(int argc, const char **argv) 4069 { 4070 int err; 4071 struct record *rec = &record; 4072 char errbuf[BUFSIZ]; 4073 4074 setlocale(LC_ALL, ""); 4075 4076 #ifndef HAVE_BPF_SKEL 4077 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 4078 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 4079 # undef set_nobuild 4080 #endif 4081 4082 /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 4083 symbol_conf.lazy_load_kernel_maps = true; 4084 rec->opts.affinity = PERF_AFFINITY_SYS; 4085 4086 rec->evlist = evlist__new(); 4087 if (rec->evlist == NULL) 4088 return -ENOMEM; 4089 4090 err = perf_config(perf_record_config, rec); 4091 if (err) 4092 return err; 4093 4094 argc = parse_options(argc, argv, record_options, record_usage, 4095 PARSE_OPT_STOP_AT_NON_OPTION); 4096 if (quiet) 4097 perf_quiet_option(); 4098 4099 err = symbol__validate_sym_arguments(); 4100 if (err) 4101 return err; 4102 4103 perf_debuginfod_setup(&record.debuginfod); 4104 4105 /* 4106 * Use system wide (-a) for the default target (ie. when no 4107 * workload). User ID filtering also implies system-wide. 4108 */ 4109 if ((!argc && target__none(&rec->opts.target)) || rec->uid_str) 4110 rec->opts.target.system_wide = true; 4111 4112 if (nr_cgroups && !rec->opts.target.system_wide) { 4113 usage_with_options_msg(record_usage, record_options, 4114 "cgroup monitoring only available in system-wide mode"); 4115 4116 } 4117 4118 if (record.latency) { 4119 /* 4120 * There is no fundamental reason why latency profiling 4121 * can't work for system-wide mode, but exact semantics 4122 * and details are to be defined. 
4123 * See the following thread for details: 4124 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/ 4125 */ 4126 if (record.opts.target.system_wide) { 4127 pr_err("Failed: latency profiling is not supported with system-wide collection.\n"); 4128 err = -EINVAL; 4129 goto out_opts; 4130 } 4131 record.opts.record_switch_events = true; 4132 } 4133 4134 if (rec->buildid_mmap && !perf_can_record_build_id()) { 4135 pr_warning("Missing support for build id in kernel mmap events.\n" 4136 "Disable this warning with --no-buildid-mmap\n"); 4137 rec->buildid_mmap = false; 4138 } 4139 4140 if (rec->buildid_mmap) { 4141 /* Enable perf_event_attr::build_id bit. */ 4142 rec->opts.build_id = true; 4143 /* Disable build-ID table in the header. */ 4144 rec->no_buildid = true; 4145 } else { 4146 pr_debug("Disabling build id in synthesized mmap2 events.\n"); 4147 symbol_conf.no_buildid_mmap2 = true; 4148 } 4149 4150 if (rec->no_buildid_set && rec->no_buildid) { 4151 /* -B implies -N for historic reasons. 
*/ 4152 rec->no_buildid_cache = true; 4153 } 4154 4155 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 4156 pr_err("Kernel has no cgroup sampling support.\n"); 4157 err = -EINVAL; 4158 goto out_opts; 4159 } 4160 4161 if (rec->opts.kcore) 4162 rec->opts.text_poke = true; 4163 4164 if (rec->opts.kcore || record__threads_enabled(rec)) 4165 rec->data.is_dir = true; 4166 4167 if (record__threads_enabled(rec)) { 4168 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 4169 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 4170 goto out_opts; 4171 } 4172 if (record__aio_enabled(rec)) { 4173 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 4174 goto out_opts; 4175 } 4176 } 4177 4178 if (rec->opts.comp_level != 0) { 4179 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 4180 rec->no_buildid = true; 4181 } 4182 4183 if (rec->opts.record_switch_events && 4184 !perf_can_record_switch_events()) { 4185 ui__error("kernel does not support recording context switch events\n"); 4186 parse_options_usage(record_usage, record_options, "switch-events", 0); 4187 err = -EINVAL; 4188 goto out_opts; 4189 } 4190 4191 if (switch_output_setup(rec)) { 4192 parse_options_usage(record_usage, record_options, "switch-output", 0); 4193 err = -EINVAL; 4194 goto out_opts; 4195 } 4196 4197 if (rec->switch_output.time) { 4198 signal(SIGALRM, alarm_sig_handler); 4199 alarm(rec->switch_output.time); 4200 } 4201 4202 if (rec->switch_output.num_files) { 4203 rec->switch_output.filenames = calloc(rec->switch_output.num_files, 4204 sizeof(char *)); 4205 if (!rec->switch_output.filenames) { 4206 err = -EINVAL; 4207 goto out_opts; 4208 } 4209 } 4210 4211 if (rec->timestamp_filename && record__threads_enabled(rec)) { 4212 rec->timestamp_filename = false; 4213 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 4214 } 4215 4216 if 
(rec->filter_action) { 4217 if (!strcmp(rec->filter_action, "pin")) 4218 err = perf_bpf_filter__pin(); 4219 else if (!strcmp(rec->filter_action, "unpin")) 4220 err = perf_bpf_filter__unpin(); 4221 else { 4222 pr_warning("Unknown BPF filter action: %s\n", rec->filter_action); 4223 err = -EINVAL; 4224 } 4225 goto out_opts; 4226 } 4227 4228 /* For backward compatibility, -d implies --mem-info and --data-mmap */ 4229 if (rec->opts.sample_address) { 4230 rec->opts.sample_data_src = true; 4231 if (!rec->opts.record_data_mmap_set) 4232 rec->opts.record_data_mmap = true; 4233 } 4234 4235 /* 4236 * Allow aliases to facilitate the lookup of symbols for address 4237 * filters. Refer to auxtrace_parse_filters(). 4238 */ 4239 symbol_conf.allow_aliases = true; 4240 4241 symbol__init(NULL); 4242 4243 err = record__auxtrace_init(rec); 4244 if (err) 4245 goto out; 4246 4247 if (dry_run) 4248 goto out; 4249 4250 err = -ENOMEM; 4251 4252 if (rec->no_buildid_cache) { 4253 disable_buildid_cache(); 4254 } else if (rec->switch_output.enabled) { 4255 /* 4256 * In 'perf record --switch-output', disable buildid 4257 * generation by default to reduce data file switching 4258 * overhead. 
Still generate buildid if they are required 4259 * explicitly using 4260 * 4261 * perf record --switch-output --no-no-buildid \ 4262 * --no-no-buildid-cache 4263 * 4264 * Following code equals to: 4265 * 4266 * if ((rec->no_buildid || !rec->no_buildid_set) && 4267 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 4268 * disable_buildid_cache(); 4269 */ 4270 bool disable = true; 4271 4272 if (rec->no_buildid_set && !rec->no_buildid) 4273 disable = false; 4274 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 4275 disable = false; 4276 if (disable) { 4277 rec->no_buildid = true; 4278 rec->no_buildid_cache = true; 4279 disable_buildid_cache(); 4280 } 4281 } 4282 4283 if (record.opts.overwrite) 4284 record.opts.tail_synthesize = true; 4285 4286 if (rec->evlist->core.nr_entries == 0) { 4287 struct evlist *def_evlist = evlist__new_default(&rec->opts.target, 4288 callchain_param.enabled); 4289 4290 if (!def_evlist) 4291 goto out; 4292 4293 evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries); 4294 evlist__delete(def_evlist); 4295 } 4296 4297 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4298 rec->opts.no_inherit = true; 4299 4300 err = target__validate(&rec->opts.target); 4301 if (err) { 4302 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4303 ui__warning("%s\n", errbuf); 4304 } 4305 4306 if (rec->uid_str) { 4307 uid_t uid = parse_uid(rec->uid_str); 4308 4309 if (uid == UINT_MAX) { 4310 ui__error("Invalid User: %s", rec->uid_str); 4311 err = -EINVAL; 4312 goto out; 4313 } 4314 err = parse_uid_filter(rec->evlist, uid); 4315 if (err) 4316 goto out; 4317 } 4318 4319 /* Enable ignoring missing threads when -p option is defined. 
*/ 4320 rec->opts.ignore_missing_thread = rec->opts.target.pid; 4321 4322 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4323 4324 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) { 4325 if (EM_HOST == EM_AARCH64) 4326 add_leaf_frame_caller_opts_aarch64(&rec->opts); 4327 } 4328 4329 err = -ENOMEM; 4330 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4331 if (rec->opts.target.pid != NULL) { 4332 pr_err("Couldn't create thread/CPU maps: %s\n", 4333 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4334 goto out; 4335 } 4336 else 4337 usage_with_options(record_usage, record_options); 4338 } 4339 4340 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4341 if (err) 4342 goto out; 4343 4344 /* 4345 * We take all buildids when the file contains 4346 * AUX area tracing data because we do not decode the 4347 * trace because it would take too long. 4348 */ 4349 if (rec->opts.full_auxtrace) 4350 rec->buildid_all = true; 4351 4352 if (rec->opts.text_poke) { 4353 err = record__config_text_poke(rec->evlist); 4354 if (err) { 4355 pr_err("record__config_text_poke failed, error %d\n", err); 4356 goto out; 4357 } 4358 } 4359 4360 if (rec->off_cpu) { 4361 err = record__config_off_cpu(rec); 4362 if (err) { 4363 pr_err("record__config_off_cpu failed, error %d\n", err); 4364 goto out; 4365 } 4366 } 4367 4368 if (record_opts__config(&rec->opts)) { 4369 err = -EINVAL; 4370 goto out; 4371 } 4372 4373 err = record__config_tracking_events(rec); 4374 if (err) { 4375 pr_err("record__config_tracking_events failed, error %d\n", err); 4376 goto out; 4377 } 4378 4379 err = record__init_thread_masks(rec); 4380 if (err) { 4381 pr_err("Failed to initialize parallel data streaming masks\n"); 4382 goto out; 4383 } 4384 4385 if (rec->opts.nr_cblocks > nr_cblocks_max) 4386 rec->opts.nr_cblocks = nr_cblocks_max; 4387 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4388 4389 
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4390 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4391 4392 if (rec->opts.comp_level > comp_level_max) 4393 rec->opts.comp_level = comp_level_max; 4394 pr_debug("comp level: %d\n", rec->opts.comp_level); 4395 4396 err = __cmd_record(&record, argc, argv); 4397 out: 4398 record__free_thread_masks(rec, rec->nr_threads); 4399 rec->nr_threads = 0; 4400 symbol__exit(); 4401 auxtrace_record__free(rec->itr); 4402 out_opts: 4403 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4404 evlist__delete(rec->evlist); 4405 return err; 4406 } 4407 4408 static void snapshot_sig_handler(int sig __maybe_unused) 4409 { 4410 struct record *rec = &record; 4411 4412 hit_auxtrace_snapshot_trigger(rec); 4413 4414 if (switch_output_signal(rec)) 4415 trigger_hit(&switch_output_trigger); 4416 } 4417 4418 static void alarm_sig_handler(int sig __maybe_unused) 4419 { 4420 struct record *rec = &record; 4421 4422 if (switch_output_time(rec)) 4423 trigger_hit(&switch_output_trigger); 4424 } 4425