1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/callchain.h" 18 #include "util/cgroup.h" 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/mmap.h" 25 #include "util/mutex.h" 26 #include "util/target.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/stat.h" 30 #include "util/symbol.h" 31 #include "util/record.h" 32 #include "util/cpumap.h" 33 #include "util/thread_map.h" 34 #include "util/data.h" 35 #include "util/perf_regs.h" 36 #include "util/auxtrace.h" 37 #include "util/tsc.h" 38 #include "util/parse-branch-options.h" 39 #include "util/parse-regs-options.h" 40 #include "util/perf_api_probe.h" 41 #include "util/trigger.h" 42 #include "util/perf-hooks.h" 43 #include "util/cpu-set-sched.h" 44 #include "util/synthetic-events.h" 45 #include "util/time-utils.h" 46 #include "util/units.h" 47 #include "util/bpf-event.h" 48 #include "util/util.h" 49 #include "util/pfm.h" 50 #include "util/pmu.h" 51 #include "util/pmus.h" 52 #include "util/clockid.h" 53 #include "util/off_cpu.h" 54 #include "util/bpf-filter.h" 55 #include "util/strbuf.h" 56 #include "asm/bug.h" 57 #include "perf.h" 58 #include "cputopo.h" 59 60 #include <errno.h> 61 #include <inttypes.h> 62 #include <locale.h> 63 #include <poll.h> 64 #include <pthread.h> 65 #include <unistd.h> 66 #ifndef HAVE_GETTID 67 #include <syscall.h> 68 #endif 69 #include <sched.h> 70 #include <signal.h> 71 #ifdef HAVE_EVENTFD_SUPPORT 72 #include <sys/eventfd.h> 73 #endif 74 #include <sys/mman.h> 75 #include 
<sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

/*
 * State for output-file rotation (--switch-output): rotation can be
 * requested by signal, or by an accumulated-size / elapsed-time threshold.
 */
struct switch_output {
	bool enabled;
	bool signal;		/* rotate on signal */
	unsigned long size;	/* rotate once this many bytes were written */
	unsigned long time;	/* non-zero: time-based rotation requested */
	const char *str;	/* raw option string */
	bool set;
	char **filenames;	/* generated output file names */
	int num_files;
	int cur_file;
};

/*
 * Per-thread CPU masks: "maps" selects which mmaps the thread reads,
 * "affinity" which CPUs the thread may run on.
 */
struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

/*
 * Per-thread state for parallel trace streaming (--threads).  Each worker
 * owns a subset of the evlist mmaps and talks to the main thread over the
 * msg/ack pipe pair; byte counters are accumulated per thread.
 */
struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];	/* main thread -> worker messages */
		int ack[2];	/* worker -> main thread acknowledgements */
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;		/* index of the msg pipe read end in pollfd */
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

/* Current thread's record_thread, set up by the streaming machinery. */
static __thread struct record_thread *thread;

/* Messages exchanged over the record_thread msg/ack pipes. */
enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

/* How --threads was asked to partition the mmaps across worker threads. */
enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

/* Pairs an evlist pollfd slot with the corresponding thread pollfd slot. */
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

/* Global state of one 'perf record' session. */
struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	u64 thread_bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool latency;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool buildid_mmap_set;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	const char *filter_action;
	const char *uid_str;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
	struct pollfd_index_map *index_map;
	size_t index_map_sz;
	size_t index_map_cnt;
};

/* Set to 1 (by signal handler or size limit) to stop the record loop. */
static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct machine *machine);

#ifndef HAVE_GETTID
/* Fallback for libcs without gettid(): issue the raw syscall. */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

/* Non-zero when parallel trace streaming (--threads) was requested. */
static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

/* True when signal-driven rotation is configured and the trigger is armed. */
static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

/* True when the size-based rotation threshold has been reached. */
static bool switch_output_size(struct record *rec)
{
	return
rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

/* True when time-based rotation is configured and the trigger is armed. */
static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

/* Total bytes written so far: main thread plus all worker threads. */
static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

/*
 * Write @size bytes at @bf to the output: the map's per-thread file when
 * one exists (parallel streaming), otherwise the session's data file.
 * Accounts the bytes, stops the session once --max-size is exceeded and
 * fires the switch-output trigger when the size threshold is reached.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
/*
 * Queue an asynchronous write of @size bytes at @buf to @trace_fd at
 * offset @off, retrying while the queue is full (EAGAIN).  On any other
 * error the control block's fd is reset to -1 to mark the slot free.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

/*
 * Check a queued aio write: returns 0 while it is still in progress (or was
 * restarted for a short write), 1 when the request fully completed and the
 * control block slot is free again.
 */
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

/*
 * Reap completed aio writes.  With sync_all == false, return the index of
 * the first free control block (waiting if all are busy).  With
 * sync_all == true, wait until no request remains in flight, then return -1.
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 };	/* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

/* Destination buffer state for one record__aio_push() invocation. */
struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

/*
 * perf_mmap__push() callback: copy (or zstd-compress) one chunk of ring
 * buffer data into the free map->aio.data[] buffer, taking a map reference
 * on the first chunk to keep the buffer alive until the aio write completes.
 */
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel to proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

/*
 * Drain @map's ring buffer into a free aio buffer and queue the write at
 * offset *@off, advancing *@off and the byte accounting on success.
 */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

/* Current output file position (aio writes are positioned explicitly). */
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

/* Wait for all in-flight aio writes on every mmap to complete. */
static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

/* Parse the --aio option: number of aio control blocks per mmap. */
static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct
record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

/* Non-zero when asynchronous (--aio) trace writing is enabled. */
static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
/*
 * Parse the --mmap-flush option: accepts a B/K/M/G-suffixed value or a
 * plain number, defaults to MMAP_FLUSH_DEFAULT and is clamped to a
 * quarter of the mmap buffer size.
 */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

/* Parse the -z/--compression-level option. */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

/* Write a synthesized event straight to the output file. */
static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

/*
 * Serialized variant of process_synthesized_event(), used when event
 * synthesis runs on multiple threads concurrently.
 */
static int process_locked_synthesized_event(const struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

/*
 * perf_mmap__push() callback for the synchronous path: write one chunk of
 * ring buffer data (zstd-compressed when enabled) to the output, padding
 * compressed records to 8-byte alignment.
 */
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
		 * error. We make it aligned here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;
#endif

/*
 * Async-signal-safe termination handler: record which signal fired,
 * request loop exit via 'done' and wake up any blocked poll().
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}

/* SIGSEGV handler: let perf hooks clean up, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

/*
 * atexit handler: if we deferred a fatal signal during recording,
 * re-raise it now with the default disposition.
 */
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

/*
 * Write one AUX area trace event plus its (up to two) data fragments to
 * the output, padded to 8 bytes, and index it when writing to a single
 * non-pipe output file.
 */
static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

/* Drain one AUX area mmap into the output. */
static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr,
				  perf_session__env(rec->session),
				  &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

/* Snapshot-mode variant: read at most auxtrace_snapshot_size bytes. */
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
					   perf_session__env(rec->session),
					   &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

/* Take a snapshot from every mmap that has an AUX area. */
static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

/*
 * Record one AUX area snapshot and update the snapshot trigger state:
 * error on failure, ready again on success.
 */
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

/* Take a final snapshot on exit unless the trigger is already in error. */
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

/*
 * Initialize AUX area tracing: reject it in parallel streaming mode and
 * parse the snapshot/sample/aux-action/filter options.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	     && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err =
auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

/* No-op stubs used when perf is built without AUX area tracing support. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

/*
 * Ensure a text_poke-enabled dummy event on all CPUs so kernel text
 * modifications are captured, unless one is already configured.
 */
static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

/* True when any non-dummy event needs system-wide sideband tracking. */
static bool record__tracking_system_wide(struct record *rec)
{
	struct evlist *evlist = rec->evlist;
	struct evsel *evsel;

	/*
	 * If non-dummy evsel exists, system_wide sideband is need to
	 * help parse sample information.
	 * For example, PERF_EVENT_MMAP event to help parse symbol,
	 * and PERF_EVENT_COMM event to help parse task executable name.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (!evsel__is_dummy_event(evsel))
			return true;
	}

	return false;
}

/* Add/configure a tracking (sideband) event when the session needs one. */
static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {

		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}

/* True when /proc/kcore (under the machine's root dir) can be opened. */
static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

/* Copy kcore into the output's kcore_dir for later symbol resolution. */
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

/* Mark all four pipe fds as unopened. */
static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

/* Open the msg/ack pipe pair, cleaning up msg on ack failure. */
static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

/* Close whichever pipe ends are open, resetting each fd to -1. */
static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

/* True when the evlist maps per-thread (dummy CPU map) rather than per-CPU. */
static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

/*
 * Populate thread_data->maps / overwrite_maps with pointers to the evlist
 * mmaps this thread owns: all of them in per-thread mode, otherwise those
 * whose CPU is set in the thread's maps mask.
 */
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

/*
 * Duplicate into the thread's pollfd every evlist pollfd entry whose
 * private pointer matches one of the thread's mmaps.
 */
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

/* Release everything allocated by record__alloc_thread_data(). */
static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

/* Append one evlist-index/thread-index pair to the growable index map. */
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

/*
 * Copy revents from the thread's pollfd entries back to the matching
 * evlist entries, verifying fd/events still correspond.
 */
static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;

		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

/*
 * Duplicate the evlist's non-perf-event fds into the main thread's pollfd
 * and remember the index correspondence for revents propagation.
 */
static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

/*
 * Allocate and initialize the per-thread state (maps, pollfd, pipes) for
 * all nr_threads workers; thread 0 is the main thread.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1260 if (!rec->thread_data) { 1261 pr_err("Failed to allocate thread data\n"); 1262 return -ENOMEM; 1263 } 1264 thread_data = rec->thread_data; 1265 1266 for (t = 0; t < rec->nr_threads; t++) 1267 record__thread_data_init_pipes(&thread_data[t]); 1268 1269 for (t = 0; t < rec->nr_threads; t++) { 1270 thread_data[t].rec = rec; 1271 thread_data[t].mask = &rec->thread_masks[t]; 1272 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1273 if (ret) { 1274 pr_err("Failed to initialize thread[%d] maps\n", t); 1275 goto out_free; 1276 } 1277 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1278 if (ret) { 1279 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1280 goto out_free; 1281 } 1282 if (t) { 1283 thread_data[t].tid = -1; 1284 ret = record__thread_data_open_pipes(&thread_data[t]); 1285 if (ret) { 1286 pr_err("Failed to open thread[%d] communication pipes\n", t); 1287 goto out_free; 1288 } 1289 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1290 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1291 if (ret < 0) { 1292 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1293 goto out_free; 1294 } 1295 thread_data[t].ctlfd_pos = ret; 1296 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1297 thread_data, thread_data[t].ctlfd_pos, 1298 thread_data[t].pipes.msg[0]); 1299 } else { 1300 thread_data[t].tid = gettid(); 1301 1302 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1303 if (ret < 0) 1304 goto out_free; 1305 1306 thread_data[t].ctlfd_pos = -1; /* Not used */ 1307 } 1308 } 1309 1310 return 0; 1311 1312 out_free: 1313 record__free_thread_data(rec); 1314 1315 return ret; 1316 } 1317 1318 static int record__mmap_evlist(struct record *rec, 1319 struct evlist *evlist) 1320 { 1321 int i, ret; 1322 struct record_opts *opts = &rec->opts; 1323 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1324 
opts->auxtrace_sample_mode; 1325 char msg[512]; 1326 1327 if (opts->affinity != PERF_AFFINITY_SYS) 1328 cpu__setup_cpunode_map(); 1329 1330 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1331 opts->auxtrace_mmap_pages, 1332 auxtrace_overwrite, 1333 opts->nr_cblocks, opts->affinity, 1334 opts->mmap_flush, opts->comp_level) < 0) { 1335 if (errno == EPERM) { 1336 pr_err("Permission error mapping pages.\n" 1337 "Consider increasing " 1338 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1339 "or try again with a smaller value of -m/--mmap_pages.\n" 1340 "(current value: %u,%u)\n", 1341 opts->mmap_pages, opts->auxtrace_mmap_pages); 1342 return -errno; 1343 } else { 1344 pr_err("failed to mmap with %d (%s)\n", errno, 1345 str_error_r(errno, msg, sizeof(msg))); 1346 if (errno) 1347 return -errno; 1348 else 1349 return -EINVAL; 1350 } 1351 } 1352 1353 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1354 return -1; 1355 1356 ret = record__alloc_thread_data(rec, evlist); 1357 if (ret) 1358 return ret; 1359 1360 if (record__threads_enabled(rec)) { 1361 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1362 if (ret) { 1363 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1364 return ret; 1365 } 1366 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1367 if (evlist->mmap) 1368 evlist->mmap[i].file = &rec->data.dir.files[i]; 1369 if (evlist->overwrite_mmap) 1370 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1371 } 1372 } 1373 1374 return 0; 1375 } 1376 1377 static int record__mmap(struct record *rec) 1378 { 1379 return record__mmap_evlist(rec, rec->evlist); 1380 } 1381 1382 static int record__open(struct record *rec) 1383 { 1384 char msg[BUFSIZ]; 1385 struct evsel *pos; 1386 struct evlist *evlist = rec->evlist; 1387 struct perf_session *session = rec->session; 1388 struct record_opts *opts = &rec->opts; 1389 int rc = 0; 1390 bool skipped = false; 1391 bool removed_tracking = false; 1392 1393 evlist__for_each_entry(evlist, pos) 
{ 1394 if (removed_tracking) { 1395 /* 1396 * Normally the head of the list has tracking enabled 1397 * for sideband data like mmaps. If this event is 1398 * removed, make sure to add tracking to the next 1399 * processed event. 1400 */ 1401 if (!pos->tracking) { 1402 pos->tracking = true; 1403 evsel__config(pos, opts, &callchain_param); 1404 } 1405 removed_tracking = false; 1406 } 1407 try_again: 1408 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1409 bool report_error = true; 1410 1411 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1412 if (verbose > 0) 1413 ui__warning("%s\n", msg); 1414 goto try_again; 1415 } 1416 if ((errno == EINVAL || errno == EBADF) && 1417 pos->core.leader != &pos->core && 1418 pos->weak_group) { 1419 pos = evlist__reset_weak_group(evlist, pos, true); 1420 goto try_again; 1421 } 1422 #if defined(__aarch64__) || defined(__arm__) 1423 if (strstr(evsel__name(pos), "cycles")) { 1424 struct evsel *pos2; 1425 /* 1426 * Unfortunately ARM has many events named 1427 * "cycles" on PMUs like the system-level (L3) 1428 * cache which don't support sampling. Only 1429 * display such failures to open when there is 1430 * only 1 cycles event or verbose is enabled. 1431 */ 1432 evlist__for_each_entry(evlist, pos2) { 1433 if (pos2 == pos) 1434 continue; 1435 if (strstr(evsel__name(pos2), "cycles")) { 1436 report_error = false; 1437 break; 1438 } 1439 } 1440 } 1441 #endif 1442 if (report_error || verbose > 0) { 1443 ui__error("Failure to open event '%s' on PMU '%s' which will be " 1444 "removed.\n%s\n", 1445 evsel__name(pos), evsel__pmu_name(pos), msg); 1446 } 1447 if (pos->tracking) 1448 removed_tracking = true; 1449 pos->skippable = true; 1450 skipped = true; 1451 } 1452 } 1453 1454 if (skipped) { 1455 struct evsel *tmp; 1456 int idx = 0; 1457 bool evlist_empty = true; 1458 1459 /* Remove evsels that failed to open and update indices. 
*/ 1460 evlist__for_each_entry_safe(evlist, tmp, pos) { 1461 if (pos->skippable) { 1462 evlist__remove(evlist, pos); 1463 continue; 1464 } 1465 1466 /* 1467 * Note, dummy events may be command line parsed or 1468 * added by the tool. We care about supporting `perf 1469 * record -e dummy` which may be used as a permission 1470 * check. Dummy events that are added to the command 1471 * line and opened along with other events that fail, 1472 * will still fail as if the dummy events were tool 1473 * added events for the sake of code simplicity. 1474 */ 1475 if (!evsel__is_dummy_event(pos)) 1476 evlist_empty = false; 1477 } 1478 evlist__for_each_entry(evlist, pos) { 1479 pos->core.idx = idx++; 1480 } 1481 /* If list is empty then fail. */ 1482 if (evlist_empty) { 1483 ui__error("Failure to open any events for recording.\n"); 1484 rc = -1; 1485 goto out; 1486 } 1487 } 1488 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1489 pr_warning( 1490 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1491 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1492 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1493 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1494 "Samples in kernel modules won't be resolved at all.\n\n" 1495 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 1496 "even with a suitable vmlinux or kallsyms file.\n\n"); 1497 } 1498 1499 if (evlist__apply_filters(evlist, &pos, &opts->target)) { 1500 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1501 pos->filter ?: "BPF", evsel__name(pos), errno, 1502 str_error_r(errno, msg, sizeof(msg))); 1503 rc = -1; 1504 goto out; 1505 } 1506 1507 rc = record__mmap(rec); 1508 if (rc) 1509 goto out; 1510 1511 session->evlist = evlist; 1512 perf_session__set_id_hdr_size(session); 1513 out: 1514 return rc; 1515 } 1516 1517 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1518 { 1519 if (rec->evlist->first_sample_time == 0) 1520 rec->evlist->first_sample_time = sample_time; 1521 1522 if (sample_time) 1523 rec->evlist->last_sample_time = sample_time; 1524 } 1525 1526 static int process_sample_event(const struct perf_tool *tool, 1527 union perf_event *event, 1528 struct perf_sample *sample, 1529 struct evsel *evsel, 1530 struct machine *machine) 1531 { 1532 struct record *rec = container_of(tool, struct record, tool); 1533 1534 set_timestamp_boundary(rec, sample->time); 1535 1536 if (rec->buildid_all) 1537 return 0; 1538 1539 rec->samples++; 1540 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1541 } 1542 1543 static int process_buildids(struct record *rec) 1544 { 1545 struct perf_session *session = rec->session; 1546 1547 if (perf_data__size(&rec->data) == 0) 1548 return 0; 1549 1550 /* 1551 * During this process, it'll load kernel map and replace the 1552 * dso->long_name to a real pathname it found. In this case 1553 * we prefer the vmlinux path like 1554 * /lib/modules/3.16.4/build/vmlinux 1555 * 1556 * rather than build-id path (in debug directory). 
1557 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1558 */ 1559 symbol_conf.ignore_vmlinux_buildid = true; 1560 1561 /* 1562 * If --buildid-all is given, it marks all DSO regardless of hits, 1563 * so no need to process samples. But if timestamp_boundary is enabled, 1564 * it still needs to walk on all samples to get the timestamps of 1565 * first/last samples. 1566 */ 1567 if (rec->buildid_all && !rec->timestamp_boundary) 1568 rec->tool.sample = process_event_sample_stub; 1569 1570 return perf_session__process_events(session); 1571 } 1572 1573 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1574 { 1575 int err; 1576 struct perf_tool *tool = data; 1577 /* 1578 *As for guest kernel when processing subcommand record&report, 1579 *we arrange module mmap prior to guest kernel mmap and trigger 1580 *a preload dso because default guest module symbols are loaded 1581 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1582 *method is used to avoid symbol missing when the first addr is 1583 *in module instead of in guest kernel. 1584 */ 1585 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1586 machine); 1587 if (err < 0) 1588 pr_err("Couldn't record guest kernel [%d]'s reference" 1589 " relocation symbol.\n", machine->pid); 1590 1591 /* 1592 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1593 * have no _text sometimes. 
1594 */ 1595 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1596 machine); 1597 if (err < 0) 1598 pr_err("Couldn't record guest kernel [%d]'s reference" 1599 " relocation symbol.\n", machine->pid); 1600 } 1601 1602 static struct perf_event_header finished_round_event = { 1603 .size = sizeof(struct perf_event_header), 1604 .type = PERF_RECORD_FINISHED_ROUND, 1605 }; 1606 1607 static struct perf_event_header finished_init_event = { 1608 .size = sizeof(struct perf_event_header), 1609 .type = PERF_RECORD_FINISHED_INIT, 1610 }; 1611 1612 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1613 { 1614 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1615 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1616 thread->mask->affinity.nbits)) { 1617 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1618 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1619 map->affinity_mask.bits, thread->mask->affinity.nbits); 1620 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1621 (cpu_set_t *)thread->mask->affinity.bits); 1622 if (verbose == 2) { 1623 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1624 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1625 } 1626 } 1627 } 1628 1629 static size_t process_comp_header(void *record, size_t increment) 1630 { 1631 struct perf_record_compressed2 *event = record; 1632 size_t size = sizeof(*event); 1633 1634 if (increment) { 1635 event->header.size += increment; 1636 return increment; 1637 } 1638 1639 event->header.type = PERF_RECORD_COMPRESSED2; 1640 event->header.size = size; 1641 1642 return size; 1643 } 1644 1645 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1646 void *dst, size_t dst_size, void *src, size_t src_size) 1647 { 1648 ssize_t compressed; 1649 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1; 1650 
struct zstd_data *zstd_data = &session->zstd_data; 1651 1652 if (map && map->file) 1653 zstd_data = &map->zstd_data; 1654 1655 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1656 max_record_size, process_comp_header); 1657 if (compressed < 0) 1658 return compressed; 1659 1660 if (map && map->file) { 1661 thread->bytes_transferred += src_size; 1662 thread->bytes_compressed += compressed; 1663 } else { 1664 session->bytes_transferred += src_size; 1665 session->bytes_compressed += compressed; 1666 } 1667 1668 return compressed; 1669 } 1670 1671 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1672 bool overwrite, bool synch) 1673 { 1674 u64 bytes_written = rec->bytes_written; 1675 int i; 1676 int rc = 0; 1677 int nr_mmaps; 1678 struct mmap **maps; 1679 int trace_fd = rec->data.file.fd; 1680 off_t off = 0; 1681 1682 if (!evlist) 1683 return 0; 1684 1685 nr_mmaps = thread->nr_mmaps; 1686 maps = overwrite ? thread->overwrite_maps : thread->maps; 1687 1688 if (!maps) 1689 return 0; 1690 1691 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1692 return 0; 1693 1694 if (record__aio_enabled(rec)) 1695 off = record__aio_get_pos(trace_fd); 1696 1697 for (i = 0; i < nr_mmaps; i++) { 1698 u64 flush = 0; 1699 struct mmap *map = maps[i]; 1700 1701 if (map->core.base) { 1702 record__adjust_affinity(rec, map); 1703 if (synch) { 1704 flush = map->core.flush; 1705 map->core.flush = 1; 1706 } 1707 if (!record__aio_enabled(rec)) { 1708 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1709 if (synch) 1710 map->core.flush = flush; 1711 rc = -1; 1712 goto out; 1713 } 1714 } else { 1715 if (record__aio_push(rec, map, &off) < 0) { 1716 record__aio_set_pos(trace_fd, off); 1717 if (synch) 1718 map->core.flush = flush; 1719 rc = -1; 1720 goto out; 1721 } 1722 } 1723 if (synch) 1724 map->core.flush = flush; 1725 } 1726 1727 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1728 
!rec->opts.auxtrace_sample_mode && 1729 record__auxtrace_mmap_read(rec, map) != 0) { 1730 rc = -1; 1731 goto out; 1732 } 1733 } 1734 1735 if (record__aio_enabled(rec)) 1736 record__aio_set_pos(trace_fd, off); 1737 1738 /* 1739 * Mark the round finished in case we wrote 1740 * at least one event. 1741 * 1742 * No need for round events in directory mode, 1743 * because per-cpu maps and files have data 1744 * sorted by kernel. 1745 */ 1746 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1747 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1748 1749 if (overwrite) 1750 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1751 out: 1752 return rc; 1753 } 1754 1755 static int record__mmap_read_all(struct record *rec, bool synch) 1756 { 1757 int err; 1758 1759 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1760 if (err) 1761 return err; 1762 1763 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1764 } 1765 1766 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1767 void *arg __maybe_unused) 1768 { 1769 struct perf_mmap *map = fda->priv[fd].ptr; 1770 1771 if (map) 1772 perf_mmap__put(map); 1773 } 1774 1775 static void *record__thread(void *arg) 1776 { 1777 enum thread_msg msg = THREAD_MSG__READY; 1778 bool terminate = false; 1779 struct fdarray *pollfd; 1780 int err, ctlfd_pos; 1781 1782 thread = arg; 1783 thread->tid = gettid(); 1784 1785 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1786 if (err == -1) 1787 pr_warning("threads[%d]: failed to notify on start: %s\n", 1788 thread->tid, strerror(errno)); 1789 1790 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1791 1792 pollfd = &thread->pollfd; 1793 ctlfd_pos = thread->ctlfd_pos; 1794 1795 for (;;) { 1796 unsigned long long hits = thread->samples; 1797 1798 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1799 break; 1800 1801 if (hits == thread->samples) { 1802 
1803 err = fdarray__poll(pollfd, -1); 1804 /* 1805 * Propagate error, only if there's any. Ignore positive 1806 * number of returned events and interrupt error. 1807 */ 1808 if (err > 0 || (err < 0 && errno == EINTR)) 1809 err = 0; 1810 thread->waking++; 1811 1812 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1813 record__thread_munmap_filtered, NULL) == 0) 1814 break; 1815 } 1816 1817 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1818 terminate = true; 1819 close(thread->pipes.msg[0]); 1820 thread->pipes.msg[0] = -1; 1821 pollfd->entries[ctlfd_pos].fd = -1; 1822 pollfd->entries[ctlfd_pos].events = 0; 1823 } 1824 1825 pollfd->entries[ctlfd_pos].revents = 0; 1826 } 1827 record__mmap_read_all(thread->rec, true); 1828 1829 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1830 if (err == -1) 1831 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1832 thread->tid, strerror(errno)); 1833 1834 return NULL; 1835 } 1836 1837 static void record__init_features(struct record *rec) 1838 { 1839 struct perf_session *session = rec->session; 1840 int feat; 1841 1842 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1843 perf_header__set_feat(&session->header, feat); 1844 1845 if (rec->no_buildid) 1846 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1847 1848 if (!have_tracepoints(&rec->evlist->core.entries)) 1849 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1850 1851 if (!rec->opts.branch_stack) 1852 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1853 1854 if (!rec->opts.full_auxtrace) 1855 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1856 1857 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1858 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1859 1860 if (!rec->opts.use_clockid) 1861 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1862 1863 if (!record__threads_enabled(rec)) 1864 perf_header__clear_feat(&session->header, 
HEADER_DIR_FORMAT); 1865 1866 if (!record__comp_enabled(rec)) 1867 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1868 1869 perf_header__clear_feat(&session->header, HEADER_STAT); 1870 } 1871 1872 static void 1873 record__finish_output(struct record *rec) 1874 { 1875 int i; 1876 struct perf_data *data = &rec->data; 1877 int fd = perf_data__fd(data); 1878 1879 if (data->is_pipe) { 1880 /* Just to display approx. size */ 1881 data->file.size = rec->bytes_written; 1882 return; 1883 } 1884 1885 rec->session->header.data_size += rec->bytes_written; 1886 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1887 if (record__threads_enabled(rec)) { 1888 for (i = 0; i < data->dir.nr; i++) 1889 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1890 } 1891 1892 /* Buildid scanning disabled or build ID in kernel and synthesized map events. */ 1893 if (!rec->no_buildid) { 1894 process_buildids(rec); 1895 1896 if (rec->buildid_all) 1897 perf_session__dsos_hit_all(rec->session); 1898 } 1899 perf_session__write_header(rec->session, rec->evlist, fd, true); 1900 1901 return; 1902 } 1903 1904 static int record__synthesize_workload(struct record *rec, bool tail) 1905 { 1906 int err; 1907 struct perf_thread_map *thread_map; 1908 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1909 1910 if (rec->opts.tail_synthesize != tail) 1911 return 0; 1912 1913 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1914 if (thread_map == NULL) 1915 return -1; 1916 1917 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1918 process_synthesized_event, 1919 &rec->session->machines.host, 1920 needs_mmap, 1921 rec->opts.sample_address); 1922 perf_thread_map__put(thread_map); 1923 return err; 1924 } 1925 1926 static int write_finished_init(struct record *rec, bool tail) 1927 { 1928 if (rec->opts.tail_synthesize != tail) 1929 return 0; 1930 1931 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1932 } 
1933 1934 static int record__synthesize(struct record *rec, bool tail); 1935 1936 static int 1937 record__switch_output(struct record *rec, bool at_exit) 1938 { 1939 struct perf_data *data = &rec->data; 1940 char *new_filename = NULL; 1941 int fd, err; 1942 1943 /* Same Size: "2015122520103046"*/ 1944 char timestamp[] = "InvalidTimestamp"; 1945 1946 record__aio_mmap_read_sync(rec); 1947 1948 write_finished_init(rec, true); 1949 1950 record__synthesize(rec, true); 1951 if (target__none(&rec->opts.target)) 1952 record__synthesize_workload(rec, true); 1953 1954 rec->samples = 0; 1955 record__finish_output(rec); 1956 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1957 if (err) { 1958 pr_err("Failed to get current timestamp\n"); 1959 return -EINVAL; 1960 } 1961 1962 fd = perf_data__switch(data, timestamp, 1963 rec->session->header.data_offset, 1964 at_exit, &new_filename); 1965 if (fd >= 0 && !at_exit) { 1966 rec->bytes_written = 0; 1967 rec->session->header.data_size = 0; 1968 } 1969 1970 if (!quiet) { 1971 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1972 data->path, timestamp); 1973 } 1974 1975 if (rec->switch_output.num_files) { 1976 int n = rec->switch_output.cur_file + 1; 1977 1978 if (n >= rec->switch_output.num_files) 1979 n = 0; 1980 rec->switch_output.cur_file = n; 1981 if (rec->switch_output.filenames[n]) { 1982 remove(rec->switch_output.filenames[n]); 1983 zfree(&rec->switch_output.filenames[n]); 1984 } 1985 rec->switch_output.filenames[n] = new_filename; 1986 } else { 1987 free(new_filename); 1988 } 1989 1990 /* Output tracking events */ 1991 if (!at_exit) { 1992 record__synthesize(rec, false); 1993 1994 /* 1995 * In 'perf record --switch-output' without -a, 1996 * record__synthesize() in record__switch_output() won't 1997 * generate tracking events because there's no thread_map 1998 * in evlist. Which causes newly created perf.data doesn't 1999 * contain map and comm information. 
2000 * Create a fake thread_map and directly call 2001 * perf_event__synthesize_thread_map() for those events. 2002 */ 2003 if (target__none(&rec->opts.target)) 2004 record__synthesize_workload(rec, false); 2005 write_finished_init(rec, false); 2006 } 2007 return fd; 2008 } 2009 2010 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 2011 struct perf_record_lost_samples *lost, 2012 int cpu_idx, int thread_idx, u64 lost_count, 2013 u16 misc_flag) 2014 { 2015 struct perf_sample_id *sid; 2016 struct perf_sample sample; 2017 int id_hdr_size; 2018 2019 perf_sample__init(&sample, /*all=*/true); 2020 lost->lost = lost_count; 2021 if (evsel->core.ids) { 2022 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 2023 sample.id = sid->id; 2024 } 2025 2026 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 2027 evsel->core.attr.sample_type, &sample); 2028 lost->header.size = sizeof(*lost) + id_hdr_size; 2029 lost->header.misc = misc_flag; 2030 record__write(rec, NULL, lost, lost->header.size); 2031 perf_sample__exit(&sample); 2032 } 2033 2034 static void record__read_lost_samples(struct record *rec) 2035 { 2036 struct perf_session *session = rec->session; 2037 struct perf_record_lost_samples_and_ids lost; 2038 struct evsel *evsel; 2039 2040 /* there was an error during record__open */ 2041 if (session->evlist == NULL) 2042 return; 2043 2044 evlist__for_each_entry(session->evlist, evsel) { 2045 struct xyarray *xy = evsel->core.sample_id; 2046 u64 lost_count; 2047 2048 if (xy == NULL || evsel->core.fd == NULL) 2049 continue; 2050 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 2051 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 2052 pr_debug("Unmatched FD vs. 
sample ID: skip reading LOST count\n"); 2053 continue; 2054 } 2055 2056 for (int x = 0; x < xyarray__max_x(xy); x++) { 2057 for (int y = 0; y < xyarray__max_y(xy); y++) { 2058 struct perf_counts_values count; 2059 2060 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 2061 pr_debug("read LOST count failed\n"); 2062 return; 2063 } 2064 2065 if (count.lost) { 2066 memset(&lost, 0, sizeof(lost)); 2067 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 2068 __record__save_lost_samples(rec, evsel, &lost.lost, 2069 x, y, count.lost, 0); 2070 } 2071 } 2072 } 2073 2074 lost_count = perf_bpf_filter__lost_count(evsel); 2075 if (lost_count) { 2076 memset(&lost, 0, sizeof(lost)); 2077 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 2078 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count, 2079 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 2080 } 2081 } 2082 } 2083 2084 static volatile sig_atomic_t workload_exec_errno; 2085 2086 /* 2087 * evlist__prepare_workload will send a SIGUSR1 2088 * if the fork fails, since we asked by setting its 2089 * want_signal to true. 
2090 */ 2091 static void workload_exec_failed_signal(int signo __maybe_unused, 2092 siginfo_t *info, 2093 void *ucontext __maybe_unused) 2094 { 2095 workload_exec_errno = info->si_value.sival_int; 2096 done = 1; 2097 child_finished = 1; 2098 } 2099 2100 static void snapshot_sig_handler(int sig); 2101 static void alarm_sig_handler(int sig); 2102 2103 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2104 { 2105 if (evlist) { 2106 if (evlist->mmap && evlist->mmap[0].core.base) 2107 return evlist->mmap[0].core.base; 2108 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2109 return evlist->overwrite_mmap[0].core.base; 2110 } 2111 return NULL; 2112 } 2113 2114 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2115 { 2116 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2117 if (pc) 2118 return pc; 2119 return NULL; 2120 } 2121 2122 static int record__synthesize(struct record *rec, bool tail) 2123 { 2124 struct perf_session *session = rec->session; 2125 struct machine *machine = &session->machines.host; 2126 struct perf_data *data = &rec->data; 2127 struct record_opts *opts = &rec->opts; 2128 struct perf_tool *tool = &rec->tool; 2129 int err = 0; 2130 event_op f = process_synthesized_event; 2131 2132 if (rec->opts.tail_synthesize != tail) 2133 return 0; 2134 2135 if (data->is_pipe) { 2136 err = perf_event__synthesize_for_pipe(tool, session, data, 2137 process_synthesized_event); 2138 if (err < 0) 2139 goto out; 2140 2141 rec->bytes_written += err; 2142 } 2143 2144 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2145 process_synthesized_event, machine); 2146 if (err) 2147 goto out; 2148 2149 /* Synthesize id_index before auxtrace_info */ 2150 err = perf_event__synthesize_id_index(tool, 2151 process_synthesized_event, 2152 session->evlist, machine); 2153 if (err) 2154 goto out; 2155 2156 if (rec->opts.full_auxtrace) { 2157 err = 
perf_event__synthesize_auxtrace_info(rec->itr, tool,
                    session, process_synthesized_event);
        if (err)
            goto out;
    }

    if (!evlist__exclude_kernel(rec->evlist)) {
        /* Kernel mmap/module synthesis is best-effort: warn, keep going. */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
               "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
               "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
               "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
               "Check /proc/modules permission or run as root.\n");
    }

    if (perf_guest) {
        machines__process_guests(&session->machines,
                     perf_event__synthesize_guest_os, tool);
    }

    err = perf_event__synthesize_extra_attr(&rec->tool,
                        rec->evlist,
                        process_synthesized_event,
                        data->is_pipe);
    if (err)
        goto out;

    /*
     * NOTE(review): the two synthesizers below 'return err' directly instead
     * of 'goto out' like the rest of this function — same effect today, but
     * inconsistent with the surrounding error style.
     */
    err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
                         process_synthesized_event,
                        NULL);
    if (err < 0) {
        pr_err("Couldn't synthesize thread map.\n");
        return err;
    }

    err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
                         process_synthesized_event, NULL);
    if (err < 0) {
        pr_err("Couldn't synthesize cpu map.\n");
        return err;
    }

    /* BPF/cgroup synthesis failures are downgraded to warnings (err reset). */
    err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
                        machine, opts);
    if (err < 0) {
        pr_warning("Couldn't synthesize bpf events.\n");
        err = 0;
    }

    if (rec->opts.synth & PERF_SYNTH_CGROUP) {
        err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
                             machine);
        if (err < 0) {
            pr_warning("Couldn't synthesize cgroup events.\n");
            err = 0;
        }
    }

    /*
     * With more than one synthesis thread, switch to the locked event
     * callback and put the tool into multithreaded mode for the duration
     * of the thread synthesis below.
     */
    if (rec->opts.nr_threads_synthesize > 1) {
        mutex_init(&synth_lock);
        perf_set_multithreaded();
        f = process_locked_synthesized_event;
    }

    if (rec->opts.synth & PERF_SYNTH_TASK) {
        bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

        err = __machine__synthesize_threads(machine, tool, &opts->target,
                            rec->evlist->core.threads,
                            f, needs_mmap, opts->sample_address,
                            rec->opts.nr_threads_synthesize);
    }

    if (rec->opts.nr_threads_synthesize > 1) {
        perf_set_singlethreaded();
        mutex_destroy(&synth_lock);
    }

out:
    return err;
}

/* Emit any final BPF metadata events at the end of the session (libbpf builds only). */
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
    perf_event__synthesize_final_bpf_metadata(rec->session,
                          process_synthesized_event);
#endif
}

/*
 * Side-band evlist callback: poke the main thread with SIGUSR2 so it notices
 * a --switch-output-event occurrence.
 */
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
    struct record *rec = data;

    pthread_kill(rec->thread_id, SIGUSR2);
    return 0;
}

/*
 * Set up the side-band evlist: hook up the --switch-output-event signal
 * callback if one was configured, optionally add the BPF side-band event,
 * and start the side-band thread. Returns 0 on success, -1 on failure.
 */
static int record__setup_sb_evlist(struct record *rec)
{
    struct record_opts *opts = &rec->opts;

    if (rec->sb_evlist != NULL) {
        /*
         * We get here if --switch-output-event populated the
         * sb_evlist, so associate a callback that will send a SIGUSR2
         * to the main thread.
         */
        evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
        rec->thread_id = pthread_self();
    }
#ifdef HAVE_LIBBPF_SUPPORT
    if (!opts->no_bpf_event) {
        if (rec->sb_evlist == NULL) {
            rec->sb_evlist = evlist__new();

            if (rec->sb_evlist == NULL) {
                pr_err("Couldn't create side band evlist.\n.");
                return -1;
            }
        }

        if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
            pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
            return -1;
        }
    }
#endif
    /* Failure to start the thread is non-fatal; BPF annotation is lost. */
    if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
        pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
        opts->no_bpf_event = true;
    }

    return 0;
}

/*
 * Record a pair of reference timestamps (wall clock + the user-selected
 * clockid) in the session env so report tools can convert between them.
 * No-op unless --clockid was given.
 */
static int record__init_clock(struct record *rec)
{
    struct perf_session *session = rec->session;
    struct timespec ref_clockid;
    struct timeval ref_tod;
    struct perf_env *env = perf_session__env(session);
    u64 ref;

    if (!rec->opts.use_clockid)
        return 0;

    if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
        env->clock.clockid_res_ns = rec->opts.clockid_res_ns;

    env->clock.clockid = rec->opts.clockid;

    if (gettimeofday(&ref_tod, NULL) != 0) {
        pr_err("gettimeofday failed, cannot set reference time.\n");
        return -1;
    }

    if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
        pr_err("clock_gettime failed, cannot set reference time.\n");
        return -1;
    }

    /* Both references stored as nanoseconds since their respective epochs. */
    ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
          (u64) ref_tod.tv_usec * NSEC_PER_USEC;

    env->clock.tod_ns = ref;

    ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
          (u64) ref_clockid.tv_nsec;

    env->clock.clockid_ns = ref;
    return 0;
}

/* Fire the AUX-area snapshot trigger if it is armed and start the snapshot. */
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
    if
(trigger_is_ready(&auxtrace_snapshot_trigger)) { 2338 trigger_hit(&auxtrace_snapshot_trigger); 2339 auxtrace_record__snapshot_started = 1; 2340 if (auxtrace_record__snapshot_start(rec->itr)) 2341 trigger_error(&auxtrace_snapshot_trigger); 2342 } 2343 } 2344 2345 static int record__terminate_thread(struct record_thread *thread_data) 2346 { 2347 int err; 2348 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2349 pid_t tid = thread_data->tid; 2350 2351 close(thread_data->pipes.msg[1]); 2352 thread_data->pipes.msg[1] = -1; 2353 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2354 if (err > 0) 2355 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2356 else 2357 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2358 thread->tid, tid); 2359 2360 return 0; 2361 } 2362 2363 static int record__start_threads(struct record *rec) 2364 { 2365 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2366 struct record_thread *thread_data = rec->thread_data; 2367 sigset_t full, mask; 2368 pthread_t handle; 2369 pthread_attr_t attrs; 2370 2371 thread = &thread_data[0]; 2372 2373 if (!record__threads_enabled(rec)) 2374 return 0; 2375 2376 sigfillset(&full); 2377 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2378 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2379 return -1; 2380 } 2381 2382 pthread_attr_init(&attrs); 2383 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2384 2385 for (t = 1; t < nr_threads; t++) { 2386 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2387 2388 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2389 pthread_attr_setaffinity_np(&attrs, 2390 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2391 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2392 #endif 2393 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2394 for (tt = 1; tt < t; tt++) 2395 record__terminate_thread(&thread_data[t]); 2396 pr_err("Failed to start threads: %s\n", strerror(errno)); 
2397 ret = -1; 2398 goto out_err; 2399 } 2400 2401 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2402 if (err > 0) 2403 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2404 thread_msg_tags[msg]); 2405 else 2406 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2407 thread->tid, rec->thread_data[t].tid); 2408 } 2409 2410 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2411 (cpu_set_t *)thread->mask->affinity.bits); 2412 2413 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2414 2415 out_err: 2416 pthread_attr_destroy(&attrs); 2417 2418 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2419 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2420 ret = -1; 2421 } 2422 2423 return ret; 2424 } 2425 2426 static int record__stop_threads(struct record *rec) 2427 { 2428 int t; 2429 struct record_thread *thread_data = rec->thread_data; 2430 2431 for (t = 1; t < rec->nr_threads; t++) 2432 record__terminate_thread(&thread_data[t]); 2433 2434 for (t = 0; t < rec->nr_threads; t++) { 2435 rec->samples += thread_data[t].samples; 2436 if (!record__threads_enabled(rec)) 2437 continue; 2438 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2439 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2440 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2441 thread_data[t].samples, thread_data[t].waking); 2442 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2443 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2444 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2445 else 2446 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2447 } 2448 2449 return 0; 2450 } 2451 2452 static unsigned long record__waking(struct record *rec) 2453 { 2454 int t; 2455 unsigned long waking = 0; 2456 struct record_thread *thread_data = rec->thread_data; 2457 2458 for (t 
= 0; t < rec->nr_threads; t++)
        waking += thread_data[t].waking;

    return waking;
}

/*
 * The heart of 'perf record': set up signal handling, the session, the
 * workload (if argv was given), open/mmap the events, synthesize the
 * initial metadata, then loop draining the ring buffers until done.
 * Returns the workload exit status (when forking) or a negative error.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
    int err;
    int status = 0;
    const bool forks = argc > 0;
    struct perf_tool *tool = &rec->tool;
    struct record_opts *opts = &rec->opts;
    struct perf_data *data = &rec->data;
    struct perf_session *session;
    bool disabled = false, draining = false;
    int fd;
    float ratio = 0;
    enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
    struct perf_env *env;

    atexit(record__sig_exit);
    signal(SIGCHLD, sig_handler);
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGSEGV, sigsegv_handler);

    /* cgroup tracking needs name_to_handle_at(); bail early without it. */
    if (rec->opts.record_cgroup) {
#ifndef HAVE_FILE_HANDLE
        pr_err("cgroup tracking is not supported\n");
        return -1;
#endif
    }

    if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
        signal(SIGUSR2, snapshot_sig_handler);
        if (rec->opts.auxtrace_snapshot_mode)
            trigger_on(&auxtrace_snapshot_trigger);
        if (rec->switch_output.enabled)
            trigger_on(&switch_output_trigger);
    } else {
        signal(SIGUSR2, SIG_IGN);
    }

    perf_tool__init(tool, /*ordered_events=*/true);
    tool->sample = process_sample_event;
    tool->fork = perf_event__process_fork;
    tool->exit = perf_event__process_exit;
    tool->comm = perf_event__process_comm;
    tool->namespaces = perf_event__process_namespaces;
    tool->mmap = build_id__process_mmap;
    tool->mmap2 = build_id__process_mmap2;
    tool->itrace_start = process_timestamp_boundary;
    tool->aux = process_timestamp_boundary;
    tool->namespace_events = rec->opts.record_namespaces;
    tool->cgroup_events = rec->opts.record_cgroup;
    session = perf_session__new(data, tool);
    if (IS_ERR(session)) {
        pr_err("Perf session creation failed.\n");
        return PTR_ERR(session);
    }

    env = perf_session__env(session);
    /*
     * NOTE(review): the early 'return -1' paths below (threads/pipe checks,
     * zstd_init, kcore, init_clock) skip perf_session__delete() — leak on
     * these error paths; confirm against upstream before changing.
     */
    if (record__threads_enabled(rec)) {
        if (perf_data__is_pipe(&rec->data)) {
            pr_err("Parallel trace streaming is not available in pipe mode.\n");
            return -1;
        }
        if (rec->opts.full_auxtrace) {
            pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
            return -1;
        }
    }

    fd = perf_data__fd(data);
    rec->session = session;

    if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
        pr_err("Compression initialization failed.\n");
        return -1;
    }
#ifdef HAVE_EVENTFD_SUPPORT
    done_fd = eventfd(0, EFD_NONBLOCK);
    if (done_fd < 0) {
        pr_err("Failed to create wakeup eventfd, error: %m\n");
        status = -1;
        goto out_delete_session;
    }
    err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
    if (err < 0) {
        pr_err("Failed to add wakeup eventfd to poll list\n");
        status = err;
        goto out_delete_session;
    }
#endif // HAVE_EVENTFD_SUPPORT

    env->comp_type = PERF_COMP_ZSTD;
    env->comp_level = rec->opts.comp_level;

    if (rec->opts.kcore &&
        !record__kcore_readable(&session->machines.host)) {
        pr_err("ERROR: kcore is not readable.\n");
        return -1;
    }

    if (record__init_clock(rec))
        return -1;

    record__init_features(rec);

    if (forks) {
        err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
                           workload_exec_failed_signal);
        if (err < 0) {
            pr_err("Couldn't run the workload!\n");
            status = err;
            goto out_delete_session;
        }
    }

    /*
     * If we have just single event and are sending data
     * through pipe, we need to force the ids allocation,
     * because we synthesize event name through the pipe
     * and need the id for that.
     */
    if (data->is_pipe && rec->evlist->core.nr_entries == 1)
        rec->opts.sample_id = true;

    if (rec->timestamp_filename && perf_data__is_pipe(data)) {
        rec->timestamp_filename = false;
        pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
    }

    /*
     * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
     * and hybrid_merge is false.
     */
    evlist__uniquify_evsel_names(rec->evlist, &stat_config);

    evlist__config(rec->evlist, opts, &callchain_param);

    /* Debug message used by test scripts */
    pr_debug3("perf record opening and mmapping events\n");
    if (record__open(rec) != 0) {
        err = -1;
        goto out_free_threads;
    }
    /* Debug message used by test scripts */
    pr_debug3("perf record done opening and mmapping events\n");
    env->comp_mmap_len = session->evlist->core.mmap_len;

    if (rec->opts.kcore) {
        err = record__kcore_copy(&session->machines.host, data);
        if (err) {
            pr_err("ERROR: Failed to copy kcore\n");
            goto out_free_threads;
        }
    }

    /*
     * Normally perf_session__new would do this, but it doesn't have the
     * evlist.
     */
    if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
        pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
        rec->tool.ordered_events = false;
    }

    if (evlist__nr_groups(rec->evlist) == 0)
        perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

    /* Write the (pipe or file) header before any events. */
    if (data->is_pipe) {
        err = perf_header__write_pipe(fd);
        if (err < 0)
            goto out_free_threads;
    } else {
        err = perf_session__write_header(session, rec->evlist, fd, false);
        if (err < 0)
            goto out_free_threads;
    }

    err = -1;
    if (!rec->no_buildid
        && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
        pr_err("Couldn't generate buildids. "
               "Use --no-buildid to profile anyway.\n");
        goto out_free_threads;
    }

    if (!evlist__needs_bpf_sb_event(rec->evlist))
        opts->no_bpf_event = true;

    err = record__setup_sb_evlist(rec);
    if (err)
        goto out_free_threads;

    err = record__synthesize(rec, false);
    if (err < 0)
        goto out_free_threads;

    if (rec->realtime_prio) {
        struct sched_param param;

        param.sched_priority = rec->realtime_prio;
        /* '&param' here was garbled to an HTML entity in this copy; repaired. */
        if (sched_setscheduler(0, SCHED_FIFO, &param)) {
            pr_err("Could not set realtime priority.\n");
            err = -1;
            goto out_free_threads;
        }
    }

    if (record__start_threads(rec))
        goto out_free_threads;

    /*
     * When perf is starting the traced process, all the events
     * (apart from group members) have enable_on_exec=1 set,
     * so don't spoil it by prematurely enabling them.
     */
    if (!target__none(&opts->target) && !opts->target.initial_delay)
        evlist__enable(rec->evlist);

    /*
     * offcpu-time does not call execve, so enable_on_exe wouldn't work
     * when recording a workload, do it manually
     */
    if (rec->off_cpu)
        evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);

    /*
     * Let the child rip
     */
    if (forks) {
        struct machine *machine = &session->machines.host;
        union perf_event *event;
        pid_t tgid;

        event = malloc(sizeof(event->comm) + machine->id_hdr_size);
        if (event == NULL) {
            err = -ENOMEM;
            goto out_child;
        }

        /*
         * Some H/W events are generated before COMM event
         * which is emitted during exec(), so perf script
         * cannot see a correct process name for those events.
         * Synthesize COMM event to prevent it.
         */
        tgid = perf_event__synthesize_comm(tool, event,
                           rec->evlist->workload.pid,
                           process_synthesized_event,
                           machine);
        free(event);

        if (tgid == -1)
            goto out_child;

        event = malloc(sizeof(event->namespaces) +
                   (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                   machine->id_hdr_size);
        if (event == NULL) {
            err = -ENOMEM;
            goto out_child;
        }

        /*
         * Synthesize NAMESPACES event for the command specified.
         */
        perf_event__synthesize_namespaces(tool, event,
                          rec->evlist->workload.pid,
                          tgid, process_synthesized_event,
                          machine);
        free(event);

        evlist__start_workload(rec->evlist);
    }

    /* --delay: negative means "start disabled", positive is a ms sleep. */
    if (opts->target.initial_delay) {
        pr_info(EVLIST_DISABLED_MSG);
        if (opts->target.initial_delay > 0) {
            usleep(opts->target.initial_delay * USEC_PER_MSEC);
            evlist__enable(rec->evlist);
            pr_info(EVLIST_ENABLED_MSG);
        }
    }

    err = event_enable_timer__start(rec->evlist->eet);
    if (err)
        goto out_child;

    /* Debug message used by test scripts */
    pr_debug3("perf record has started\n");
    fflush(stderr);

    trigger_ready(&auxtrace_snapshot_trigger);
    trigger_ready(&switch_output_trigger);
    perf_hooks__invoke_record_start();

    /*
     * Must write FINISHED_INIT so it will be seen after all other
     * synthesized user events, but before any regular events.
     */
    err = write_finished_init(rec, false);
    if (err < 0)
        goto out_child;

    /* Main capture loop: drain mmaps, handle triggers/ctl-fd, poll. */
    for (;;) {
        unsigned long long hits = thread->samples;

        /*
         * rec->evlist->bkw_mmap_state is possible to be
         * BKW_MMAP_EMPTY here: when done == true and
         * hits != rec->samples in previous round.
         *
         * evlist__toggle_bkw_mmap ensure we never
         * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
         */
        if (trigger_is_hit(&switch_output_trigger) || done || draining)
            evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

        if (record__mmap_read_all(rec, false) < 0) {
            trigger_error(&auxtrace_snapshot_trigger);
            trigger_error(&switch_output_trigger);
            err = -1;
            goto out_child;
        }

        if (auxtrace_record__snapshot_started) {
            auxtrace_record__snapshot_started = 0;
            if (!trigger_is_error(&auxtrace_snapshot_trigger))
                record__read_auxtrace_snapshot(rec, false);
            if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                pr_err("AUX area tracing snapshot failed\n");
                err = -1;
                goto out_child;
            }
        }

        if (trigger_is_hit(&switch_output_trigger)) {
            /*
             * If switch_output_trigger is hit, the data in
             * overwritable ring buffer should have been collected,
             * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
             *
             * If SIGUSR2 raise after or during record__mmap_read_all(),
             * record__mmap_read_all() didn't collect data from
             * overwritable ring buffer. Read again.
             */
            if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                continue;
            trigger_ready(&switch_output_trigger);

            /*
             * Reenable events in overwrite ring buffer after
             * record__mmap_read_all(): we should have collected
             * data from it.
             */
            evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

            if (!quiet)
                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                    record__waking(rec));
            thread->waking = 0;
            fd = record__switch_output(rec, false);
            if (fd < 0) {
                pr_err("Failed to switch to new file\n");
                trigger_error(&switch_output_trigger);
                err = fd;
                goto out_child;
            }

            /* re-arm the alarm */
            if (rec->switch_output.time)
                alarm(rec->switch_output.time);
        }

        /* No new samples this round: either finish draining or block in poll. */
        if (hits == thread->samples) {
            if (done || draining)
                break;
            err = fdarray__poll(&thread->pollfd, -1);
            /*
             * Propagate error, only if there's any. Ignore positive
             * number of returned events and interrupt error.
             */
            if (err > 0 || (err < 0 && errno == EINTR))
                err = 0;
            thread->waking++;

            if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
                        record__thread_munmap_filtered, NULL) == 0)
                draining = true;

            err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
            if (err)
                goto out_child;
        }

        /* Commands arriving on the --control fd. */
        if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
            switch (cmd) {
            case EVLIST_CTL_CMD_SNAPSHOT:
                hit_auxtrace_snapshot_trigger(rec);
                evlist__ctlfd_ack(rec->evlist);
                break;
            case EVLIST_CTL_CMD_STOP:
                done = 1;
                break;
            case EVLIST_CTL_CMD_ACK:
            case EVLIST_CTL_CMD_UNSUPPORTED:
            case EVLIST_CTL_CMD_ENABLE:
            case EVLIST_CTL_CMD_DISABLE:
            case EVLIST_CTL_CMD_EVLIST:
            case EVLIST_CTL_CMD_PING:
            default:
                break;
            }
        }

        err = event_enable_timer__process(rec->evlist->eet);
        if (err < 0)
            goto out_child;
        if (err) {
            err = 0;
            done = 1;
        }

        /*
         * When perf is starting the traced process, at the end events
         * die with the process and we wait for that. Thus no need to
         * disable events in this case.
         */
        if (done && !disabled && !target__none(&opts->target)) {
            trigger_off(&auxtrace_snapshot_trigger);
            evlist__disable(rec->evlist);
            disabled = true;
        }
    }

    trigger_off(&auxtrace_snapshot_trigger);
    trigger_off(&switch_output_trigger);

    record__synthesize_final_bpf_metadata(rec);

    if (opts->auxtrace_snapshot_on_exit)
        record__auxtrace_snapshot_exit(rec);

    if (forks && workload_exec_errno) {
        char msg[STRERR_BUFSIZE];
        const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
        struct strbuf sb = STRBUF_INIT;

        evlist__format_evsels(rec->evlist, &sb, 2048);

        pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
            sb.buf, argv[0], emsg);
        strbuf_release(&sb);
        err = -1;
        goto out_child;
    }

    if (!quiet)
        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
            record__waking(rec));

    write_finished_init(rec, true);

    if (target__none(&rec->opts.target))
        record__synthesize_workload(rec, true);

out_child:
    record__stop_threads(rec);
    record__mmap_read_all(rec, true);
out_free_threads:
    record__free_thread_data(rec);
    evlist__finalize_ctlfd(rec->evlist);
    record__aio_mmap_read_sync(rec);

    if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
        ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
        env->comp_ratio = ratio + 0.5;
    }

    /* Reap the workload and derive our exit status from it. */
    if (forks) {
        int exit_status;

        if (!child_finished)
            kill(rec->evlist->workload.pid, SIGTERM);

        wait(&exit_status);

        if (err < 0)
            status = err;
        else if (WIFEXITED(exit_status))
            status = WEXITSTATUS(exit_status);
        else if (WIFSIGNALED(exit_status))
            signr = WTERMSIG(exit_status);
    } else
        status = err;

    if (rec->off_cpu)
        rec->bytes_written += off_cpu_write(rec->session);

    record__read_lost_samples(rec);
    record__synthesize(rec, true);
    /* this will be recalculated during process_buildids() */
    rec->samples = 0;

    if (!err) {
        if (!rec->timestamp_filename) {
            record__finish_output(rec);
        } else {
            fd = record__switch_output(rec, true);
            if (fd < 0) {
                status = fd;
                goto out_delete_session;
            }
        }
    }

    perf_hooks__invoke_record_end();

    if (!err && !quiet) {
        char samples[128];
        const char *postfix = rec->timestamp_filename ?
                    ".<timestamp>" : "";

        if (rec->samples && !rec->opts.full_auxtrace)
            scnprintf(samples, sizeof(samples),
                  " (%" PRIu64 " samples)", rec->samples);
        else
            samples[0] = '\0';

        fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
            perf_data__size(data) / 1024.0 / 1024.0,
            data->path, postfix, samples);
        if (ratio) {
            fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
                rec->session->bytes_transferred / 1024.0 / 1024.0,
                ratio);
        }
        fprintf(stderr, " ]\n");
    }

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
    if (done_fd >= 0) {
        fd = done_fd;
        done_fd = -1;

        close(fd);
    }
#endif
    zstd_fini(&session->zstd_data);
    if (!opts->no_bpf_event)
        evlist__stop_sb_thread(rec->sb_evlist);

    perf_session__delete(session);
    return status;
}

/* Log the configured callchain mode (and dump size for DWARF) at debug level. */
static void callchain_debug(struct callchain_param *callchain)
{
    static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

    pr_debug("callchain: type %s\n", str[callchain->record_mode]);

    if (callchain->record_mode == CALLCHAIN_DWARF)
        pr_debug("callchain: stack dump size %d\n",
             callchain->dump_size);
}

/* Parse a --call-graph argument into callchain/record opts; 0 on success. */
int record_opts__parse_callchain(struct record_opts *record,
                 struct callchain_param *callchain,
                 const char *arg,
bool unset)
{
    int ret;
    callchain->enabled = !unset;

    /* --no-call-graph */
    if (unset) {
        callchain->record_mode = CALLCHAIN_NONE;
        pr_debug("callchain: disabled\n");
        return 0;
    }

    ret = parse_callchain_record_opt(arg, callchain);
    if (!ret) {
        /* Enable data address sampling for DWARF unwind. */
        if (callchain->record_mode == CALLCHAIN_DWARF)
            record->sample_address = true;
        callchain_debug(callchain);
    }

    return ret;
}

/* parse-options callback for --call-graph; delegates to the helper above. */
int record_parse_callchain_opt(const struct option *opt,
                   const char *arg,
                   int unset)
{
    return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

/* parse-options callback for -g: enable callchains, defaulting to FP mode. */
int record_callchain_opt(const struct option *opt,
             const char *arg __maybe_unused,
             int unset __maybe_unused)
{
    struct callchain_param *callchain = opt->value;

    callchain->enabled = true;

    if (callchain->record_mode == CALLCHAIN_NONE)
        callchain->record_mode = CALLCHAIN_FP;

    callchain_debug(callchain);
    return 0;
}

/*
 * perfconfig handler for the [record] section: build-id policy,
 * call-graph mode, AIO block count and debuginfod URLs.
 * Returns 0 on success, -1/-ENOMEM on bad values.
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
    struct record *rec = cb;

    if (!strcmp(var, "record.build-id")) {
        if (!strcmp(value, "cache"))
            rec->no_buildid_cache = false;
        else if (!strcmp(value, "no-cache"))
            rec->no_buildid_cache = true;
        else if (!strcmp(value, "skip"))
            rec->no_buildid = true;
        else if (!strcmp(value, "mmap"))
            rec->buildid_mmap = true;
        else if (!strcmp(value, "no-mmap"))
            rec->buildid_mmap = false;
        else
            return -1;
        return 0;
    }
    if (!strcmp(var, "record.call-graph")) {
        /* Reuse the generic call-graph.record-mode parser. */
        var = "call-graph.record-mode";
        return perf_default_config(var, value, cb);
    }
#ifdef HAVE_AIO_SUPPORT
    if (!strcmp(var, "record.aio")) {
        rec->opts.nr_cblocks = strtol(value, NULL, 0);
        if (!rec->opts.nr_cblocks)
            rec->opts.nr_cblocks = nr_cblocks_default;
    }
#endif
    if (!strcmp(var, "record.debuginfod")) {
        rec->debuginfod.urls = strdup(value);
        if (!rec->debuginfod.urls)
            return -ENOMEM;
        rec->debuginfod.set = true;
    }

    return 0;
}

/* parse-options callback for --delay; forwarded to the evlist helper. */
static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
{
    struct record *rec = (struct record *)opt->value;

    return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
}

/*
 * parse-options callback for --affinity=node|cpu. Unknown strings are
 * silently ignored (affinity left at its prior value).
 */
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
    struct record_opts *opts = (struct record_opts *)opt->value;

    if (unset || !str)
        return 0;

    if (!strcasecmp(str, "node"))
        opts->affinity = PERF_AFFINITY_NODE;
    else if (!strcasecmp(str, "cpu"))
        opts->affinity = PERF_AFFINITY_CPU;

    return 0;
}

/* Allocate a zeroed CPU bitmap of nr_bits bits; -ENOMEM on failure. */
static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
{
    mask->nbits = nr_bits;
    mask->bits = bitmap_zalloc(mask->nbits);
    if (!mask->bits)
        return -ENOMEM;

    return 0;
}

/* Release a CPU bitmap and reset its size. */
static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
{
    bitmap_free(mask->bits);
    mask->nbits = 0;
}

/*
 * Allocate both per-thread masks (maps + affinity) of nr_bits each.
 * On partial failure the already-allocated half is freed and the other
 * pointer NULLed so record__thread_mask_free() stays safe.
 */
static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
{
    int ret;

    ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
    if (ret) {
        mask->affinity.bits = NULL;
        return ret;
    }

    ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
    if (ret) {
        record__mmap_cpu_mask_free(&mask->maps);
        mask->maps.bits = NULL;
    }

    return ret;
}

/* Free both halves of a thread_mask. */
static void record__thread_mask_free(struct thread_mask *mask)
{
    record__mmap_cpu_mask_free(&mask->maps);
    record__mmap_cpu_mask_free(&mask->affinity);
}

/* parse-options callback for --threads=<spec>; see thread_spec_tags[]. */
static int record__parse_threads(const struct option *opt,
const char *str, int unset)
{
    int s;
    struct record_opts *opts = opt->value;

    /* No argument means the default per-CPU thread spec. */
    if (unset || !str || !strlen(str)) {
        opts->threads_spec = THREAD_SPEC__CPU;
    } else {
        /*
         * Try each named spec in order; anything that matches no tag
         * falls through to THREAD_SPEC__USER and is kept verbatim as a
         * user-provided mask specification.
         */
        for (s = 1; s < THREAD_SPEC__MAX; s++) {
            if (s == THREAD_SPEC__USER) {
                opts->threads_user_spec = strdup(str);
                if (!opts->threads_user_spec)
                    return -ENOMEM;
                opts->threads_spec = THREAD_SPEC__USER;
                break;
            }
            if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
                opts->threads_spec = s;
                break;
            }
        }
    }

    if (opts->threads_spec == THREAD_SPEC__USER)
        pr_debug("threads_spec: %s\n", opts->threads_user_spec);
    else
        pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);

    return 0;
}

/* parse-options callback for --max-size, accepting B/K/M/G suffixes. */
static int parse_output_max_size(const struct option *opt,
                 const char *str, int unset)
{
    unsigned long *s = (unsigned long *)opt->value;
    static struct parse_tag tags_size[] = {
        { .tag = 'B', .mult = 1 },
        { .tag = 'K', .mult = 1 << 10 },
        { .tag = 'M', .mult = 1 << 20 },
        { .tag = 'G', .mult = 1 << 30 },
        { .tag = 0 },
    };
    unsigned long val;

    if (unset) {
        *s = 0;
        return 0;
    }

    val = parse_tag_value(str, tags_size);
    if (val != (unsigned long) -1) {
        *s = val;
        return 0;
    }

    return -1;
}

/*
 * parse-options callback for -m/--mmap-pages. Accepts "<pages>[,<aux pages>]";
 * the part after the comma sizes the AUX area mmaps.
 */
static int record__parse_mmap_pages(const struct option *opt,
                    const char *str,
                    int unset __maybe_unused)
{
    struct record_opts *opts = opt->value;
    char *s, *p;
    unsigned int mmap_pages;
    int ret;

    if (!str)
        return -EINVAL;

    s = strdup(str);
    if (!s)
        return -ENOMEM;

    /* Split on the first comma: "<pages>,<auxtrace pages>". */
    p = strchr(s, ',');
    if (p)
        *p = '\0';

    if (*s) {
        ret = __evlist__parse_mmap_pages(&mmap_pages, s);
        if (ret)
            goto out_free;
        opts->mmap_pages = mmap_pages;
    }

    if (!p) {
        ret = 0;
        goto out_free;
    }

    ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
    if (ret)
        goto out_free;

    opts->auxtrace_mmap_pages = mmap_pages;

out_free:
    free(s);
    return ret;
}

/*
 * parse-options callback for --off-cpu-thresh: a decimal millisecond value,
 * stored in nanoseconds. Rejects trailing garbage and non-"0" inputs that
 * strtoull() could not parse.
 */
static int record__parse_off_cpu_thresh(const struct option *opt,
                    const char *str,
                    int unset __maybe_unused)
{
    struct record_opts *opts = opt->value;
    char *endptr;
    u64 off_cpu_thresh_ms;

    if (!str)
        return -EINVAL;

    off_cpu_thresh_ms = strtoull(str, &endptr, 10);

    /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
    if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
        return -EINVAL;
    else
        opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;

    return 0;
}

/* Weak hook; architectures override to add leaf-frame callchain options. */
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}

/* parse-options callback for --control fd:...,fifo:... specifications. */
static int parse_control_option(const struct option *opt,
                const char *str,
                int unset __maybe_unused)
{
    struct record_opts *opts = opt->value;

    return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
}

/*
 * Warn when the --switch-output size threshold is smaller than half the
 * kernel wakeup buffer, which would make output files larger than asked for.
 */
static void switch_output_size_warn(struct record *rec)
{
    u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
    struct switch_output *s = &rec->switch_output;

    wakeup_size /= 2;

    if (s->size < wakeup_size) {
        char buf[100];

        unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
        pr_warning("WARNING: switch-output data size lower than "
               "wakeup kernel buffer size (%s) "
               "expect bigger perf.data sizes\n", buf);
    }
}

/*
 * Interpret the --switch-output argument: "signal", a size (B/K/M/G) or a
 * time (s/m/h/d) threshold. Returns 0 on success (including the silently
 * ignored parallel-streaming cases), -1 on an unparsable argument.
 */
static int switch_output_setup(struct record *rec)
{
    struct switch_output *s = &rec->switch_output;
    static struct parse_tag tags_size[] = {
        { .tag = 'B', .mult = 1 },
        { .tag = 'K', .mult = 1 << 10 },
        { .tag = 'M',
.mult = 1 << 20 }, 3339 { .tag = 'G', .mult = 1 << 30 }, 3340 { .tag = 0 }, 3341 }; 3342 static struct parse_tag tags_time[] = { 3343 { .tag = 's', .mult = 1 }, 3344 { .tag = 'm', .mult = 60 }, 3345 { .tag = 'h', .mult = 60*60 }, 3346 { .tag = 'd', .mult = 60*60*24 }, 3347 { .tag = 0 }, 3348 }; 3349 unsigned long val; 3350 3351 /* 3352 * If we're using --switch-output-events, then we imply its 3353 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3354 * thread to its parent. 3355 */ 3356 if (rec->switch_output_event_set) { 3357 if (record__threads_enabled(rec)) { 3358 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3359 return 0; 3360 } 3361 goto do_signal; 3362 } 3363 3364 if (!s->set) 3365 return 0; 3366 3367 if (record__threads_enabled(rec)) { 3368 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3369 return 0; 3370 } 3371 3372 if (!strcmp(s->str, "signal")) { 3373 do_signal: 3374 s->signal = true; 3375 pr_debug("switch-output with SIGUSR2 signal\n"); 3376 goto enabled; 3377 } 3378 3379 val = parse_tag_value(s->str, tags_size); 3380 if (val != (unsigned long) -1) { 3381 s->size = val; 3382 pr_debug("switch-output with %s size threshold\n", s->str); 3383 goto enabled; 3384 } 3385 3386 val = parse_tag_value(s->str, tags_time); 3387 if (val != (unsigned long) -1) { 3388 s->time = val; 3389 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3390 s->str, s->time); 3391 goto enabled; 3392 } 3393 3394 return -1; 3395 3396 enabled: 3397 rec->timestamp_filename = true; 3398 s->enabled = true; 3399 3400 if (s->size && !rec->opts.no_buffering) 3401 switch_output_size_warn(rec); 3402 3403 return 0; 3404 } 3405 3406 static const char * const __record_usage[] = { 3407 "perf record [<options>] [<command>]", 3408 "perf record [<options>] -- <command> [<options>]", 3409 NULL 3410 }; 3411 const char * const *record_usage = __record_usage; 3412 3413 
/* MMAP handler used when collecting build-ids: skip kernel maps (see below). */
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;
	return perf_event__process_mmap(tool, event, sample, machine);
}

/* MMAP2 counterpart of build_id__process_mmap(), same kernel-map filtering. */
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap2(tool, event, sample, machine);
}

/* Track first/last sample time so the header can record the time window. */
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event __maybe_unused,
				      struct perf_sample *sample,
				      struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);
	return 0;
}

/* Parse the --synth option (no|all|task|mmap|cgroup) into opts->synth. */
static int parse_record_synth_option(const struct option *opt,
				     const char *str,
				     int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *p = strdup(str);

	if (p == NULL)
		return -1;

	/* parse_synth_opt() may modify the string, hence the strdup() above. */
	opts->synth = parse_synth_opt(p);
	free(p);

	if (opts->synth < 0) {
		pr_err("Invalid synth option: %s\n", str);
		return -1;
	}
	return 0;
}

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
		.ctl_fd              = -1,
		.ctl_fd_ack          = -1,
		.synth               = PERF_SYNTH_ALL,
		.off_cpu_thresh_ns   = OFFCPU_THRESH,
	},
	.buildid_mmap = true,
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* Set by --dry-run: parse options, then exit without recording. */
static bool dry_run;

static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/* --switch-output-event events go to the side-band evlist, not the main one. */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "latency", &record.latency,
		    "Enable data collection for latency profiling.\n"
		    "\t\t\t Use perf report --latency for latency-centric profile."),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts,
		     "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	/* Options selecting what extra data each sample carries. */
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
		    "Record the sampled data address data page size"),
	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
		    "Record the sampled code address (ip) page size"),
	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
		    "Record the data source for memory operations"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
		    "Record the sample identifier"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_CALLBACK('D', "delay", &record, "ms",
		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
		     record__parse_event_enable_time),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	/* Branch stack sampling options. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	/* AUX area (e.g. Intel PT) tracing options. */
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
			&record.opts.record_switch_events_set,
			"Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
		    "Record build-id in mmap events and skip build-id processing."),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3669 "Record timestamp boundary (time of first/last samples)"), 3670 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3671 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3672 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3673 "signal"), 3674 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3675 &record.switch_output_event_set, "switch output event", 3676 "switch output event selector. use 'perf list' to list available events", 3677 parse_events_option_new_evlist), 3678 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3679 "Limit number of switch output generated files"), 3680 OPT_BOOLEAN(0, "dry-run", &dry_run, 3681 "Parse options then exit"), 3682 #ifdef HAVE_AIO_SUPPORT 3683 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3684 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3685 record__aio_parse), 3686 #endif 3687 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3688 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3689 record__parse_affinity), 3690 #ifdef HAVE_ZSTD_SUPPORT 3691 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3692 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3693 record__parse_comp_level), 3694 #endif 3695 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3696 "size", "Limit the maximum size of the output file", parse_output_max_size), 3697 OPT_UINTEGER(0, "num-thread-synthesize", 3698 &record.opts.nr_threads_synthesize, 3699 "number of threads to run for event synthesis"), 3700 #ifdef HAVE_LIBPFM 3701 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3702 "libpfm4 event selector. 
use 'perf list' to list available events", 3703 parse_libpfm_events_option), 3704 #endif 3705 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3706 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3707 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3708 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3709 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3710 parse_control_option), 3711 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3712 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3713 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3714 &record.debuginfod.set, "debuginfod urls", 3715 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3716 "system"), 3717 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3718 "write collected trace data into several data files using parallel threads", 3719 record__parse_threads), 3720 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3721 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3722 "BPF filter action"), 3723 OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", 3724 "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", 3725 record__parse_off_cpu_thresh), 3726 OPT_END() 3727 }; 3728 3729 struct option *record_options = __record_options; 3730 3731 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3732 { 3733 struct perf_cpu cpu; 3734 int idx; 3735 3736 if (cpu_map__is_dummy(cpus)) 3737 return 0; 3738 3739 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3740 /* Return ENODEV is input cpu is greater than max cpu */ 3741 if ((unsigned long)cpu.cpu > mask->nbits) 3742 return -ENODEV; 3743 __set_bit(cpu.cpu, mask->bits); 3744 } 3745 3746 return 0; 3747 } 3748 3749 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3750 { 3751 struct perf_cpu_map *cpus; 3752 3753 cpus = perf_cpu_map__new(mask_spec); 3754 if (!cpus) 3755 return -ENOMEM; 3756 3757 bitmap_zero(mask->bits, mask->nbits); 3758 if (record__mmap_cpu_mask_init(mask, cpus)) 3759 return -ENODEV; 3760 3761 perf_cpu_map__put(cpus); 3762 3763 return 0; 3764 } 3765 3766 static void record__free_thread_masks(struct record *rec, int nr_threads) 3767 { 3768 int t; 3769 3770 if (rec->thread_masks) 3771 for (t = 0; t < nr_threads; t++) 3772 record__thread_mask_free(&rec->thread_masks[t]); 3773 3774 zfree(&rec->thread_masks); 3775 } 3776 3777 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3778 { 3779 int t, ret; 3780 3781 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3782 if (!rec->thread_masks) { 3783 pr_err("Failed to allocate thread masks\n"); 3784 return -ENOMEM; 3785 } 3786 3787 for (t = 0; t < nr_threads; t++) { 3788 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3789 if (ret) { 3790 pr_err("Failed to allocate thread masks[%d]\n", t); 3791 goto out_free; 3792 } 3793 } 3794 3795 return 0; 3796 3797 out_free: 3798 record__free_thread_masks(rec, nr_threads); 3799 3800 return ret; 3801 } 3802 3803 static int record__init_thread_cpu_masks(struct 
record *rec, struct perf_cpu_map *cpus)
{
	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);

	/* One streaming thread per monitored CPU. */
	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	rec->nr_threads = nr_cpus;
	pr_debug("nr_threads: %d\n", rec->nr_threads);

	for (t = 0; t < rec->nr_threads; t++) {
		/* Each thread maps and is affine to exactly its own CPU. */
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
	}

	return 0;
}

/*
 * Build rec->thread_masks from nr_spec pairs of textual CPU lists
 * (maps_spec[s] / affinity_spec[s]). Masks are validated against the
 * monitored CPUs: invalid CPUs are ignored, but empty or mutually
 * intersecting masks are rejected. On success rec->nr_threads holds the
 * number of masks installed; rec->thread_masks takes ownership of them.
 */
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	/* full_mask accumulates all masks seen so far to detect overlap. */
	struct thread_mask thread_mask, full_mask, *thread_masks;

	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		/*
		 * NOTE(review): full_mask.maps.nbits is used here rather than
		 * full_mask.affinity.nbits; both are allocated with
		 * cpu__max_cpu().cpu so the sizes match, but affinity.nbits
		 * would read more consistently — confirm intent.
		 */
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.maps.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		/* Ownership of thread_mask's bitmaps moves into the array ... */
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		/* ... so allocate a fresh working mask for the next iteration. */
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

/* --threads=core: one stream/affinity mask per physical core. */
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

/* --threads=package: one stream/affinity mask per CPU package (socket). */
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

/* --threads=numa: one stream/affinity mask per NUMA node. */
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

/*
 * --threads=<spec>: parse a user-provided list of "maps/affinity" pairs
 * separated by ':' (e.g. "0-3/0:4-7/4") and build the thread masks from it.
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	/* strtok_r() takes the string only on the first call, NULL afterwards. */
	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		/*
		 * dup_mask tracks the maps string until the matching affinity
		 * string is also allocated, so the failure path can free it
		 * (the cleanup loop only covers fully-populated pairs).
		 */
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

/* Single-threaded recording: one mask covering all monitored CPUs. */
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	/*
	 * NOTE(review): on this failure path the mask allocated just above is
	 * not freed here; cmd_record's cleanup frees with nr_threads == 0,
	 * which releases the array but not thread_masks[0]'s bitmaps — verify.
	 */
	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

/* Dispatch thread-mask initialization according to the --threads spec. */
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");
#ifndef HAVE_BPF_SKEL 4141 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 4142 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 4143 # undef set_nobuild 4144 #endif 4145 4146 /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 4147 symbol_conf.lazy_load_kernel_maps = true; 4148 rec->opts.affinity = PERF_AFFINITY_SYS; 4149 4150 rec->evlist = evlist__new(); 4151 if (rec->evlist == NULL) 4152 return -ENOMEM; 4153 4154 err = perf_config(perf_record_config, rec); 4155 if (err) 4156 return err; 4157 4158 argc = parse_options(argc, argv, record_options, record_usage, 4159 PARSE_OPT_STOP_AT_NON_OPTION); 4160 if (quiet) 4161 perf_quiet_option(); 4162 4163 err = symbol__validate_sym_arguments(); 4164 if (err) 4165 return err; 4166 4167 perf_debuginfod_setup(&record.debuginfod); 4168 4169 /* Make system wide (-a) the default target. */ 4170 if (!argc && target__none(&rec->opts.target)) 4171 rec->opts.target.system_wide = true; 4172 4173 if (nr_cgroups && !rec->opts.target.system_wide) { 4174 usage_with_options_msg(record_usage, record_options, 4175 "cgroup monitoring only available in system-wide mode"); 4176 4177 } 4178 4179 if (record.latency) { 4180 /* 4181 * There is no fundamental reason why latency profiling 4182 * can't work for system-wide mode, but exact semantics 4183 * and details are to be defined. 
4184 * See the following thread for details: 4185 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/ 4186 */ 4187 if (record.opts.target.system_wide) { 4188 pr_err("Failed: latency profiling is not supported with system-wide collection.\n"); 4189 err = -EINVAL; 4190 goto out_opts; 4191 } 4192 record.opts.record_switch_events = true; 4193 } 4194 4195 if (!rec->buildid_mmap) { 4196 pr_debug("Disabling build id in synthesized mmap2 events.\n"); 4197 symbol_conf.no_buildid_mmap2 = true; 4198 } else if (rec->buildid_mmap_set) { 4199 /* 4200 * Explicitly passing --buildid-mmap disables buildid processing 4201 * and cache generation. 4202 */ 4203 rec->no_buildid = true; 4204 } 4205 if (rec->buildid_mmap && !perf_can_record_build_id()) { 4206 pr_warning("Missing support for build id in kernel mmap events.\n" 4207 "Disable this warning with --no-buildid-mmap\n"); 4208 rec->buildid_mmap = false; 4209 } 4210 if (rec->buildid_mmap) { 4211 /* Enable perf_event_attr::build_id bit. */ 4212 rec->opts.build_id = true; 4213 } 4214 4215 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 4216 pr_err("Kernel has no cgroup sampling support.\n"); 4217 err = -EINVAL; 4218 goto out_opts; 4219 } 4220 4221 if (rec->opts.kcore) 4222 rec->opts.text_poke = true; 4223 4224 if (rec->opts.kcore || record__threads_enabled(rec)) 4225 rec->data.is_dir = true; 4226 4227 if (record__threads_enabled(rec)) { 4228 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 4229 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 4230 goto out_opts; 4231 } 4232 if (record__aio_enabled(rec)) { 4233 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 4234 goto out_opts; 4235 } 4236 } 4237 4238 if (rec->opts.comp_level != 0) { 4239 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 4240 rec->no_buildid = true; 4241 } 4242 4243 if (rec->opts.record_switch_events && 4244 
	     !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	/* Time-based output switching is driven by a periodic SIGALRM. */
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/* Pre-allocate slots used to remember rotated output file names. */
	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	/* Timestamped file names conflict with per-thread output files. */
	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * BPF filter pin/unpin is a standalone action: perform it and leave
	 * without recording (note the unconditional goto below).
	 */
	if (rec->filter_action) {
		if (!strcmp(rec->filter_action, "pin"))
			err = perf_bpf_filter__pin();
		else if (!strcmp(rec->filter_action, "unpin"))
			err = perf_bpf_filter__unpin();
		else {
			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
			err = -EINVAL;
		}
		goto out_opts;
	}

	/* For backward compatibility, -d implies --mem-info */
	if (rec->opts.sample_address)
		rec->opts.sample_data_src = true;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	/* No events given on the command line: fall back to the default set. */
	if (rec->evlist->core.nr_entries == 0) {
		struct evlist *def_evlist = evlist__new_default();

		if (!def_evlist)
			goto out;

		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
		evlist__delete(def_evlist);
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* Target validation problems are reported but are not fatal here. */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	/* Resolve --uid to a numeric uid and install a per-user event filter. */
	if (rec->uid_str) {
		uid_t uid = parse_uid(rec->uid_str);

		if (uid == UINT_MAX) {
			ui__error("Invalid User: %s", rec->uid_str);
			err = -EINVAL;
			goto out;
		}
		err = parse_uid_filter(rec->evlist, uid);
		if (err)
			goto out;

		/* User ID filtering implies system wide. */
		rec->opts.target.system_wide = true;
	}

	/* Enable ignoring missing threads when -p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		}
		else
			usage_with_options(record_usage, record_options);
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__config_tracking_events(rec);
	if (err) {
		pr_err("record__config_tracking_events failed, error %d\n", err);
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	/* Clamp the AIO block count to the supported maximum. */
	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	/* Clamp the compression level to the supported maximum. */
	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	/* Unified cleanup path: release resources acquired above. */
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	evlist__delete(rec->evlist);
	return err;
}

/*
 * Async signal handler: arms the AUX area snapshot trigger and, when
 * --switch-output is signal-driven, requests an output file rotation.
 * NOTE(review): the registration site is not visible in this chunk —
 * presumably installed for SIGUSR2; confirm against the setup code.
 */
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

/*
 * SIGALRM handler, armed when time-based --switch-output is configured:
 * requests an output file rotation when the periodic timer fires.
 */
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}