1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/callchain.h" 18 #include "util/cgroup.h" 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/mmap.h" 25 #include "util/mutex.h" 26 #include "util/target.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/stat.h" 30 #include "util/symbol.h" 31 #include "util/record.h" 32 #include "util/cpumap.h" 33 #include "util/thread_map.h" 34 #include "util/data.h" 35 #include "util/perf_regs.h" 36 #include "util/auxtrace.h" 37 #include "util/tsc.h" 38 #include "util/parse-branch-options.h" 39 #include "util/parse-regs-options.h" 40 #include "util/perf_api_probe.h" 41 #include "util/trigger.h" 42 #include "util/perf-hooks.h" 43 #include "util/cpu-set-sched.h" 44 #include "util/synthetic-events.h" 45 #include "util/time-utils.h" 46 #include "util/units.h" 47 #include "util/bpf-event.h" 48 #include "util/util.h" 49 #include "util/pfm.h" 50 #include "util/pmu.h" 51 #include "util/pmus.h" 52 #include "util/clockid.h" 53 #include "util/off_cpu.h" 54 #include "util/bpf-filter.h" 55 #include "util/strbuf.h" 56 #include "asm/bug.h" 57 #include "perf.h" 58 #include "cputopo.h" 59 60 #include <errno.h> 61 #include <inttypes.h> 62 #include <locale.h> 63 #include <poll.h> 64 #include <pthread.h> 65 #include <unistd.h> 66 #ifndef HAVE_GETTID 67 #include <syscall.h> 68 #endif 69 #include <sched.h> 70 #include <signal.h> 71 #ifdef HAVE_EVENTFD_SUPPORT 72 #include <sys/eventfd.h> 73 #endif 74 #include <sys/mman.h> 75 #include <sys/wait.h> 76 #include <sys/types.h> 77 #include <sys/stat.h> 78 #include <fcntl.h> 79 #include <linux/err.h> 80 #include <linux/string.h> 81 #include <linux/time64.h> 82 #include <linux/zalloc.h> 83 #include <linux/bitmap.h> 84 #include <sys/time.h> 85 86 struct switch_output { 87 bool enabled; 88 bool signal; 89 unsigned long size; 90 unsigned long time; 91 const char *str; 92 bool set; 93 char **filenames; 94 int num_files; 95 int cur_file; 96 }; 97 98 struct thread_mask { 99 struct mmap_cpu_mask maps; 100 struct mmap_cpu_mask affinity; 101 }; 102 103 struct record_thread { 104 pid_t tid; 105 struct thread_mask *mask; 106 struct { 107 int msg[2]; 108 int ack[2]; 109 } pipes; 110 struct fdarray pollfd; 111 int ctlfd_pos; 112 int nr_mmaps; 113 struct mmap **maps; 114 struct mmap **overwrite_maps; 115 struct record *rec; 116 unsigned long long samples; 117 unsigned long waking; 118 u64 bytes_written; 119 u64 bytes_transferred; 120 u64 bytes_compressed; 121 }; 122 123 static __thread struct record_thread *thread; 124 125 enum thread_msg { 126 THREAD_MSG__UNDEFINED = 0, 127 THREAD_MSG__READY, 128 THREAD_MSG__MAX, 129 }; 130 131 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 132 "UNDEFINED", "READY" 133 }; 134 135 enum thread_spec { 136 THREAD_SPEC__UNDEFINED = 0, 137 THREAD_SPEC__CPU, 138 THREAD_SPEC__CORE, 139 THREAD_SPEC__PACKAGE, 140 THREAD_SPEC__NUMA, 141 THREAD_SPEC__USER, 142 THREAD_SPEC__MAX, 143 }; 144 145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 146 "undefined", "cpu", "core", "package", "numa", "user" 147 }; 148 149 struct pollfd_index_map { 150 int evlist_pollfd_index; 151 int thread_pollfd_index; 152 }; 153 154 struct record { 155 struct perf_tool tool; 156 struct record_opts opts; 157 u64 bytes_written; 158 u64 thread_bytes_written; 159 struct perf_data data; 160 struct auxtrace_record *itr; 161 struct evlist *evlist; 162 struct perf_session *session; 163 struct evlist *sb_evlist; 164 pthread_t thread_id; 165 int realtime_prio; 166 bool latency; 167 bool switch_output_event_set; 168 bool no_buildid; 169 bool no_buildid_set; 170 bool no_buildid_cache; 171 bool no_buildid_cache_set; 172 bool buildid_all; 173 bool buildid_mmap; 174 bool timestamp_filename; 175 bool timestamp_boundary; 176 bool off_cpu; 177 const char *filter_action; 178 const char *uid_str; 179 struct switch_output switch_output; 180 unsigned long long samples; 181 unsigned long output_max_size; /* = 0: unlimited */ 182 struct perf_debuginfod debuginfod; 183 int nr_threads; 184 struct thread_mask *thread_masks; 185 struct record_thread *thread_data; 186 struct pollfd_index_map *index_map; 187 size_t index_map_sz; 188 size_t index_map_cnt; 189 }; 190 191 static volatile int done; 192 193 static volatile int auxtrace_record__snapshot_started; 194 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 195 static DEFINE_TRIGGER(switch_output_trigger); 196 197 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 198 "SYS", "NODE", "CPU" 199 }; 200 201 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 202 struct perf_sample *sample, struct machine *machine); 203 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 204 struct perf_sample *sample, struct machine *machine); 205 static int process_timestamp_boundary(const struct perf_tool *tool, 206 union perf_event *event, 207 struct perf_sample *sample, 208 struct machine *machine); 209 210 #ifndef HAVE_GETTID 211 static inline pid_t gettid(void) 212 { 213 return (pid_t)syscall(__NR_gettid); 214 } 215 #endif 216 217 static int record__threads_enabled(struct record *rec) 218 { 219 return rec->opts.threads_spec; 220 } 221 222 static bool switch_output_signal(struct record *rec) 223 { 224 return rec->switch_output.signal && 225 trigger_is_ready(&switch_output_trigger); 226 } 227 228 static bool switch_output_size(struct record *rec) 229 { 230 return rec->switch_output.size && 231 trigger_is_ready(&switch_output_trigger) && 232 (rec->bytes_written >= rec->switch_output.size); 233 } 234 235 static bool switch_output_time(struct record *rec) 236 { 237 return rec->switch_output.time && 238 trigger_is_ready(&switch_output_trigger); 239 } 240 241 static u64 record__bytes_written(struct record *rec) 242 { 243 return rec->bytes_written + rec->thread_bytes_written; 244 } 245 246 static bool record__output_max_size_exceeded(struct record *rec) 247 { 248 return rec->output_max_size && 249 (record__bytes_written(rec) >= rec->output_max_size); 250 } 251 252 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 253 void *bf, size_t size) 254 { 255 struct perf_data_file *file = &rec->session->data->file; 256 257 if (map && map->file) 258 file = map->file; 259 260 if (perf_data_file__write(file, bf, size) < 0) { 261 pr_err("failed to write perf data, error: %m\n"); 262 return -1; 263 } 264 265 if (map && map->file) { 266 thread->bytes_written += size; 267 rec->thread_bytes_written += size; 268 } else { 269 rec->bytes_written += size; 270 } 271 272 if (record__output_max_size_exceeded(rec) && !done) { 273 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 274 " stopping session ]\n", 275 record__bytes_written(rec) >> 10); 276 done = 1; 277 } 278 279 if (switch_output_size(rec)) 280 trigger_hit(&switch_output_trigger); 281 282 return 0; 283 } 284 285 static int record__aio_enabled(struct record *rec); 286 static int record__comp_enabled(struct record *rec); 287 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 288 void *dst, size_t dst_size, void *src, size_t src_size); 289 290 #ifdef HAVE_AIO_SUPPORT 291 static int record__aio_write(struct aiocb *cblock, int trace_fd, 292 void *buf, size_t size, off_t off) 293 { 294 int rc; 295 296 cblock->aio_fildes = trace_fd; 297 cblock->aio_buf = buf; 298 cblock->aio_nbytes = size; 299 cblock->aio_offset = off; 300 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 301 302 do { 303 rc = aio_write(cblock); 304 if (rc == 0) { 305 break; 306 } else if (errno != EAGAIN) { 307 cblock->aio_fildes = -1; 308 pr_err("failed to queue perf data, error: %m\n"); 309 break; 310 } 311 } while (1); 312 313 return rc; 314 } 315 316 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 317 { 318 void *rem_buf; 319 off_t rem_off; 320 size_t rem_size; 321 int rc, aio_errno; 322 ssize_t aio_ret, written; 323 324 aio_errno = aio_error(cblock); 325 if (aio_errno == EINPROGRESS) 326 return 0; 327 328 written = aio_ret = aio_return(cblock); 329 if (aio_ret < 0) { 330 if (aio_errno != EINTR) 331 pr_err("failed to write perf data, error: %m\n"); 332 written = 0; 333 } 334 335 rem_size = cblock->aio_nbytes - written; 336 337 if (rem_size == 0) { 338 cblock->aio_fildes = -1; 339 /* 340 * md->refcount is incremented in record__aio_pushfn() for 341 * every aio write request started in record__aio_push() so 342 * decrement it because the request is now complete. 343 */ 344 perf_mmap__put(&md->core); 345 rc = 1; 346 } else { 347 /* 348 * aio write request may require restart with the 349 * remainder if the kernel didn't write whole 350 * chunk at once. 351 */ 352 rem_off = cblock->aio_offset + written; 353 rem_buf = (void *)(cblock->aio_buf + written); 354 record__aio_write(cblock, cblock->aio_fildes, 355 rem_buf, rem_size, rem_off); 356 rc = 0; 357 } 358 359 return rc; 360 } 361 362 static int record__aio_sync(struct mmap *md, bool sync_all) 363 { 364 struct aiocb **aiocb = md->aio.aiocb; 365 struct aiocb *cblocks = md->aio.cblocks; 366 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 367 int i, do_suspend; 368 369 do { 370 do_suspend = 0; 371 for (i = 0; i < md->aio.nr_cblocks; ++i) { 372 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 373 if (sync_all) 374 aiocb[i] = NULL; 375 else 376 return i; 377 } else { 378 /* 379 * Started aio write is not complete yet 380 * so it has to be waited before the 381 * next allocation. 382 */ 383 aiocb[i] = &cblocks[i]; 384 do_suspend = 1; 385 } 386 } 387 if (!do_suspend) 388 return -1; 389 390 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 391 if (!(errno == EAGAIN || errno == EINTR)) 392 pr_err("failed to sync perf data, error: %m\n"); 393 } 394 } while (1); 395 } 396 397 struct record_aio { 398 struct record *rec; 399 void *data; 400 size_t size; 401 }; 402 403 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 404 { 405 struct record_aio *aio = to; 406 407 /* 408 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 409 * to release space in the kernel buffer as fast as possible, calling 410 * perf_mmap__consume() from perf_mmap__push() function. 411 * 412 * That lets the kernel to proceed with storing more profiling data into 413 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 414 * 415 * Coping can be done in two steps in case the chunk of profiling data 416 * crosses the upper bound of the kernel buffer. In this case we first move 417 * part of data from map->start till the upper bound and then the remainder 418 * from the beginning of the kernel buffer till the end of the data chunk. 419 */ 420 421 if (record__comp_enabled(aio->rec)) { 422 ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 423 mmap__mmap_len(map) - aio->size, 424 buf, size); 425 if (compressed < 0) 426 return (int)compressed; 427 428 size = compressed; 429 } else { 430 memcpy(aio->data + aio->size, buf, size); 431 } 432 433 if (!aio->size) { 434 /* 435 * Increment map->refcount to guard map->aio.data[] buffer 436 * from premature deallocation because map object can be 437 * released earlier than aio write request started on 438 * map->aio.data[] buffer is complete. 439 * 440 * perf_mmap__put() is done at record__aio_complete() 441 * after started aio request completion or at record__aio_push() 442 * if the request failed to start. 443 */ 444 perf_mmap__get(&map->core); 445 } 446 447 aio->size += size; 448 449 return size; 450 } 451 452 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 453 { 454 int ret, idx; 455 int trace_fd = rec->session->data->file.fd; 456 struct record_aio aio = { .rec = rec, .size = 0 }; 457 458 /* 459 * Call record__aio_sync() to wait till map->aio.data[] buffer 460 * becomes available after previous aio write operation. 461 */ 462 463 idx = record__aio_sync(map, false); 464 aio.data = map->aio.data[idx]; 465 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 466 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 467 return ret; 468 469 rec->samples++; 470 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 471 if (!ret) { 472 *off += aio.size; 473 rec->bytes_written += aio.size; 474 if (switch_output_size(rec)) 475 trigger_hit(&switch_output_trigger); 476 } else { 477 /* 478 * Decrement map->refcount incremented in record__aio_pushfn() 479 * back if record__aio_write() operation failed to start, otherwise 480 * map->refcount is decremented in record__aio_complete() after 481 * aio write operation finishes successfully. 482 */ 483 perf_mmap__put(&map->core); 484 } 485 486 return ret; 487 } 488 489 static off_t record__aio_get_pos(int trace_fd) 490 { 491 return lseek(trace_fd, 0, SEEK_CUR); 492 } 493 494 static void record__aio_set_pos(int trace_fd, off_t pos) 495 { 496 lseek(trace_fd, pos, SEEK_SET); 497 } 498 499 static void record__aio_mmap_read_sync(struct record *rec) 500 { 501 int i; 502 struct evlist *evlist = rec->evlist; 503 struct mmap *maps = evlist->mmap; 504 505 if (!record__aio_enabled(rec)) 506 return; 507 508 for (i = 0; i < evlist->core.nr_mmaps; i++) { 509 struct mmap *map = &maps[i]; 510 511 if (map->core.base) 512 record__aio_sync(map, true); 513 } 514 } 515 516 static int nr_cblocks_default = 1; 517 static int nr_cblocks_max = 4; 518 519 static int record__aio_parse(const struct option *opt, 520 const char *str, 521 int unset) 522 { 523 struct record_opts *opts = (struct record_opts *)opt->value; 524 525 if (unset) { 526 opts->nr_cblocks = 0; 527 } else { 528 if (str) 529 opts->nr_cblocks = strtol(str, NULL, 0); 530 if (!opts->nr_cblocks) 531 opts->nr_cblocks = nr_cblocks_default; 532 } 533 534 return 0; 535 } 536 #else /* HAVE_AIO_SUPPORT */ 537 static int nr_cblocks_max = 0; 538 539 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 540 off_t *off __maybe_unused) 541 { 542 return -1; 543 } 544 545 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 546 { 547 return -1; 548 } 549 550 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 551 { 552 } 553 554 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 555 { 556 } 557 #endif 558 559 static int record__aio_enabled(struct record *rec) 560 { 561 return rec->opts.nr_cblocks > 0; 562 } 563 564 #define MMAP_FLUSH_DEFAULT 1 565 static int record__mmap_flush_parse(const struct option *opt, 566 const char *str, 567 int unset) 568 { 569 int flush_max; 570 struct record_opts *opts = (struct record_opts *)opt->value; 571 static struct parse_tag tags[] = { 572 { .tag = 'B', .mult = 1 }, 573 { .tag = 'K', .mult = 1 << 10 }, 574 { .tag = 'M', .mult = 1 << 20 }, 575 { .tag = 'G', .mult = 1 << 30 }, 576 { .tag = 0 }, 577 }; 578 579 if (unset) 580 return 0; 581 582 if (str) { 583 opts->mmap_flush = parse_tag_value(str, tags); 584 if (opts->mmap_flush == (int)-1) 585 opts->mmap_flush = strtol(str, NULL, 0); 586 } 587 588 if (!opts->mmap_flush) 589 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 590 591 flush_max = evlist__mmap_size(opts->mmap_pages); 592 flush_max /= 4; 593 if (opts->mmap_flush > flush_max) 594 opts->mmap_flush = flush_max; 595 596 return 0; 597 } 598 599 #ifdef HAVE_ZSTD_SUPPORT 600 static unsigned int comp_level_default = 1; 601 602 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 603 { 604 struct record_opts *opts = opt->value; 605 606 if (unset) { 607 opts->comp_level = 0; 608 } else { 609 if (str) 610 opts->comp_level = strtol(str, NULL, 0); 611 if (!opts->comp_level) 612 opts->comp_level = comp_level_default; 613 } 614 615 return 0; 616 } 617 #endif 618 static unsigned int comp_level_max = 22; 619 620 static int record__comp_enabled(struct record *rec) 621 { 622 return rec->opts.comp_level > 0; 623 } 624 625 static int process_synthesized_event(const struct perf_tool *tool, 626 union perf_event *event, 627 struct perf_sample *sample __maybe_unused, 628 struct machine *machine __maybe_unused) 629 { 630 struct record *rec = container_of(tool, struct record, tool); 631 return record__write(rec, NULL, event, event->header.size); 632 } 633 634 static struct mutex synth_lock; 635 636 static int process_locked_synthesized_event(const struct perf_tool *tool, 637 union perf_event *event, 638 struct perf_sample *sample __maybe_unused, 639 struct machine *machine __maybe_unused) 640 { 641 int ret; 642 643 mutex_lock(&synth_lock); 644 ret = process_synthesized_event(tool, event, sample, machine); 645 mutex_unlock(&synth_lock); 646 return ret; 647 } 648 649 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 650 { 651 struct record *rec = to; 652 653 if (record__comp_enabled(rec)) { 654 struct perf_record_compressed2 *event = map->data; 655 size_t padding = 0; 656 u8 pad[8] = {0}; 657 ssize_t compressed = zstd_compress(rec->session, map, map->data, 658 mmap__mmap_len(map), bf, size); 659 660 if (compressed < 0) 661 return (int)compressed; 662 663 bf = event; 664 thread->samples++; 665 666 /* 667 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan 668 * error. We make it aligned here. 669 */ 670 event->data_size = compressed - sizeof(struct perf_record_compressed2); 671 event->header.size = PERF_ALIGN(compressed, sizeof(u64)); 672 padding = event->header.size - compressed; 673 return record__write(rec, map, bf, compressed) || 674 record__write(rec, map, &pad, padding); 675 } 676 677 thread->samples++; 678 return record__write(rec, map, bf, size); 679 } 680 681 static volatile sig_atomic_t signr = -1; 682 static volatile sig_atomic_t child_finished; 683 #ifdef HAVE_EVENTFD_SUPPORT 684 static volatile sig_atomic_t done_fd = -1; 685 #endif 686 687 static void sig_handler(int sig) 688 { 689 if (sig == SIGCHLD) 690 child_finished = 1; 691 else 692 signr = sig; 693 694 done = 1; 695 #ifdef HAVE_EVENTFD_SUPPORT 696 if (done_fd >= 0) { 697 u64 tmp = 1; 698 int orig_errno = errno; 699 700 /* 701 * It is possible for this signal handler to run after done is 702 * checked in the main loop, but before the perf counter fds are 703 * polled. If this happens, the poll() will continue to wait 704 * even though done is set, and will only break out if either 705 * another signal is received, or the counters are ready for 706 * read. To ensure the poll() doesn't sleep when done is set, 707 * use an eventfd (done_fd) to wake up the poll(). 708 */ 709 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 710 pr_err("failed to signal wakeup fd, error: %m\n"); 711 712 errno = orig_errno; 713 } 714 #endif // HAVE_EVENTFD_SUPPORT 715 } 716 717 static void sigsegv_handler(int sig) 718 { 719 perf_hooks__recover(); 720 sighandler_dump_stack(sig); 721 } 722 723 static void record__sig_exit(void) 724 { 725 if (signr == -1) 726 return; 727 728 signal(signr, SIG_DFL); 729 raise(signr); 730 } 731 732 #ifdef HAVE_AUXTRACE_SUPPORT 733 734 static int record__process_auxtrace(const struct perf_tool *tool, 735 struct mmap *map, 736 union perf_event *event, void *data1, 737 size_t len1, void *data2, size_t len2) 738 { 739 struct record *rec = container_of(tool, struct record, tool); 740 struct perf_data *data = &rec->data; 741 size_t padding; 742 u8 pad[8] = {0}; 743 744 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 745 off_t file_offset; 746 int fd = perf_data__fd(data); 747 int err; 748 749 file_offset = lseek(fd, 0, SEEK_CUR); 750 if (file_offset == -1) 751 return -1; 752 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 753 event, file_offset); 754 if (err) 755 return err; 756 } 757 758 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 759 padding = (len1 + len2) & 7; 760 if (padding) 761 padding = 8 - padding; 762 763 record__write(rec, map, event, event->header.size); 764 record__write(rec, map, data1, len1); 765 if (len2) 766 record__write(rec, map, data2, len2); 767 record__write(rec, map, &pad, padding); 768 769 return 0; 770 } 771 772 static int record__auxtrace_mmap_read(struct record *rec, 773 struct mmap *map) 774 { 775 int ret; 776 777 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 778 record__process_auxtrace); 779 if (ret < 0) 780 return ret; 781 782 if (ret) 783 rec->samples++; 784 785 return 0; 786 } 787 788 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 789 struct mmap *map) 790 { 791 int ret; 792 793 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 794 record__process_auxtrace, 795 rec->opts.auxtrace_snapshot_size); 796 if (ret < 0) 797 return ret; 798 799 if (ret) 800 rec->samples++; 801 802 return 0; 803 } 804 805 static int record__auxtrace_read_snapshot_all(struct record *rec) 806 { 807 int i; 808 int rc = 0; 809 810 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 811 struct mmap *map = &rec->evlist->mmap[i]; 812 813 if (!map->auxtrace_mmap.base) 814 continue; 815 816 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 817 rc = -1; 818 goto out; 819 } 820 } 821 out: 822 return rc; 823 } 824 825 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 826 { 827 pr_debug("Recording AUX area tracing snapshot\n"); 828 if (record__auxtrace_read_snapshot_all(rec) < 0) { 829 trigger_error(&auxtrace_snapshot_trigger); 830 } else { 831 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 832 trigger_error(&auxtrace_snapshot_trigger); 833 else 834 trigger_ready(&auxtrace_snapshot_trigger); 835 } 836 } 837 838 static int record__auxtrace_snapshot_exit(struct record *rec) 839 { 840 if (trigger_is_error(&auxtrace_snapshot_trigger)) 841 return 0; 842 843 if (!auxtrace_record__snapshot_started && 844 auxtrace_record__snapshot_start(rec->itr)) 845 return -1; 846 847 record__read_auxtrace_snapshot(rec, true); 848 if (trigger_is_error(&auxtrace_snapshot_trigger)) 849 return -1; 850 851 return 0; 852 } 853 854 static int record__auxtrace_init(struct record *rec) 855 { 856 int err; 857 858 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 859 && record__threads_enabled(rec)) { 860 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 861 return -EINVAL; 862 } 863 864 if (!rec->itr) { 865 rec->itr = auxtrace_record__init(rec->evlist, &err); 866 if (err) 867 return err; 868 } 869 870 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 871 rec->opts.auxtrace_snapshot_opts); 872 if (err) 873 return err; 874 875 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 876 rec->opts.auxtrace_sample_opts); 877 if (err) 878 return err; 879 880 err = auxtrace_parse_aux_action(rec->evlist); 881 if (err) 882 return err; 883 884 return auxtrace_parse_filters(rec->evlist); 885 } 886 887 #else 888 889 static inline 890 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 891 struct mmap *map __maybe_unused) 892 { 893 return 0; 894 } 895 896 static inline 897 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 898 bool on_exit __maybe_unused) 899 { 900 } 901 902 static inline 903 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 904 { 905 return 0; 906 } 907 908 static inline 909 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 910 { 911 return 0; 912 } 913 914 static int record__auxtrace_init(struct record *rec __maybe_unused) 915 { 916 return 0; 917 } 918 919 #endif 920 921 static int record__config_text_poke(struct evlist *evlist) 922 { 923 struct evsel *evsel; 924 925 /* Nothing to do if text poke is already configured */ 926 evlist__for_each_entry(evlist, evsel) { 927 if (evsel->core.attr.text_poke) 928 return 0; 929 } 930 931 evsel = evlist__add_dummy_on_all_cpus(evlist); 932 if (!evsel) 933 return -ENOMEM; 934 935 evsel->core.attr.text_poke = 1; 936 evsel->core.attr.ksymbol = 1; 937 evsel->immediate = true; 938 evsel__set_sample_bit(evsel, TIME); 939 940 return 0; 941 } 942 943 static int record__config_off_cpu(struct record *rec) 944 { 945 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 946 } 947 948 static bool record__tracking_system_wide(struct record *rec) 949 { 950 struct evlist *evlist = rec->evlist; 951 struct evsel *evsel; 952 953 /* 954 * If non-dummy evsel exists, system_wide sideband is need to 955 * help parse sample information. 956 * For example, PERF_EVENT_MMAP event to help parse symbol, 957 * and PERF_EVENT_COMM event to help parse task executable name. 958 */ 959 evlist__for_each_entry(evlist, evsel) { 960 if (!evsel__is_dummy_event(evsel)) 961 return true; 962 } 963 964 return false; 965 } 966 967 static int record__config_tracking_events(struct record *rec) 968 { 969 struct record_opts *opts = &rec->opts; 970 struct evlist *evlist = rec->evlist; 971 bool system_wide = false; 972 struct evsel *evsel; 973 974 /* 975 * For initial_delay, system wide or a hybrid system, we need to add 976 * tracking event so that we can track PERF_RECORD_MMAP to cover the 977 * delay of waiting or event synthesis. 978 */ 979 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 980 perf_pmus__num_core_pmus() > 1) { 981 982 /* 983 * User space tasks can migrate between CPUs, so when tracing 984 * selected CPUs, sideband for all CPUs is still needed. 985 */ 986 if (!!opts->target.cpu_list && record__tracking_system_wide(rec)) 987 system_wide = true; 988 989 evsel = evlist__findnew_tracking_event(evlist, system_wide); 990 if (!evsel) 991 return -ENOMEM; 992 993 /* 994 * Enable the tracking event when the process is forked for 995 * initial_delay, immediately for system wide. 996 */ 997 if (opts->target.initial_delay && !evsel->immediate && 998 !target__has_cpu(&opts->target)) 999 evsel->core.attr.enable_on_exec = 1; 1000 else 1001 evsel->immediate = 1; 1002 } 1003 1004 return 0; 1005 } 1006 1007 static bool record__kcore_readable(struct machine *machine) 1008 { 1009 char kcore[PATH_MAX]; 1010 int fd; 1011 1012 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 1013 1014 fd = open(kcore, O_RDONLY); 1015 if (fd < 0) 1016 return false; 1017 1018 close(fd); 1019 1020 return true; 1021 } 1022 1023 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 1024 { 1025 char from_dir[PATH_MAX]; 1026 char kcore_dir[PATH_MAX]; 1027 int ret; 1028 1029 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 1030 1031 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 1032 if (ret) 1033 return ret; 1034 1035 return kcore_copy(from_dir, kcore_dir); 1036 } 1037 1038 static void record__thread_data_init_pipes(struct record_thread *thread_data) 1039 { 1040 thread_data->pipes.msg[0] = -1; 1041 thread_data->pipes.msg[1] = -1; 1042 thread_data->pipes.ack[0] = -1; 1043 thread_data->pipes.ack[1] = -1; 1044 } 1045 1046 static int record__thread_data_open_pipes(struct record_thread *thread_data) 1047 { 1048 if (pipe(thread_data->pipes.msg)) 1049 return -EINVAL; 1050 1051 if (pipe(thread_data->pipes.ack)) { 1052 close(thread_data->pipes.msg[0]); 1053 thread_data->pipes.msg[0] = -1; 1054 close(thread_data->pipes.msg[1]); 1055 thread_data->pipes.msg[1] = -1; 1056 return -EINVAL; 1057 } 1058 1059 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 1060 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 1061 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 1062 1063 return 0; 1064 } 1065 1066 static void record__thread_data_close_pipes(struct record_thread *thread_data) 1067 { 1068 if (thread_data->pipes.msg[0] != -1) { 1069 close(thread_data->pipes.msg[0]); 1070 thread_data->pipes.msg[0] = -1; 1071 } 1072 if (thread_data->pipes.msg[1] != -1) { 1073 close(thread_data->pipes.msg[1]); 1074 thread_data->pipes.msg[1] = -1; 1075 } 1076 if (thread_data->pipes.ack[0] != -1) { 1077 close(thread_data->pipes.ack[0]); 1078 thread_data->pipes.ack[0] = -1; 1079 } 1080 if (thread_data->pipes.ack[1] != -1) { 1081 close(thread_data->pipes.ack[1]); 1082 thread_data->pipes.ack[1] = -1; 1083 } 1084 } 1085 1086 static bool evlist__per_thread(struct evlist *evlist) 1087 { 1088 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 1089 } 1090 1091 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 1092 { 1093 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 1094 struct mmap *mmap = evlist->mmap; 1095 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 1096 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1097 bool per_thread = evlist__per_thread(evlist); 1098 1099 if (per_thread) 1100 thread_data->nr_mmaps = nr_mmaps; 1101 else 1102 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1103 thread_data->mask->maps.nbits); 1104 if (mmap) { 1105 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1106 if (!thread_data->maps) 1107 return -ENOMEM; 1108 } 1109 if (overwrite_mmap) { 1110 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1111 if (!thread_data->overwrite_maps) { 1112 zfree(&thread_data->maps); 1113 return -ENOMEM; 1114 } 1115 } 1116 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1117 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1118 1119 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1120 if (per_thread || 1121 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1122 if (thread_data->maps) { 1123 thread_data->maps[tm] = &mmap[m]; 1124 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1125 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1126 } 1127 if (thread_data->overwrite_maps) { 1128 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1129 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1130 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1131 } 1132 tm++; 1133 } 1134 } 1135 1136 return 0; 1137 } 1138 1139 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1140 { 1141 int f, tm, pos; 1142 struct mmap *map, *overwrite_map; 1143 1144 fdarray__init(&thread_data->pollfd, 64); 1145 1146 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1147 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1148 overwrite_map = thread_data->overwrite_maps ? 1149 thread_data->overwrite_maps[tm] : NULL; 1150 1151 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1152 void *ptr = evlist->core.pollfd.priv[f].ptr; 1153 1154 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1155 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1156 &evlist->core.pollfd); 1157 if (pos < 0) 1158 return pos; 1159 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1160 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1161 } 1162 } 1163 } 1164 1165 return 0; 1166 } 1167 1168 static void record__free_thread_data(struct record *rec) 1169 { 1170 int t; 1171 struct record_thread *thread_data = rec->thread_data; 1172 1173 if (thread_data == NULL) 1174 return; 1175 1176 for (t = 0; t < rec->nr_threads; t++) { 1177 record__thread_data_close_pipes(&thread_data[t]); 1178 zfree(&thread_data[t].maps); 1179 zfree(&thread_data[t].overwrite_maps); 1180 fdarray__exit(&thread_data[t].pollfd); 1181 } 1182 1183 zfree(&rec->thread_data); 1184 } 1185 1186 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1187 int evlist_pollfd_index, 1188 int thread_pollfd_index) 1189 { 1190 size_t x = rec->index_map_cnt; 1191 1192 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1193 return -ENOMEM; 1194 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1195 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1196 rec->index_map_cnt += 1; 1197 return 0; 1198 } 1199 1200 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1201 struct evlist *evlist, 1202 struct record_thread *thread_data) 1203 { 1204 struct pollfd *e_entries = evlist->core.pollfd.entries; 1205 struct pollfd *t_entries = thread_data->pollfd.entries; 1206 int err = 0; 1207 size_t i; 1208 1209 for (i = 0; i < rec->index_map_cnt; i++) { 1210 int e_pos = rec->index_map[i].evlist_pollfd_index; 1211 int t_pos = rec->index_map[i].thread_pollfd_index; 1212 1213 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1214 e_entries[e_pos].events != t_entries[t_pos].events) { 1215 pr_err("Thread and evlist pollfd index mismatch\n"); 1216 err = -EINVAL; 1217 continue; 1218 } 1219 e_entries[e_pos].revents = t_entries[t_pos].revents; 1220 } 1221 return err; 1222 } 1223 1224 static int record__dup_non_perf_events(struct record *rec, 1225 struct evlist *evlist, 1226 struct record_thread *thread_data) 1227 { 1228 struct fdarray *fda = &evlist->core.pollfd; 1229 int i, ret; 1230 1231 for (i = 0; i < fda->nr; i++) { 1232 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1233 continue; 1234 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1235 if (ret < 0) { 1236 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1237 return ret; 1238 } 1239 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1240 thread_data, ret, fda->entries[i].fd); 1241 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1242 if (ret < 0) { 1243 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1244 return ret; 1245 } 1246 } 1247 return 0; 1248 } 1249 1250 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1251 { 1252 int t, ret; 1253 struct record_thread *thread_data; 1254 1255 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1256 if (!rec->thread_data) { 1257 pr_err("Failed to allocate thread data\n"); 1258 return -ENOMEM; 1259 } 1260 thread_data = rec->thread_data; 1261 1262 for (t = 0; t < rec->nr_threads; t++) 1263 record__thread_data_init_pipes(&thread_data[t]); 1264 1265 for (t = 0; t < rec->nr_threads; t++) { 1266 thread_data[t].rec = rec; 1267 thread_data[t].mask = &rec->thread_masks[t]; 1268 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1269 if (ret) { 1270 pr_err("Failed to initialize thread[%d] maps\n", t); 1271 goto out_free; 1272 } 1273 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1274 if (ret) { 1275 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1276 goto out_free; 1277 } 1278 if (t) { 1279 thread_data[t].tid = -1; 1280 ret = record__thread_data_open_pipes(&thread_data[t]); 1281 if (ret) { 1282 pr_err("Failed to open thread[%d] communication pipes\n", t); 1283 goto out_free; 1284 } 1285 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1286 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1287 if (ret < 0) { 1288 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1289 goto out_free; 1290 } 1291 thread_data[t].ctlfd_pos = ret; 1292 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1293 thread_data, thread_data[t].ctlfd_pos, 1294 thread_data[t].pipes.msg[0]); 1295 } else { 1296 thread_data[t].tid = gettid(); 1297 1298 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1299 if (ret < 0) 1300 goto out_free; 1301 1302 thread_data[t].ctlfd_pos = -1; /* Not used */ 1303 } 1304 } 1305 1306 return 0; 1307 1308 out_free: 1309 record__free_thread_data(rec); 1310 1311 return ret; 1312 } 1313 1314 static int record__mmap_evlist(struct record *rec, 1315 struct evlist *evlist) 1316 { 1317 int i, ret; 1318 struct record_opts *opts = &rec->opts; 1319 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1320 opts->auxtrace_sample_mode; 1321 char msg[512]; 1322 1323 if (opts->affinity != PERF_AFFINITY_SYS) 1324 cpu__setup_cpunode_map(); 1325 1326 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1327 opts->auxtrace_mmap_pages, 1328 auxtrace_overwrite, 1329 opts->nr_cblocks, opts->affinity, 1330 opts->mmap_flush, opts->comp_level) < 0) { 1331 if (errno == EPERM) { 1332 pr_err("Permission error mapping pages.\n" 1333 "Consider increasing " 1334 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1335 "or try again with a smaller value of -m/--mmap_pages.\n" 1336 "(current value: %u,%u)\n", 1337 opts->mmap_pages, opts->auxtrace_mmap_pages); 1338 return -errno; 1339 } else { 1340 pr_err("failed to mmap with %d (%s)\n", errno, 1341 str_error_r(errno, msg, sizeof(msg))); 1342 if (errno) 1343 return -errno; 1344 else 1345 return -EINVAL; 1346 } 1347 } 1348 1349 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1350 return -1; 1351 1352 ret = record__alloc_thread_data(rec, evlist); 1353 if (ret) 1354 return ret; 1355 1356 if (record__threads_enabled(rec)) { 1357 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1358 if (ret) { 1359 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1360 return ret; 1361 } 1362 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1363 if (evlist->mmap) 1364 evlist->mmap[i].file = &rec->data.dir.files[i]; 1365 if (evlist->overwrite_mmap) 1366 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1367 } 1368 } 1369 1370 return 0; 1371 } 1372 1373 static int record__mmap(struct record *rec) 1374 { 1375 return record__mmap_evlist(rec, rec->evlist); 1376 } 1377 1378 static int record__open(struct record *rec) 1379 { 1380 char msg[BUFSIZ]; 1381 struct evsel *pos; 1382 struct evlist *evlist = rec->evlist; 1383 struct perf_session *session = rec->session; 1384 struct record_opts *opts = &rec->opts; 1385 int rc = 0; 1386 1387 evlist__for_each_entry(evlist, pos) { 1388 try_again: 1389 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1390 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1391 if (verbose > 0) 1392 ui__warning("%s\n", msg); 1393 goto try_again; 1394 } 1395 if ((errno == EINVAL || errno == EBADF) && 1396 pos->core.leader != &pos->core && 1397 pos->weak_group) { 1398 pos = evlist__reset_weak_group(evlist, pos, true); 1399 goto try_again; 1400 } 1401 rc = -errno; 1402 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1403 ui__error("%s\n", msg); 1404 goto out; 1405 } 1406 1407 pos->supported = true; 1408 } 1409 1410 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1411 pr_warning( 1412 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1413 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1414 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1415 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1416 "Samples in kernel modules won't be resolved at all.\n\n" 1417 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1418 "even with a suitable vmlinux or kallsyms file.\n\n"); 1419 } 1420 1421 if (evlist__apply_filters(evlist, &pos, &opts->target)) { 1422 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1423 pos->filter ?: "BPF", evsel__name(pos), errno, 1424 str_error_r(errno, msg, sizeof(msg))); 1425 rc = -1; 1426 goto out; 1427 } 1428 1429 rc = record__mmap(rec); 1430 if (rc) 1431 goto out; 1432 1433 session->evlist = evlist; 1434 perf_session__set_id_hdr_size(session); 1435 out: 1436 return rc; 1437 } 1438 1439 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1440 { 1441 if (rec->evlist->first_sample_time == 0) 1442 rec->evlist->first_sample_time = sample_time; 1443 1444 if (sample_time) 1445 rec->evlist->last_sample_time = sample_time; 1446 } 1447 1448 static int process_sample_event(const struct perf_tool *tool, 1449 union perf_event *event, 1450 struct perf_sample *sample, 1451 struct evsel *evsel, 1452 struct machine *machine) 1453 { 1454 struct record *rec = container_of(tool, struct record, tool); 1455 1456 set_timestamp_boundary(rec, sample->time); 1457 1458 if (rec->buildid_all) 1459 return 0; 1460 1461 rec->samples++; 1462 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1463 } 1464 1465 static int process_buildids(struct record *rec) 1466 { 1467 struct perf_session *session = rec->session; 1468 1469 if (perf_data__size(&rec->data) == 0) 1470 return 0; 1471 1472 /* 1473 * During this process, it'll load kernel map and replace the 1474 * dso->long_name to a real pathname it found. In this case 1475 * we prefer the vmlinux path like 1476 * /lib/modules/3.16.4/build/vmlinux 1477 * 1478 * rather than build-id path (in debug directory). 1479 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1480 */ 1481 symbol_conf.ignore_vmlinux_buildid = true; 1482 1483 /* 1484 * If --buildid-all is given, it marks all DSO regardless of hits, 1485 * so no need to process samples. But if timestamp_boundary is enabled, 1486 * it still needs to walk on all samples to get the timestamps of 1487 * first/last samples. 1488 */ 1489 if (rec->buildid_all && !rec->timestamp_boundary) 1490 rec->tool.sample = process_event_sample_stub; 1491 1492 return perf_session__process_events(session); 1493 } 1494 1495 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1496 { 1497 int err; 1498 struct perf_tool *tool = data; 1499 /* 1500 *As for guest kernel when processing subcommand record&report, 1501 *we arrange module mmap prior to guest kernel mmap and trigger 1502 *a preload dso because default guest module symbols are loaded 1503 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1504 *method is used to avoid symbol missing when the first addr is 1505 *in module instead of in guest kernel. 1506 */ 1507 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1508 machine); 1509 if (err < 0) 1510 pr_err("Couldn't record guest kernel [%d]'s reference" 1511 " relocation symbol.\n", machine->pid); 1512 1513 /* 1514 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1515 * have no _text sometimes. 1516 */ 1517 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1518 machine); 1519 if (err < 0) 1520 pr_err("Couldn't record guest kernel [%d]'s reference" 1521 " relocation symbol.\n", machine->pid); 1522 } 1523 1524 static struct perf_event_header finished_round_event = { 1525 .size = sizeof(struct perf_event_header), 1526 .type = PERF_RECORD_FINISHED_ROUND, 1527 }; 1528 1529 static struct perf_event_header finished_init_event = { 1530 .size = sizeof(struct perf_event_header), 1531 .type = PERF_RECORD_FINISHED_INIT, 1532 }; 1533 1534 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1535 { 1536 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1537 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1538 thread->mask->affinity.nbits)) { 1539 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1540 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1541 map->affinity_mask.bits, thread->mask->affinity.nbits); 1542 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1543 (cpu_set_t *)thread->mask->affinity.bits); 1544 if (verbose == 2) { 1545 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1546 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1547 } 1548 } 1549 } 1550 1551 static size_t process_comp_header(void *record, size_t increment) 1552 { 1553 struct perf_record_compressed2 *event = record; 1554 size_t size = sizeof(*event); 1555 1556 if (increment) { 1557 event->header.size += increment; 1558 return increment; 1559 } 1560 1561 event->header.type = PERF_RECORD_COMPRESSED2; 1562 event->header.size = size; 1563 1564 return size; 1565 } 1566 1567 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1568 void *dst, size_t dst_size, void *src, size_t src_size) 1569 { 1570 ssize_t compressed; 1571 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1; 1572 struct zstd_data *zstd_data = &session->zstd_data; 1573 1574 if (map && map->file) 1575 zstd_data = &map->zstd_data; 1576 1577 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1578 max_record_size, process_comp_header); 1579 if (compressed < 0) 1580 return compressed; 1581 1582 if (map && map->file) { 1583 thread->bytes_transferred += src_size; 1584 thread->bytes_compressed += compressed; 1585 } else { 1586 session->bytes_transferred += src_size; 1587 session->bytes_compressed += compressed; 1588 } 1589 1590 return compressed; 1591 } 1592 1593 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1594 bool overwrite, bool synch) 1595 { 1596 u64 bytes_written = rec->bytes_written; 1597 int i; 1598 int rc = 0; 1599 int nr_mmaps; 1600 struct mmap **maps; 1601 int trace_fd = rec->data.file.fd; 1602 off_t off = 0; 1603 1604 if (!evlist) 1605 return 0; 1606 1607 nr_mmaps = thread->nr_mmaps; 1608 maps = overwrite ? thread->overwrite_maps : thread->maps; 1609 1610 if (!maps) 1611 return 0; 1612 1613 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1614 return 0; 1615 1616 if (record__aio_enabled(rec)) 1617 off = record__aio_get_pos(trace_fd); 1618 1619 for (i = 0; i < nr_mmaps; i++) { 1620 u64 flush = 0; 1621 struct mmap *map = maps[i]; 1622 1623 if (map->core.base) { 1624 record__adjust_affinity(rec, map); 1625 if (synch) { 1626 flush = map->core.flush; 1627 map->core.flush = 1; 1628 } 1629 if (!record__aio_enabled(rec)) { 1630 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1631 if (synch) 1632 map->core.flush = flush; 1633 rc = -1; 1634 goto out; 1635 } 1636 } else { 1637 if (record__aio_push(rec, map, &off) < 0) { 1638 record__aio_set_pos(trace_fd, off); 1639 if (synch) 1640 map->core.flush = flush; 1641 rc = -1; 1642 goto out; 1643 } 1644 } 1645 if (synch) 1646 map->core.flush = flush; 1647 } 1648 1649 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1650 !rec->opts.auxtrace_sample_mode && 1651 record__auxtrace_mmap_read(rec, map) != 0) { 1652 rc = -1; 1653 goto out; 1654 } 1655 } 1656 1657 if (record__aio_enabled(rec)) 1658 record__aio_set_pos(trace_fd, off); 1659 1660 /* 1661 * Mark the round finished in case we wrote 1662 * at least one event. 1663 * 1664 * No need for round events in directory mode, 1665 * because per-cpu maps and files have data 1666 * sorted by kernel. 1667 */ 1668 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1669 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1670 1671 if (overwrite) 1672 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1673 out: 1674 return rc; 1675 } 1676 1677 static int record__mmap_read_all(struct record *rec, bool synch) 1678 { 1679 int err; 1680 1681 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1682 if (err) 1683 return err; 1684 1685 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1686 } 1687 1688 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1689 void *arg __maybe_unused) 1690 { 1691 struct perf_mmap *map = fda->priv[fd].ptr; 1692 1693 if (map) 1694 perf_mmap__put(map); 1695 } 1696 1697 static void *record__thread(void *arg) 1698 { 1699 enum thread_msg msg = THREAD_MSG__READY; 1700 bool terminate = false; 1701 struct fdarray *pollfd; 1702 int err, ctlfd_pos; 1703 1704 thread = arg; 1705 thread->tid = gettid(); 1706 1707 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1708 if (err == -1) 1709 pr_warning("threads[%d]: failed to notify on start: %s\n", 1710 thread->tid, strerror(errno)); 1711 1712 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1713 1714 pollfd = &thread->pollfd; 1715 ctlfd_pos = thread->ctlfd_pos; 1716 1717 for (;;) { 1718 unsigned long long hits = thread->samples; 1719 1720 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1721 break; 1722 1723 if (hits == thread->samples) { 1724 1725 err = fdarray__poll(pollfd, -1); 1726 /* 1727 * Propagate error, only if there's any. Ignore positive 1728 * number of returned events and interrupt error. 1729 */ 1730 if (err > 0 || (err < 0 && errno == EINTR)) 1731 err = 0; 1732 thread->waking++; 1733 1734 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1735 record__thread_munmap_filtered, NULL) == 0) 1736 break; 1737 } 1738 1739 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1740 terminate = true; 1741 close(thread->pipes.msg[0]); 1742 thread->pipes.msg[0] = -1; 1743 pollfd->entries[ctlfd_pos].fd = -1; 1744 pollfd->entries[ctlfd_pos].events = 0; 1745 } 1746 1747 pollfd->entries[ctlfd_pos].revents = 0; 1748 } 1749 record__mmap_read_all(thread->rec, true); 1750 1751 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1752 if (err == -1) 1753 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1754 thread->tid, strerror(errno)); 1755 1756 return NULL; 1757 } 1758 1759 static void record__init_features(struct record *rec) 1760 { 1761 struct perf_session *session = rec->session; 1762 int feat; 1763 1764 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1765 perf_header__set_feat(&session->header, feat); 1766 1767 if (rec->no_buildid) 1768 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1769 1770 if (!have_tracepoints(&rec->evlist->core.entries)) 1771 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1772 1773 if (!rec->opts.branch_stack) 1774 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1775 1776 if (!rec->opts.full_auxtrace) 1777 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1778 1779 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1780 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1781 1782 if (!rec->opts.use_clockid) 1783 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1784 1785 if (!record__threads_enabled(rec)) 1786 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1787 1788 if (!record__comp_enabled(rec)) 1789 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1790 1791 perf_header__clear_feat(&session->header, HEADER_STAT); 1792 } 1793 1794 static void 1795 record__finish_output(struct record *rec) 1796 { 1797 int i; 1798 struct perf_data *data = &rec->data; 1799 int fd = perf_data__fd(data); 1800 1801 if (data->is_pipe) { 1802 /* Just to display approx. size */ 1803 data->file.size = rec->bytes_written; 1804 return; 1805 } 1806 1807 rec->session->header.data_size += rec->bytes_written; 1808 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1809 if (record__threads_enabled(rec)) { 1810 for (i = 0; i < data->dir.nr; i++) 1811 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1812 } 1813 1814 if (!rec->no_buildid) { 1815 process_buildids(rec); 1816 1817 if (rec->buildid_all) 1818 perf_session__dsos_hit_all(rec->session); 1819 } 1820 perf_session__write_header(rec->session, rec->evlist, fd, true); 1821 1822 return; 1823 } 1824 1825 static int record__synthesize_workload(struct record *rec, bool tail) 1826 { 1827 int err; 1828 struct perf_thread_map *thread_map; 1829 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1830 1831 if (rec->opts.tail_synthesize != tail) 1832 return 0; 1833 1834 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1835 if (thread_map == NULL) 1836 return -1; 1837 1838 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1839 process_synthesized_event, 1840 &rec->session->machines.host, 1841 needs_mmap, 1842 rec->opts.sample_address); 1843 perf_thread_map__put(thread_map); 1844 return err; 1845 } 1846 1847 static int write_finished_init(struct record *rec, bool tail) 1848 { 1849 if (rec->opts.tail_synthesize != tail) 1850 return 0; 1851 1852 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1853 } 1854 1855 static int record__synthesize(struct record *rec, bool tail); 1856 1857 static int 1858 record__switch_output(struct record *rec, bool at_exit) 1859 { 1860 struct perf_data *data = &rec->data; 1861 char *new_filename = NULL; 1862 int fd, err; 1863 1864 /* Same Size: "2015122520103046"*/ 1865 char timestamp[] = "InvalidTimestamp"; 1866 1867 record__aio_mmap_read_sync(rec); 1868 1869 write_finished_init(rec, true); 1870 1871 record__synthesize(rec, true); 1872 if (target__none(&rec->opts.target)) 1873 record__synthesize_workload(rec, true); 1874 1875 rec->samples = 0; 1876 record__finish_output(rec); 1877 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1878 if (err) { 1879 pr_err("Failed to get current timestamp\n"); 1880 return -EINVAL; 1881 } 1882 1883 fd = perf_data__switch(data, timestamp, 1884 rec->session->header.data_offset, 1885 at_exit, &new_filename); 1886 if (fd >= 0 && !at_exit) { 1887 rec->bytes_written = 0; 1888 rec->session->header.data_size = 0; 1889 } 1890 1891 if (!quiet) { 1892 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1893 data->path, timestamp); 1894 } 1895 1896 if (rec->switch_output.num_files) { 1897 int n = rec->switch_output.cur_file + 1; 1898 1899 if (n >= rec->switch_output.num_files) 1900 n = 0; 1901 rec->switch_output.cur_file = n; 1902 if (rec->switch_output.filenames[n]) { 1903 remove(rec->switch_output.filenames[n]); 1904 zfree(&rec->switch_output.filenames[n]); 1905 } 1906 rec->switch_output.filenames[n] = new_filename; 1907 } else { 1908 free(new_filename); 1909 } 1910 1911 /* Output tracking events */ 1912 if (!at_exit) { 1913 record__synthesize(rec, false); 1914 1915 /* 1916 * In 'perf record --switch-output' without -a, 1917 * record__synthesize() in record__switch_output() won't 1918 * generate tracking events because there's no thread_map 1919 * in evlist. Which causes newly created perf.data doesn't 1920 * contain map and comm information. 1921 * Create a fake thread_map and directly call 1922 * perf_event__synthesize_thread_map() for those events. 1923 */ 1924 if (target__none(&rec->opts.target)) 1925 record__synthesize_workload(rec, false); 1926 write_finished_init(rec, false); 1927 } 1928 return fd; 1929 } 1930 1931 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1932 struct perf_record_lost_samples *lost, 1933 int cpu_idx, int thread_idx, u64 lost_count, 1934 u16 misc_flag) 1935 { 1936 struct perf_sample_id *sid; 1937 struct perf_sample sample; 1938 int id_hdr_size; 1939 1940 perf_sample__init(&sample, /*all=*/true); 1941 lost->lost = lost_count; 1942 if (evsel->core.ids) { 1943 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1944 sample.id = sid->id; 1945 } 1946 1947 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1948 evsel->core.attr.sample_type, &sample); 1949 lost->header.size = sizeof(*lost) + id_hdr_size; 1950 lost->header.misc = misc_flag; 1951 record__write(rec, NULL, lost, lost->header.size); 1952 perf_sample__exit(&sample); 1953 } 1954 1955 static void record__read_lost_samples(struct record *rec) 1956 { 1957 struct perf_session *session = rec->session; 1958 struct perf_record_lost_samples_and_ids lost; 1959 struct evsel *evsel; 1960 1961 /* there was an error during record__open */ 1962 if (session->evlist == NULL) 1963 return; 1964 1965 evlist__for_each_entry(session->evlist, evsel) { 1966 struct xyarray *xy = evsel->core.sample_id; 1967 u64 lost_count; 1968 1969 if (xy == NULL || evsel->core.fd == NULL) 1970 continue; 1971 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1972 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1973 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1974 continue; 1975 } 1976 1977 for (int x = 0; x < xyarray__max_x(xy); x++) { 1978 for (int y = 0; y < xyarray__max_y(xy); y++) { 1979 struct perf_counts_values count; 1980 1981 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 1982 pr_debug("read LOST count failed\n"); 1983 return; 1984 } 1985 1986 if (count.lost) { 1987 memset(&lost, 0, sizeof(lost)); 1988 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1989 __record__save_lost_samples(rec, evsel, &lost.lost, 1990 x, y, count.lost, 0); 1991 } 1992 } 1993 } 1994 1995 lost_count = perf_bpf_filter__lost_count(evsel); 1996 if (lost_count) { 1997 memset(&lost, 0, sizeof(lost)); 1998 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1999 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count, 2000 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 2001 } 2002 } 2003 } 2004 2005 static volatile sig_atomic_t workload_exec_errno; 2006 2007 /* 2008 * evlist__prepare_workload will send a SIGUSR1 2009 * if the fork fails, since we asked by setting its 2010 * want_signal to true. 2011 */ 2012 static void workload_exec_failed_signal(int signo __maybe_unused, 2013 siginfo_t *info, 2014 void *ucontext __maybe_unused) 2015 { 2016 workload_exec_errno = info->si_value.sival_int; 2017 done = 1; 2018 child_finished = 1; 2019 } 2020 2021 static void snapshot_sig_handler(int sig); 2022 static void alarm_sig_handler(int sig); 2023 2024 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2025 { 2026 if (evlist) { 2027 if (evlist->mmap && evlist->mmap[0].core.base) 2028 return evlist->mmap[0].core.base; 2029 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2030 return evlist->overwrite_mmap[0].core.base; 2031 } 2032 return NULL; 2033 } 2034 2035 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2036 { 2037 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2038 if (pc) 2039 return pc; 2040 return NULL; 2041 } 2042 2043 static int record__synthesize(struct record *rec, bool tail) 2044 { 2045 struct perf_session *session = rec->session; 2046 struct machine *machine = &session->machines.host; 2047 struct perf_data *data = &rec->data; 2048 struct record_opts *opts = &rec->opts; 2049 struct perf_tool *tool = &rec->tool; 2050 int err = 0; 2051 event_op f = process_synthesized_event; 2052 2053 if (rec->opts.tail_synthesize != tail) 2054 return 0; 2055 2056 if (data->is_pipe) { 2057 err = perf_event__synthesize_for_pipe(tool, session, data, 2058 process_synthesized_event); 2059 if (err < 0) 2060 goto out; 2061 2062 rec->bytes_written += err; 2063 } 2064 2065 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2066 process_synthesized_event, machine); 2067 if (err) 2068 goto out; 2069 2070 /* Synthesize id_index before auxtrace_info */ 2071 err = perf_event__synthesize_id_index(tool, 2072 process_synthesized_event, 2073 session->evlist, machine); 2074 if (err) 2075 goto out; 2076 2077 if (rec->opts.full_auxtrace) { 2078 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2079 session, process_synthesized_event); 2080 if (err) 2081 goto out; 2082 } 2083 2084 if (!evlist__exclude_kernel(rec->evlist)) { 2085 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2086 machine); 2087 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2088 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2089 "Check /proc/kallsyms permission or run as root.\n"); 2090 2091 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2092 machine); 2093 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2094 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2095 "Check /proc/modules permission or run as root.\n"); 2096 } 2097 2098 if (perf_guest) { 2099 machines__process_guests(&session->machines, 2100 perf_event__synthesize_guest_os, tool); 2101 } 2102 2103 err = perf_event__synthesize_extra_attr(&rec->tool, 2104 rec->evlist, 2105 process_synthesized_event, 2106 data->is_pipe); 2107 if (err) 2108 goto out; 2109 2110 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2111 process_synthesized_event, 2112 NULL); 2113 if (err < 0) { 2114 pr_err("Couldn't synthesize thread map.\n"); 2115 return err; 2116 } 2117 2118 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2119 process_synthesized_event, NULL); 2120 if (err < 0) { 2121 pr_err("Couldn't synthesize cpu map.\n"); 2122 return err; 2123 } 2124 2125 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2126 machine, opts); 2127 if (err < 0) { 2128 pr_warning("Couldn't synthesize bpf events.\n"); 2129 err = 0; 2130 } 2131 2132 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2133 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2134 machine); 2135 if (err < 0) { 2136 pr_warning("Couldn't synthesize cgroup events.\n"); 2137 err = 0; 2138 } 2139 } 2140 2141 if (rec->opts.nr_threads_synthesize > 1) { 2142 mutex_init(&synth_lock); 2143 perf_set_multithreaded(); 2144 f = process_locked_synthesized_event; 2145 } 2146 2147 if (rec->opts.synth & PERF_SYNTH_TASK) { 2148 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2149 2150 err = __machine__synthesize_threads(machine, tool, &opts->target, 2151 rec->evlist->core.threads, 2152 f, needs_mmap, opts->sample_address, 2153 rec->opts.nr_threads_synthesize); 2154 } 2155 2156 if (rec->opts.nr_threads_synthesize > 1) { 2157 perf_set_singlethreaded(); 2158 mutex_destroy(&synth_lock); 2159 } 2160 2161 out: 2162 return err; 2163 } 2164 2165 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused) 2166 { 2167 #ifdef HAVE_LIBBPF_SUPPORT 2168 perf_event__synthesize_final_bpf_metadata(rec->session, 2169 process_synthesized_event); 2170 #endif 2171 } 2172 2173 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2174 { 2175 struct record *rec = data; 2176 pthread_kill(rec->thread_id, SIGUSR2); 2177 return 0; 2178 } 2179 2180 static int record__setup_sb_evlist(struct record *rec) 2181 { 2182 struct record_opts *opts = &rec->opts; 2183 2184 if (rec->sb_evlist != NULL) { 2185 /* 2186 * We get here if --switch-output-event populated the 2187 * sb_evlist, so associate a callback that will send a SIGUSR2 2188 * to the main thread. 2189 */ 2190 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2191 rec->thread_id = pthread_self(); 2192 } 2193 #ifdef HAVE_LIBBPF_SUPPORT 2194 if (!opts->no_bpf_event) { 2195 if (rec->sb_evlist == NULL) { 2196 rec->sb_evlist = evlist__new(); 2197 2198 if (rec->sb_evlist == NULL) { 2199 pr_err("Couldn't create side band evlist.\n."); 2200 return -1; 2201 } 2202 } 2203 2204 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2205 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2206 return -1; 2207 } 2208 } 2209 #endif 2210 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2211 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2212 opts->no_bpf_event = true; 2213 } 2214 2215 return 0; 2216 } 2217 2218 static int record__init_clock(struct record *rec) 2219 { 2220 struct perf_session *session = rec->session; 2221 struct timespec ref_clockid; 2222 struct timeval ref_tod; 2223 u64 ref; 2224 2225 if (!rec->opts.use_clockid) 2226 return 0; 2227 2228 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2229 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2230 2231 session->header.env.clock.clockid = rec->opts.clockid; 2232 2233 if (gettimeofday(&ref_tod, NULL) != 0) { 2234 pr_err("gettimeofday failed, cannot set reference time.\n"); 2235 return -1; 2236 } 2237 2238 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2239 pr_err("clock_gettime failed, cannot set reference time.\n"); 2240 return -1; 2241 } 2242 2243 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2244 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2245 2246 session->header.env.clock.tod_ns = ref; 2247 2248 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2249 (u64) ref_clockid.tv_nsec; 2250 2251 session->header.env.clock.clockid_ns = ref; 2252 return 0; 2253 } 2254 2255 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2256 { 2257 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2258 trigger_hit(&auxtrace_snapshot_trigger); 2259 auxtrace_record__snapshot_started = 1; 2260 if (auxtrace_record__snapshot_start(rec->itr)) 2261 trigger_error(&auxtrace_snapshot_trigger); 2262 } 2263 } 2264 2265 static int record__terminate_thread(struct record_thread *thread_data) 2266 { 2267 int err; 2268 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2269 pid_t tid = thread_data->tid; 2270 2271 close(thread_data->pipes.msg[1]); 2272 thread_data->pipes.msg[1] = -1; 2273 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2274 if (err > 0) 2275 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2276 else 2277 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2278 thread->tid, tid); 2279 2280 return 0; 2281 } 2282 2283 static int record__start_threads(struct record *rec) 2284 { 2285 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2286 struct record_thread *thread_data = rec->thread_data; 2287 sigset_t full, mask; 2288 pthread_t handle; 2289 pthread_attr_t attrs; 2290 2291 thread = &thread_data[0]; 2292 2293 if (!record__threads_enabled(rec)) 2294 return 0; 2295 2296 sigfillset(&full); 2297 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2298 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2299 return -1; 2300 } 2301 2302 pthread_attr_init(&attrs); 2303 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2304 2305 for (t = 1; t < nr_threads; t++) { 2306 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2307 2308 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2309 pthread_attr_setaffinity_np(&attrs, 2310 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2311 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2312 #endif 2313 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2314 for (tt = 1; tt < t; tt++) 2315 record__terminate_thread(&thread_data[t]); 2316 pr_err("Failed to start threads: %s\n", strerror(errno)); 2317 ret = -1; 2318 goto out_err; 2319 } 2320 2321 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2322 if (err > 0) 2323 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2324 thread_msg_tags[msg]); 2325 else 2326 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2327 thread->tid, rec->thread_data[t].tid); 2328 } 2329 2330 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2331 (cpu_set_t *)thread->mask->affinity.bits); 2332 2333 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2334 2335 out_err: 2336 pthread_attr_destroy(&attrs); 2337 2338 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2339 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2340 ret = -1; 2341 } 2342 2343 return ret; 2344 } 2345 2346 static int record__stop_threads(struct record *rec) 2347 { 2348 int t; 2349 struct record_thread *thread_data = rec->thread_data; 2350 2351 for (t = 1; t < rec->nr_threads; t++) 2352 record__terminate_thread(&thread_data[t]); 2353 2354 for (t = 0; t < rec->nr_threads; t++) { 2355 rec->samples += thread_data[t].samples; 2356 if (!record__threads_enabled(rec)) 2357 continue; 2358 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2359 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2360 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2361 thread_data[t].samples, thread_data[t].waking); 2362 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2363 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2364 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2365 else 2366 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2367 } 2368 2369 return 0; 2370 } 2371 2372 static unsigned long record__waking(struct record *rec) 2373 { 2374 int t; 2375 unsigned long waking = 0; 2376 struct record_thread *thread_data = rec->thread_data; 2377 2378 for (t = 0; t < rec->nr_threads; t++) 2379 waking += thread_data[t].waking; 2380 2381 return waking; 2382 } 2383 2384 static int __cmd_record(struct record *rec, int argc, const char **argv) 2385 { 2386 int err; 2387 int status = 0; 2388 const bool forks = argc > 0; 2389 struct perf_tool *tool = &rec->tool; 2390 struct record_opts *opts = &rec->opts; 2391 struct perf_data *data = &rec->data; 2392 struct perf_session *session; 2393 bool disabled = false, draining = false; 2394 int fd; 2395 float ratio = 0; 2396 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2397 2398 atexit(record__sig_exit); 2399 signal(SIGCHLD, sig_handler); 2400 signal(SIGINT, sig_handler); 2401 signal(SIGTERM, sig_handler); 2402 signal(SIGSEGV, sigsegv_handler); 2403 2404 if (rec->opts.record_cgroup) { 2405 #ifndef HAVE_FILE_HANDLE 2406 pr_err("cgroup tracking is not supported\n"); 2407 return -1; 2408 #endif 2409 } 2410 2411 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2412 signal(SIGUSR2, snapshot_sig_handler); 2413 if (rec->opts.auxtrace_snapshot_mode) 2414 trigger_on(&auxtrace_snapshot_trigger); 2415 if (rec->switch_output.enabled) 2416 trigger_on(&switch_output_trigger); 2417 } else { 2418 signal(SIGUSR2, SIG_IGN); 2419 } 2420 2421 perf_tool__init(tool, /*ordered_events=*/true); 2422 tool->sample = process_sample_event; 2423 tool->fork = perf_event__process_fork; 2424 tool->exit = perf_event__process_exit; 2425 tool->comm = perf_event__process_comm; 2426 tool->namespaces = perf_event__process_namespaces; 2427 tool->mmap = build_id__process_mmap; 2428 tool->mmap2 = build_id__process_mmap2; 2429 tool->itrace_start = process_timestamp_boundary; 2430 tool->aux = process_timestamp_boundary; 2431 tool->namespace_events = rec->opts.record_namespaces; 2432 tool->cgroup_events = rec->opts.record_cgroup; 2433 session = perf_session__new(data, tool); 2434 if (IS_ERR(session)) { 2435 pr_err("Perf session creation failed.\n"); 2436 return PTR_ERR(session); 2437 } 2438 2439 if (record__threads_enabled(rec)) { 2440 if (perf_data__is_pipe(&rec->data)) { 2441 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2442 return -1; 2443 } 2444 if (rec->opts.full_auxtrace) { 2445 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2446 return -1; 2447 } 2448 } 2449 2450 fd = perf_data__fd(data); 2451 rec->session = session; 2452 2453 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2454 pr_err("Compression initialization failed.\n"); 2455 return -1; 2456 } 2457 #ifdef HAVE_EVENTFD_SUPPORT 2458 done_fd = eventfd(0, EFD_NONBLOCK); 2459 if (done_fd < 0) { 2460 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2461 status = -1; 2462 goto out_delete_session; 2463 } 2464 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2465 if (err < 0) { 2466 pr_err("Failed to add wakeup eventfd to poll list\n"); 2467 status = err; 2468 goto out_delete_session; 2469 } 2470 #endif // HAVE_EVENTFD_SUPPORT 2471 2472 session->header.env.comp_type = PERF_COMP_ZSTD; 2473 session->header.env.comp_level = rec->opts.comp_level; 2474 2475 if (rec->opts.kcore && 2476 !record__kcore_readable(&session->machines.host)) { 2477 pr_err("ERROR: kcore is not readable.\n"); 2478 return -1; 2479 } 2480 2481 if (record__init_clock(rec)) 2482 return -1; 2483 2484 record__init_features(rec); 2485 2486 if (forks) { 2487 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2488 workload_exec_failed_signal); 2489 if (err < 0) { 2490 pr_err("Couldn't run the workload!\n"); 2491 status = err; 2492 goto out_delete_session; 2493 } 2494 } 2495 2496 /* 2497 * If we have just single event and are sending data 2498 * through pipe, we need to force the ids allocation, 2499 * because we synthesize event name through the pipe 2500 * and need the id for that. 2501 */ 2502 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2503 rec->opts.sample_id = true; 2504 2505 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2506 rec->timestamp_filename = false; 2507 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2508 } 2509 2510 /* 2511 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE 2512 * and hybrid_merge is false. 2513 */ 2514 evlist__uniquify_evsel_names(rec->evlist, &stat_config); 2515 2516 evlist__config(rec->evlist, opts, &callchain_param); 2517 2518 /* Debug message used by test scripts */ 2519 pr_debug3("perf record opening and mmapping events\n"); 2520 if (record__open(rec) != 0) { 2521 err = -1; 2522 goto out_free_threads; 2523 } 2524 /* Debug message used by test scripts */ 2525 pr_debug3("perf record done opening and mmapping events\n"); 2526 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2527 2528 if (rec->opts.kcore) { 2529 err = record__kcore_copy(&session->machines.host, data); 2530 if (err) { 2531 pr_err("ERROR: Failed to copy kcore\n"); 2532 goto out_free_threads; 2533 } 2534 } 2535 2536 /* 2537 * Normally perf_session__new would do this, but it doesn't have the 2538 * evlist. 2539 */ 2540 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2541 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2542 rec->tool.ordered_events = false; 2543 } 2544 2545 if (evlist__nr_groups(rec->evlist) == 0) 2546 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2547 2548 if (data->is_pipe) { 2549 err = perf_header__write_pipe(fd); 2550 if (err < 0) 2551 goto out_free_threads; 2552 } else { 2553 err = perf_session__write_header(session, rec->evlist, fd, false); 2554 if (err < 0) 2555 goto out_free_threads; 2556 } 2557 2558 err = -1; 2559 if (!rec->no_buildid 2560 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2561 pr_err("Couldn't generate buildids. " 2562 "Use --no-buildid to profile anyway.\n"); 2563 goto out_free_threads; 2564 } 2565 2566 if (!evlist__needs_bpf_sb_event(rec->evlist)) 2567 opts->no_bpf_event = true; 2568 2569 err = record__setup_sb_evlist(rec); 2570 if (err) 2571 goto out_free_threads; 2572 2573 err = record__synthesize(rec, false); 2574 if (err < 0) 2575 goto out_free_threads; 2576 2577 if (rec->realtime_prio) { 2578 struct sched_param param; 2579 2580 param.sched_priority = rec->realtime_prio; 2581 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 2582 pr_err("Could not set realtime priority.\n"); 2583 err = -1; 2584 goto out_free_threads; 2585 } 2586 } 2587 2588 if (record__start_threads(rec)) 2589 goto out_free_threads; 2590 2591 /* 2592 * When perf is starting the traced process, all the events 2593 * (apart from group members) have enable_on_exec=1 set, 2594 * so don't spoil it by prematurely enabling them. 2595 */ 2596 if (!target__none(&opts->target) && !opts->target.initial_delay) 2597 evlist__enable(rec->evlist); 2598 2599 /* 2600 * offcpu-time does not call execve, so enable_on_exe wouldn't work 2601 * when recording a workload, do it manually 2602 */ 2603 if (rec->off_cpu) 2604 evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT); 2605 2606 /* 2607 * Let the child rip 2608 */ 2609 if (forks) { 2610 struct machine *machine = &session->machines.host; 2611 union perf_event *event; 2612 pid_t tgid; 2613 2614 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2615 if (event == NULL) { 2616 err = -ENOMEM; 2617 goto out_child; 2618 } 2619 2620 /* 2621 * Some H/W events are generated before COMM event 2622 * which is emitted during exec(), so perf script 2623 * cannot see a correct process name for those events. 2624 * Synthesize COMM event to prevent it. 2625 */ 2626 tgid = perf_event__synthesize_comm(tool, event, 2627 rec->evlist->workload.pid, 2628 process_synthesized_event, 2629 machine); 2630 free(event); 2631 2632 if (tgid == -1) 2633 goto out_child; 2634 2635 event = malloc(sizeof(event->namespaces) + 2636 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2637 machine->id_hdr_size); 2638 if (event == NULL) { 2639 err = -ENOMEM; 2640 goto out_child; 2641 } 2642 2643 /* 2644 * Synthesize NAMESPACES event for the command specified. 2645 */ 2646 perf_event__synthesize_namespaces(tool, event, 2647 rec->evlist->workload.pid, 2648 tgid, process_synthesized_event, 2649 machine); 2650 free(event); 2651 2652 evlist__start_workload(rec->evlist); 2653 } 2654 2655 if (opts->target.initial_delay) { 2656 pr_info(EVLIST_DISABLED_MSG); 2657 if (opts->target.initial_delay > 0) { 2658 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2659 evlist__enable(rec->evlist); 2660 pr_info(EVLIST_ENABLED_MSG); 2661 } 2662 } 2663 2664 err = event_enable_timer__start(rec->evlist->eet); 2665 if (err) 2666 goto out_child; 2667 2668 /* Debug message used by test scripts */ 2669 pr_debug3("perf record has started\n"); 2670 fflush(stderr); 2671 2672 trigger_ready(&auxtrace_snapshot_trigger); 2673 trigger_ready(&switch_output_trigger); 2674 perf_hooks__invoke_record_start(); 2675 2676 /* 2677 * Must write FINISHED_INIT so it will be seen after all other 2678 * synthesized user events, but before any regular events. 2679 */ 2680 err = write_finished_init(rec, false); 2681 if (err < 0) 2682 goto out_child; 2683 2684 for (;;) { 2685 unsigned long long hits = thread->samples; 2686 2687 /* 2688 * rec->evlist->bkw_mmap_state is possible to be 2689 * BKW_MMAP_EMPTY here: when done == true and 2690 * hits != rec->samples in previous round. 2691 * 2692 * evlist__toggle_bkw_mmap ensure we never 2693 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2694 */ 2695 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2696 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2697 2698 if (record__mmap_read_all(rec, false) < 0) { 2699 trigger_error(&auxtrace_snapshot_trigger); 2700 trigger_error(&switch_output_trigger); 2701 err = -1; 2702 goto out_child; 2703 } 2704 2705 if (auxtrace_record__snapshot_started) { 2706 auxtrace_record__snapshot_started = 0; 2707 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2708 record__read_auxtrace_snapshot(rec, false); 2709 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2710 pr_err("AUX area tracing snapshot failed\n"); 2711 err = -1; 2712 goto out_child; 2713 } 2714 } 2715 2716 if (trigger_is_hit(&switch_output_trigger)) { 2717 /* 2718 * If switch_output_trigger is hit, the data in 2719 * overwritable ring buffer should have been collected, 2720 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2721 * 2722 * If SIGUSR2 raise after or during record__mmap_read_all(), 2723 * record__mmap_read_all() didn't collect data from 2724 * overwritable ring buffer. Read again. 2725 */ 2726 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2727 continue; 2728 trigger_ready(&switch_output_trigger); 2729 2730 /* 2731 * Reenable events in overwrite ring buffer after 2732 * record__mmap_read_all(): we should have collected 2733 * data from it. 2734 */ 2735 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2736 2737 if (!quiet) 2738 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2739 record__waking(rec)); 2740 thread->waking = 0; 2741 fd = record__switch_output(rec, false); 2742 if (fd < 0) { 2743 pr_err("Failed to switch to new file\n"); 2744 trigger_error(&switch_output_trigger); 2745 err = fd; 2746 goto out_child; 2747 } 2748 2749 /* re-arm the alarm */ 2750 if (rec->switch_output.time) 2751 alarm(rec->switch_output.time); 2752 } 2753 2754 if (hits == thread->samples) { 2755 if (done || draining) 2756 break; 2757 err = fdarray__poll(&thread->pollfd, -1); 2758 /* 2759 * Propagate error, only if there's any. Ignore positive 2760 * number of returned events and interrupt error. 2761 */ 2762 if (err > 0 || (err < 0 && errno == EINTR)) 2763 err = 0; 2764 thread->waking++; 2765 2766 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2767 record__thread_munmap_filtered, NULL) == 0) 2768 draining = true; 2769 2770 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2771 if (err) 2772 goto out_child; 2773 } 2774 2775 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2776 switch (cmd) { 2777 case EVLIST_CTL_CMD_SNAPSHOT: 2778 hit_auxtrace_snapshot_trigger(rec); 2779 evlist__ctlfd_ack(rec->evlist); 2780 break; 2781 case EVLIST_CTL_CMD_STOP: 2782 done = 1; 2783 break; 2784 case EVLIST_CTL_CMD_ACK: 2785 case EVLIST_CTL_CMD_UNSUPPORTED: 2786 case EVLIST_CTL_CMD_ENABLE: 2787 case EVLIST_CTL_CMD_DISABLE: 2788 case EVLIST_CTL_CMD_EVLIST: 2789 case EVLIST_CTL_CMD_PING: 2790 default: 2791 break; 2792 } 2793 } 2794 2795 err = event_enable_timer__process(rec->evlist->eet); 2796 if (err < 0) 2797 goto out_child; 2798 if (err) { 2799 err = 0; 2800 done = 1; 2801 } 2802 2803 /* 2804 * When perf is starting the traced process, at the end events 2805 * die with the process and we wait for that. Thus no need to 2806 * disable events in this case. 2807 */ 2808 if (done && !disabled && !target__none(&opts->target)) { 2809 trigger_off(&auxtrace_snapshot_trigger); 2810 evlist__disable(rec->evlist); 2811 disabled = true; 2812 } 2813 } 2814 2815 trigger_off(&auxtrace_snapshot_trigger); 2816 trigger_off(&switch_output_trigger); 2817 2818 record__synthesize_final_bpf_metadata(rec); 2819 2820 if (opts->auxtrace_snapshot_on_exit) 2821 record__auxtrace_snapshot_exit(rec); 2822 2823 if (forks && workload_exec_errno) { 2824 char msg[STRERR_BUFSIZE]; 2825 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2826 struct strbuf sb = STRBUF_INIT; 2827 2828 evlist__format_evsels(rec->evlist, &sb, 2048); 2829 2830 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2831 sb.buf, argv[0], emsg); 2832 strbuf_release(&sb); 2833 err = -1; 2834 goto out_child; 2835 } 2836 2837 if (!quiet) 2838 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2839 record__waking(rec)); 2840 2841 write_finished_init(rec, true); 2842 2843 if (target__none(&rec->opts.target)) 2844 record__synthesize_workload(rec, true); 2845 2846 out_child: 2847 record__stop_threads(rec); 2848 record__mmap_read_all(rec, true); 2849 out_free_threads: 2850 record__free_thread_data(rec); 2851 evlist__finalize_ctlfd(rec->evlist); 2852 record__aio_mmap_read_sync(rec); 2853 2854 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2855 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2856 session->header.env.comp_ratio = ratio + 0.5; 2857 } 2858 2859 if (forks) { 2860 int exit_status; 2861 2862 if (!child_finished) 2863 kill(rec->evlist->workload.pid, SIGTERM); 2864 2865 wait(&exit_status); 2866 2867 if (err < 0) 2868 status = err; 2869 else if (WIFEXITED(exit_status)) 2870 status = WEXITSTATUS(exit_status); 2871 else if (WIFSIGNALED(exit_status)) 2872 signr = WTERMSIG(exit_status); 2873 } else 2874 status = err; 2875 2876 if (rec->off_cpu) 2877 rec->bytes_written += off_cpu_write(rec->session); 2878 2879 record__read_lost_samples(rec); 2880 record__synthesize(rec, true); 2881 /* this will be recalculated during process_buildids() */ 2882 rec->samples = 0; 2883 2884 if (!err) { 2885 if (!rec->timestamp_filename) { 2886 record__finish_output(rec); 2887 } else { 2888 fd = record__switch_output(rec, true); 2889 if (fd < 0) { 2890 status = fd; 2891 goto out_delete_session; 2892 } 2893 } 2894 } 2895 2896 perf_hooks__invoke_record_end(); 2897 2898 if (!err && !quiet) { 2899 char samples[128]; 2900 const char *postfix = rec->timestamp_filename ? 2901 ".<timestamp>" : ""; 2902 2903 if (rec->samples && !rec->opts.full_auxtrace) 2904 scnprintf(samples, sizeof(samples), 2905 " (%" PRIu64 " samples)", rec->samples); 2906 else 2907 samples[0] = '\0'; 2908 2909 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2910 perf_data__size(data) / 1024.0 / 1024.0, 2911 data->path, postfix, samples); 2912 if (ratio) { 2913 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2914 rec->session->bytes_transferred / 1024.0 / 1024.0, 2915 ratio); 2916 } 2917 fprintf(stderr, " ]\n"); 2918 } 2919 2920 out_delete_session: 2921 #ifdef HAVE_EVENTFD_SUPPORT 2922 if (done_fd >= 0) { 2923 fd = done_fd; 2924 done_fd = -1; 2925 2926 close(fd); 2927 } 2928 #endif 2929 zstd_fini(&session->zstd_data); 2930 if (!opts->no_bpf_event) 2931 evlist__stop_sb_thread(rec->sb_evlist); 2932 2933 perf_session__delete(session); 2934 return status; 2935 } 2936 2937 static void callchain_debug(struct callchain_param *callchain) 2938 { 2939 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2940 2941 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2942 2943 if (callchain->record_mode == CALLCHAIN_DWARF) 2944 pr_debug("callchain: stack dump size %d\n", 2945 callchain->dump_size); 2946 } 2947 2948 int record_opts__parse_callchain(struct record_opts *record, 2949 struct callchain_param *callchain, 2950 const char *arg, bool unset) 2951 { 2952 int ret; 2953 callchain->enabled = !unset; 2954 2955 /* --no-call-graph */ 2956 if (unset) { 2957 callchain->record_mode = CALLCHAIN_NONE; 2958 pr_debug("callchain: disabled\n"); 2959 return 0; 2960 } 2961 2962 ret = parse_callchain_record_opt(arg, callchain); 2963 if (!ret) { 2964 /* Enable data address sampling for DWARF unwind. */ 2965 if (callchain->record_mode == CALLCHAIN_DWARF) 2966 record->sample_address = true; 2967 callchain_debug(callchain); 2968 } 2969 2970 return ret; 2971 } 2972 2973 int record_parse_callchain_opt(const struct option *opt, 2974 const char *arg, 2975 int unset) 2976 { 2977 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2978 } 2979 2980 int record_callchain_opt(const struct option *opt, 2981 const char *arg __maybe_unused, 2982 int unset __maybe_unused) 2983 { 2984 struct callchain_param *callchain = opt->value; 2985 2986 callchain->enabled = true; 2987 2988 if (callchain->record_mode == CALLCHAIN_NONE) 2989 callchain->record_mode = CALLCHAIN_FP; 2990 2991 callchain_debug(callchain); 2992 return 0; 2993 } 2994 2995 static int perf_record_config(const char *var, const char *value, void *cb) 2996 { 2997 struct record *rec = cb; 2998 2999 if (!strcmp(var, "record.build-id")) { 3000 if (!strcmp(value, "cache")) 3001 rec->no_buildid_cache = false; 3002 else if (!strcmp(value, "no-cache")) 3003 rec->no_buildid_cache = true; 3004 else if (!strcmp(value, "skip")) 3005 rec->no_buildid = true; 3006 else if (!strcmp(value, "mmap")) 3007 rec->buildid_mmap = true; 3008 else 3009 return -1; 3010 return 0; 3011 } 3012 if (!strcmp(var, "record.call-graph")) { 3013 var = "call-graph.record-mode"; 3014 return perf_default_config(var, value, cb); 3015 } 3016 #ifdef HAVE_AIO_SUPPORT 3017 if (!strcmp(var, "record.aio")) { 3018 rec->opts.nr_cblocks = strtol(value, NULL, 0); 3019 if (!rec->opts.nr_cblocks) 3020 rec->opts.nr_cblocks = nr_cblocks_default; 3021 } 3022 #endif 3023 if (!strcmp(var, "record.debuginfod")) { 3024 rec->debuginfod.urls = strdup(value); 3025 if (!rec->debuginfod.urls) 3026 return -ENOMEM; 3027 rec->debuginfod.set = true; 3028 } 3029 3030 return 0; 3031 } 3032 3033 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 3034 { 3035 struct record *rec = (struct record *)opt->value; 3036 3037 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 3038 } 3039 3040 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 3041 { 3042 struct record_opts *opts = (struct record_opts *)opt->value; 3043 3044 if (unset || !str) 3045 return 0; 3046 3047 if (!strcasecmp(str, "node")) 3048 opts->affinity = PERF_AFFINITY_NODE; 3049 else if (!strcasecmp(str, "cpu")) 3050 opts->affinity = PERF_AFFINITY_CPU; 3051 3052 return 0; 3053 } 3054 3055 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3056 { 3057 mask->nbits = nr_bits; 3058 mask->bits = bitmap_zalloc(mask->nbits); 3059 if (!mask->bits) 3060 return -ENOMEM; 3061 3062 return 0; 3063 } 3064 3065 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3066 { 3067 bitmap_free(mask->bits); 3068 mask->nbits = 0; 3069 } 3070 3071 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3072 { 3073 int ret; 3074 3075 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3076 if (ret) { 3077 mask->affinity.bits = NULL; 3078 return ret; 3079 } 3080 3081 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3082 if (ret) { 3083 record__mmap_cpu_mask_free(&mask->maps); 3084 mask->maps.bits = NULL; 3085 } 3086 3087 return ret; 3088 } 3089 3090 static void record__thread_mask_free(struct thread_mask *mask) 3091 { 3092 record__mmap_cpu_mask_free(&mask->maps); 3093 record__mmap_cpu_mask_free(&mask->affinity); 3094 } 3095 3096 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3097 { 3098 int s; 3099 struct record_opts *opts = opt->value; 3100 3101 if (unset || !str || !strlen(str)) { 3102 opts->threads_spec = THREAD_SPEC__CPU; 3103 } else { 3104 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3105 if (s == THREAD_SPEC__USER) { 3106 opts->threads_user_spec = strdup(str); 3107 if (!opts->threads_user_spec) 3108 return -ENOMEM; 3109 opts->threads_spec = THREAD_SPEC__USER; 3110 break; 3111 } 3112 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3113 opts->threads_spec = s; 3114 break; 3115 } 3116 } 3117 } 3118 3119 if (opts->threads_spec == THREAD_SPEC__USER) 3120 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3121 else 3122 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3123 3124 return 0; 3125 } 3126 3127 static int parse_output_max_size(const struct option *opt, 3128 const char *str, int unset) 3129 { 3130 unsigned long *s = (unsigned long *)opt->value; 3131 static struct parse_tag tags_size[] = { 3132 { .tag = 'B', .mult = 1 }, 3133 { .tag = 'K', .mult = 1 << 10 }, 3134 { .tag = 'M', .mult = 1 << 20 }, 3135 { .tag = 'G', .mult = 1 << 30 }, 3136 { .tag = 0 }, 3137 }; 3138 unsigned long val; 3139 3140 if (unset) { 3141 *s = 0; 3142 return 0; 3143 } 3144 3145 val = parse_tag_value(str, tags_size); 3146 if (val != (unsigned long) -1) { 3147 *s = val; 3148 return 0; 3149 } 3150 3151 return -1; 3152 } 3153 3154 static int record__parse_mmap_pages(const struct option *opt, 3155 const char *str, 3156 int unset __maybe_unused) 3157 { 3158 struct record_opts *opts = opt->value; 3159 char *s, *p; 3160 unsigned int mmap_pages; 3161 int ret; 3162 3163 if (!str) 3164 return -EINVAL; 3165 3166 s = strdup(str); 3167 if (!s) 3168 return -ENOMEM; 3169 3170 p = strchr(s, ','); 3171 if (p) 3172 *p = '\0'; 3173 3174 if (*s) { 3175 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3176 if (ret) 3177 goto out_free; 3178 opts->mmap_pages = mmap_pages; 3179 } 3180 3181 if (!p) { 3182 ret = 0; 3183 goto out_free; 3184 } 3185 3186 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3187 if (ret) 3188 goto out_free; 3189 3190 opts->auxtrace_mmap_pages = mmap_pages; 3191 3192 out_free: 3193 free(s); 3194 return ret; 3195 } 3196 3197 static int record__parse_off_cpu_thresh(const struct option *opt, 3198 const char *str, 3199 int unset __maybe_unused) 3200 { 3201 struct record_opts *opts = opt->value; 3202 char *endptr; 3203 u64 off_cpu_thresh_ms; 3204 3205 if (!str) 3206 return -EINVAL; 3207 3208 off_cpu_thresh_ms = strtoull(str, &endptr, 10); 3209 3210 /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */ 3211 if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0"))) 3212 return -EINVAL; 3213 else 3214 opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC; 3215 3216 return 0; 3217 } 3218 3219 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3220 { 3221 } 3222 3223 static int parse_control_option(const struct option *opt, 3224 const char *str, 3225 int unset __maybe_unused) 3226 { 3227 struct record_opts *opts = opt->value; 3228 3229 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3230 } 3231 3232 static void switch_output_size_warn(struct record *rec) 3233 { 3234 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3235 struct switch_output *s = &rec->switch_output; 3236 3237 wakeup_size /= 2; 3238 3239 if (s->size < wakeup_size) { 3240 char buf[100]; 3241 3242 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3243 pr_warning("WARNING: switch-output data size lower than " 3244 "wakeup kernel buffer size (%s) " 3245 "expect bigger perf.data sizes\n", buf); 3246 } 3247 } 3248 3249 static int switch_output_setup(struct record *rec) 3250 { 3251 struct switch_output *s = &rec->switch_output; 3252 static struct parse_tag tags_size[] = { 3253 { .tag = 'B', .mult = 1 }, 3254 { .tag = 'K', .mult = 1 << 10 }, 3255 { .tag = 'M', .mult = 1 << 20 }, 3256 { .tag = 'G', .mult = 1 << 30 }, 3257 { .tag = 0 }, 3258 }; 3259 static struct parse_tag tags_time[] = { 3260 { .tag = 's', .mult = 1 }, 3261 { .tag = 'm', .mult = 60 }, 3262 { .tag = 'h', .mult = 60*60 }, 3263 { .tag = 'd', .mult = 60*60*24 }, 3264 { .tag = 0 }, 3265 }; 3266 unsigned long val; 3267 3268 /* 3269 * If we're using --switch-output-events, then we imply its 3270 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3271 * thread to its parent. 3272 */ 3273 if (rec->switch_output_event_set) { 3274 if (record__threads_enabled(rec)) { 3275 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3276 return 0; 3277 } 3278 goto do_signal; 3279 } 3280 3281 if (!s->set) 3282 return 0; 3283 3284 if (record__threads_enabled(rec)) { 3285 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3286 return 0; 3287 } 3288 3289 if (!strcmp(s->str, "signal")) { 3290 do_signal: 3291 s->signal = true; 3292 pr_debug("switch-output with SIGUSR2 signal\n"); 3293 goto enabled; 3294 } 3295 3296 val = parse_tag_value(s->str, tags_size); 3297 if (val != (unsigned long) -1) { 3298 s->size = val; 3299 pr_debug("switch-output with %s size threshold\n", s->str); 3300 goto enabled; 3301 } 3302 3303 val = parse_tag_value(s->str, tags_time); 3304 if (val != (unsigned long) -1) { 3305 s->time = val; 3306 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3307 s->str, s->time); 3308 goto enabled; 3309 } 3310 3311 return -1; 3312 3313 enabled: 3314 rec->timestamp_filename = true; 3315 s->enabled = true; 3316 3317 if (s->size && !rec->opts.no_buffering) 3318 switch_output_size_warn(rec); 3319 3320 return 0; 3321 } 3322 3323 static const char * const __record_usage[] = { 3324 "perf record [<options>] [<command>]", 3325 "perf record [<options>] -- <command> [<options>]", 3326 NULL 3327 }; 3328 const char * const *record_usage = __record_usage; 3329 3330 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 3331 struct perf_sample *sample, struct machine *machine) 3332 { 3333 /* 3334 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3335 * no need to add them twice. 3336 */ 3337 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3338 return 0; 3339 return perf_event__process_mmap(tool, event, sample, machine); 3340 } 3341 3342 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 3343 struct perf_sample *sample, struct machine *machine) 3344 { 3345 /* 3346 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3347 * no need to add them twice. 3348 */ 3349 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3350 return 0; 3351 3352 return perf_event__process_mmap2(tool, event, sample, machine); 3353 } 3354 3355 static int process_timestamp_boundary(const struct perf_tool *tool, 3356 union perf_event *event __maybe_unused, 3357 struct perf_sample *sample, 3358 struct machine *machine __maybe_unused) 3359 { 3360 struct record *rec = container_of(tool, struct record, tool); 3361 3362 set_timestamp_boundary(rec, sample->time); 3363 return 0; 3364 } 3365 3366 static int parse_record_synth_option(const struct option *opt, 3367 const char *str, 3368 int unset __maybe_unused) 3369 { 3370 struct record_opts *opts = opt->value; 3371 char *p = strdup(str); 3372 3373 if (p == NULL) 3374 return -1; 3375 3376 opts->synth = parse_synth_opt(p); 3377 free(p); 3378 3379 if (opts->synth < 0) { 3380 pr_err("Invalid synth option: %s\n", str); 3381 return -1; 3382 } 3383 return 0; 3384 } 3385 3386 /* 3387 * XXX Ideally would be local to cmd_record() and passed to a record__new 3388 * because we need to have access to it in record__exit, that is called 3389 * after cmd_record() exits, but since record_options need to be accessible to 3390 * builtin-script, leave it here. 3391 * 3392 * At least we don't ouch it in all the other functions here directly. 3393 * 3394 * Just say no to tons of global variables, sigh. 3395 */ 3396 static struct record record = { 3397 .opts = { 3398 .sample_time = true, 3399 .mmap_pages = UINT_MAX, 3400 .user_freq = UINT_MAX, 3401 .user_interval = ULLONG_MAX, 3402 .freq = 4000, 3403 .target = { 3404 .uses_mmap = true, 3405 .default_per_cpu = true, 3406 }, 3407 .mmap_flush = MMAP_FLUSH_DEFAULT, 3408 .nr_threads_synthesize = 1, 3409 .ctl_fd = -1, 3410 .ctl_fd_ack = -1, 3411 .synth = PERF_SYNTH_ALL, 3412 .off_cpu_thresh_ns = OFFCPU_THRESH, 3413 }, 3414 }; 3415 3416 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3417 "\n\t\t\t\tDefault: fp"; 3418 3419 static bool dry_run; 3420 3421 static struct parse_events_option_args parse_events_option_args = { 3422 .evlistp = &record.evlist, 3423 }; 3424 3425 static struct parse_events_option_args switch_output_parse_events_option_args = { 3426 .evlistp = &record.sb_evlist, 3427 }; 3428 3429 /* 3430 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3431 * with it and switch to use the library functions in perf_evlist that came 3432 * from builtin-record.c, i.e. use record_opts, 3433 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3434 * using pipes, etc. 3435 */ 3436 static struct option __record_options[] = { 3437 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3438 "event selector. use 'perf list' to list available events", 3439 parse_events_option), 3440 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3441 "event filter", parse_filter), 3442 OPT_BOOLEAN(0, "latency", &record.latency, 3443 "Enable data collection for latency profiling.\n" 3444 "\t\t\t Use perf report --latency for latency-centric profile."), 3445 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3446 NULL, "don't record events from perf itself", 3447 exclude_perf), 3448 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3449 "record events on existing process id"), 3450 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3451 "record events on existing thread id"), 3452 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3453 "collect data with this RT SCHED_FIFO priority"), 3454 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3455 "collect data without buffering"), 3456 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3457 "collect raw sample records from all opened counters"), 3458 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3459 "system-wide collection from all CPUs"), 3460 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3461 "list of cpus to monitor"), 3462 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3463 OPT_STRING('o', "output", &record.data.path, "file", 3464 "output file name"), 3465 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3466 &record.opts.no_inherit_set, 3467 "child tasks do not inherit counters"), 3468 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3469 "synthesize non-sample events at the end of output"), 3470 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3471 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3472 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3473 "Fail if the specified frequency can't be used"), 3474 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3475 "profile at this frequency", 3476 record__parse_freq), 3477 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3478 "number of mmap data pages and AUX area tracing mmap pages", 3479 record__parse_mmap_pages), 3480 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3481 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3482 record__mmap_flush_parse), 3483 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3484 NULL, "enables call-graph recording" , 3485 &record_callchain_opt), 3486 OPT_CALLBACK(0, "call-graph", &record.opts, 3487 "record_mode[,record_size]", record_callchain_help, 3488 &record_parse_callchain_opt), 3489 OPT_INCR('v', "verbose", &verbose, 3490 "be more verbose (show counter open errors, etc)"), 3491 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3492 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3493 "per thread counts"), 3494 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3495 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3496 "Record the sample physical addresses"), 3497 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3498 "Record the sampled data address data page size"), 3499 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3500 "Record the sampled code address (ip) page size"), 3501 OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, 3502 "Record the data source for memory operations"), 3503 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3504 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3505 "Record the sample identifier"), 3506 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3507 &record.opts.sample_time_set, 3508 "Record the sample timestamps"), 3509 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3510 "Record the sample period"), 3511 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3512 "don't sample"), 3513 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3514 &record.no_buildid_cache_set, 3515 "do not update the buildid cache"), 3516 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3517 &record.no_buildid_set, 3518 "do not collect buildids in perf.data"), 3519 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3520 "monitor event in cgroup name only", 3521 parse_cgroups), 3522 OPT_CALLBACK('D', "delay", &record, "ms", 3523 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3524 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3525 record__parse_event_enable_time), 3526 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3527 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), 3528 3529 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3530 "branch any", "sample any taken branches", 3531 parse_branch_stack), 3532 3533 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3534 "branch filter mask", "branch stack filter modes", 3535 parse_branch_stack), 3536 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3537 "sample by weight (on special events only)"), 3538 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3539 "sample transaction flags (special events only)"), 3540 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3541 "use per-thread mmaps"), 3542 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3543 "sample selected machine registers on interrupt," 3544 " use '-I?' to list register names", parse_intr_regs), 3545 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3546 "sample selected machine registers in user space," 3547 " use '--user-regs=?' to list register names", parse_user_regs), 3548 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3549 "Record running/enabled time of read (:S) events"), 3550 OPT_CALLBACK('k', "clockid", &record.opts, 3551 "clockid", "clockid to use for events, see clock_gettime()", 3552 parse_clockid), 3553 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3554 "opts", "AUX area tracing Snapshot Mode", ""), 3555 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3556 "opts", "sample AUX area", ""), 3557 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3558 "per thread proc mmap processing timeout in ms"), 3559 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3560 "Record namespaces events"), 3561 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3562 "Record cgroup events"), 3563 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3564 &record.opts.record_switch_events_set, 3565 "Record context switch events"), 3566 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3567 "Configure all used events to run in kernel space.", 3568 PARSE_OPT_EXCLUSIVE), 3569 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3570 "Configure all used events to run in user space.", 3571 PARSE_OPT_EXCLUSIVE), 3572 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3573 "collect kernel callchains"), 3574 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3575 "collect user callchains"), 3576 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3577 "file", "vmlinux pathname"), 3578 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3579 "Record build-id of all DSOs regardless of hits"), 3580 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3581 "Record build-id in map events"), 3582 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3583 "append timestamp to output filename"), 3584 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3585 "Record timestamp boundary (time of first/last samples)"), 3586 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3587 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3588 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3589 "signal"), 3590 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3591 &record.switch_output_event_set, "switch output event", 3592 "switch output event selector. use 'perf list' to list available events", 3593 parse_events_option_new_evlist), 3594 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3595 "Limit number of switch output generated files"), 3596 OPT_BOOLEAN(0, "dry-run", &dry_run, 3597 "Parse options then exit"), 3598 #ifdef HAVE_AIO_SUPPORT 3599 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3600 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3601 record__aio_parse), 3602 #endif 3603 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3604 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3605 record__parse_affinity), 3606 #ifdef HAVE_ZSTD_SUPPORT 3607 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3608 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3609 record__parse_comp_level), 3610 #endif 3611 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3612 "size", "Limit the maximum size of the output file", parse_output_max_size), 3613 OPT_UINTEGER(0, "num-thread-synthesize", 3614 &record.opts.nr_threads_synthesize, 3615 "number of threads to run for event synthesis"), 3616 #ifdef HAVE_LIBPFM 3617 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3618 "libpfm4 event selector. use 'perf list' to list available events", 3619 parse_libpfm_events_option), 3620 #endif 3621 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3622 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3623 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3624 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3625 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3626 parse_control_option), 3627 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3628 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3629 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3630 &record.debuginfod.set, "debuginfod urls", 3631 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3632 "system"), 3633 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3634 "write collected trace data into several data files using parallel threads", 3635 record__parse_threads), 3636 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3637 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3638 "BPF filter action"), 3639 OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", 3640 "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)", 3641 record__parse_off_cpu_thresh), 3642 OPT_END() 3643 }; 3644 3645 struct option *record_options = __record_options; 3646 3647 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3648 { 3649 struct perf_cpu cpu; 3650 int idx; 3651 3652 if (cpu_map__is_dummy(cpus)) 3653 return 0; 3654 3655 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3656 /* Return ENODEV is input cpu is greater than max cpu */ 3657 if ((unsigned long)cpu.cpu > mask->nbits) 3658 return -ENODEV; 3659 __set_bit(cpu.cpu, mask->bits); 3660 } 3661 3662 return 0; 3663 } 3664 3665 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3666 { 3667 struct perf_cpu_map *cpus; 3668 3669 cpus = perf_cpu_map__new(mask_spec); 3670 if (!cpus) 3671 return -ENOMEM; 3672 3673 bitmap_zero(mask->bits, mask->nbits); 3674 if (record__mmap_cpu_mask_init(mask, cpus)) 3675 return -ENODEV; 3676 3677 perf_cpu_map__put(cpus); 3678 3679 return 0; 3680 } 3681 3682 static void record__free_thread_masks(struct record *rec, int nr_threads) 3683 { 3684 int t; 3685 3686 if (rec->thread_masks) 3687 for (t = 0; t < nr_threads; t++) 3688 record__thread_mask_free(&rec->thread_masks[t]); 3689 3690 zfree(&rec->thread_masks); 3691 } 3692 3693 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3694 { 3695 int t, ret; 3696 3697 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3698 if (!rec->thread_masks) { 3699 pr_err("Failed to allocate thread masks\n"); 3700 return -ENOMEM; 3701 } 3702 3703 for (t = 0; t < nr_threads; t++) { 3704 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3705 if (ret) { 3706 pr_err("Failed to allocate thread masks[%d]\n", t); 3707 goto out_free; 3708 } 3709 } 3710 3711 return 0; 3712 3713 out_free: 3714 record__free_thread_masks(rec, nr_threads); 3715 3716 return ret; 3717 } 3718 3719 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3720 { 3721 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3722 3723 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3724 if (ret) 3725 return ret; 3726 3727 rec->nr_threads = nr_cpus; 3728 pr_debug("nr_threads: %d\n", rec->nr_threads); 3729 3730 for (t = 0; t < rec->nr_threads; t++) { 3731 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3732 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3733 if (verbose > 0) { 3734 pr_debug("thread_masks[%d]: ", t); 3735 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3736 pr_debug("thread_masks[%d]: ", t); 3737 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3738 } 3739 } 3740 3741 return 0; 3742 } 3743 3744 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3745 const char **maps_spec, const char **affinity_spec, 3746 u32 nr_spec) 3747 { 3748 u32 s; 3749 int ret = 0, t = 0; 3750 struct mmap_cpu_mask cpus_mask; 3751 struct thread_mask thread_mask, full_mask, *thread_masks; 3752 3753 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3754 if (ret) { 3755 pr_err("Failed to allocate CPUs mask\n"); 3756 return ret; 3757 } 3758 3759 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3760 if (ret) { 3761 pr_err("Failed to init cpu mask\n"); 3762 goto out_free_cpu_mask; 3763 } 3764 3765 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3766 if (ret) { 3767 pr_err("Failed to allocate full mask\n"); 3768 goto out_free_cpu_mask; 3769 } 3770 3771 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3772 if (ret) { 3773 pr_err("Failed to allocate thread mask\n"); 3774 goto out_free_full_and_cpu_masks; 3775 } 3776 3777 for (s = 0; s < nr_spec; s++) { 3778 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3779 if (ret) { 3780 pr_err("Failed to initialize maps thread mask\n"); 3781 goto out_free; 3782 } 3783 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3784 if (ret) { 3785 pr_err("Failed to initialize affinity thread mask\n"); 3786 goto out_free; 3787 } 3788 3789 /* ignore invalid CPUs but do not allow empty masks */ 3790 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3791 cpus_mask.bits, thread_mask.maps.nbits)) { 3792 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3793 ret = -EINVAL; 3794 goto out_free; 3795 } 3796 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3797 cpus_mask.bits, thread_mask.affinity.nbits)) { 3798 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3799 ret = -EINVAL; 3800 goto out_free; 3801 } 3802 3803 /* do not allow intersection with other masks (full_mask) */ 3804 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3805 thread_mask.maps.nbits)) { 3806 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3807 ret = -EINVAL; 3808 goto out_free; 3809 } 3810 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3811 thread_mask.affinity.nbits)) { 3812 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3813 ret = -EINVAL; 3814 goto out_free; 3815 } 3816 3817 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3818 thread_mask.maps.bits, full_mask.maps.nbits); 3819 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3820 thread_mask.affinity.bits, full_mask.maps.nbits); 3821 3822 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3823 if (!thread_masks) { 3824 pr_err("Failed to reallocate thread masks\n"); 3825 ret = -ENOMEM; 3826 goto out_free; 3827 } 3828 rec->thread_masks = thread_masks; 3829 rec->thread_masks[t] = thread_mask; 3830 if (verbose > 0) { 3831 pr_debug("thread_masks[%d]: ", t); 3832 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3833 pr_debug("thread_masks[%d]: ", t); 3834 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3835 } 3836 t++; 3837 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3838 if (ret) { 3839 pr_err("Failed to allocate thread mask\n"); 3840 goto out_free_full_and_cpu_masks; 3841 } 3842 } 3843 rec->nr_threads = t; 3844 pr_debug("nr_threads: %d\n", rec->nr_threads); 3845 if (!rec->nr_threads) 3846 ret = -EINVAL; 3847 3848 out_free: 3849 record__thread_mask_free(&thread_mask); 3850 out_free_full_and_cpu_masks: 3851 record__thread_mask_free(&full_mask); 3852 out_free_cpu_mask: 3853 record__mmap_cpu_mask_free(&cpus_mask); 3854 3855 return ret; 3856 } 3857 3858 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3859 { 3860 int ret; 3861 struct cpu_topology *topo; 3862 3863 topo = cpu_topology__new(); 3864 if (!topo) { 3865 pr_err("Failed to allocate CPU topology\n"); 3866 return -ENOMEM; 3867 } 3868 3869 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3870 topo->core_cpus_list, topo->core_cpus_lists); 3871 cpu_topology__delete(topo); 3872 3873 return ret; 3874 } 3875 3876 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3877 { 3878 int ret; 3879 struct cpu_topology *topo; 3880 3881 topo = cpu_topology__new(); 3882 if (!topo) { 3883 pr_err("Failed to allocate CPU topology\n"); 3884 return -ENOMEM; 3885 } 3886 3887 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3888 topo->package_cpus_list, topo->package_cpus_lists); 3889 cpu_topology__delete(topo); 3890 3891 return ret; 3892 } 3893 3894 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3895 { 3896 u32 s; 3897 int ret; 3898 const char **spec; 3899 struct numa_topology *topo; 3900 3901 topo = numa_topology__new(); 3902 if (!topo) { 3903 pr_err("Failed to allocate NUMA topology\n"); 3904 return -ENOMEM; 3905 } 3906 3907 spec = zalloc(topo->nr * sizeof(char *)); 3908 if (!spec) { 3909 pr_err("Failed to allocate NUMA spec\n"); 3910 ret = -ENOMEM; 3911 goto out_delete_topo; 3912 } 3913 for (s = 0; s < topo->nr; s++) 3914 spec[s] = topo->nodes[s].cpus; 3915 3916 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3917 3918 zfree(&spec); 3919 3920 out_delete_topo: 3921 numa_topology__delete(topo); 3922 3923 return ret; 3924 } 3925 3926 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3927 { 3928 int t, ret; 3929 u32 s, nr_spec = 0; 3930 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3931 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3932 3933 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3934 spec = strtok_r(user_spec, ":", &spec_ptr); 3935 if (spec == NULL) 3936 break; 3937 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3938 mask = strtok_r(spec, "/", &mask_ptr); 3939 if (mask == NULL) 3940 break; 3941 pr_debug2(" maps mask: %s\n", mask); 3942 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3943 if (!tmp_spec) { 3944 pr_err("Failed to reallocate maps spec\n"); 3945 ret = -ENOMEM; 3946 goto out_free; 3947 } 3948 maps_spec = tmp_spec; 3949 maps_spec[nr_spec] = dup_mask = strdup(mask); 3950 if (!maps_spec[nr_spec]) { 3951 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3952 ret = -ENOMEM; 3953 goto out_free; 3954 } 3955 mask = strtok_r(NULL, "/", &mask_ptr); 3956 if (mask == NULL) { 3957 pr_err("Invalid thread maps or affinity specs\n"); 3958 ret = -EINVAL; 3959 goto out_free; 3960 } 3961 pr_debug2(" affinity mask: %s\n", mask); 3962 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3963 if (!tmp_spec) { 3964 pr_err("Failed to reallocate affinity spec\n"); 3965 ret = -ENOMEM; 3966 goto out_free; 3967 } 3968 affinity_spec = tmp_spec; 3969 affinity_spec[nr_spec] = strdup(mask); 3970 if (!affinity_spec[nr_spec]) { 3971 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3972 ret = -ENOMEM; 3973 goto out_free; 3974 } 3975 dup_mask = NULL; 3976 nr_spec++; 3977 } 3978 3979 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3980 (const char **)affinity_spec, nr_spec); 3981 3982 out_free: 3983 free(dup_mask); 3984 for (s = 0; s < nr_spec; s++) { 3985 if (maps_spec) 3986 free(maps_spec[s]); 3987 if (affinity_spec) 3988 free(affinity_spec[s]); 3989 } 3990 free(affinity_spec); 3991 free(maps_spec); 3992 3993 return ret; 3994 } 3995 3996 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3997 { 3998 int ret; 3999 4000 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 4001 if (ret) 4002 return ret; 4003 4004 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 4005 return -ENODEV; 4006 4007 rec->nr_threads = 1; 4008 4009 return 0; 4010 } 4011 4012 static int record__init_thread_masks(struct record *rec) 4013 { 4014 int ret = 0; 4015 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 4016 4017 if (!record__threads_enabled(rec)) 4018 return record__init_thread_default_masks(rec, cpus); 4019 4020 if (evlist__per_thread(rec->evlist)) { 4021 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 4022 return -EINVAL; 4023 } 4024 4025 switch (rec->opts.threads_spec) { 4026 case THREAD_SPEC__CPU: 4027 ret = record__init_thread_cpu_masks(rec, cpus); 4028 break; 4029 case THREAD_SPEC__CORE: 4030 ret = record__init_thread_core_masks(rec, cpus); 4031 break; 4032 case THREAD_SPEC__PACKAGE: 4033 ret = record__init_thread_package_masks(rec, cpus); 4034 break; 4035 case THREAD_SPEC__NUMA: 4036 ret = record__init_thread_numa_masks(rec, cpus); 4037 break; 4038 case THREAD_SPEC__USER: 4039 ret = record__init_thread_user_masks(rec, cpus); 4040 break; 4041 default: 4042 break; 4043 } 4044 4045 return ret; 4046 } 4047 4048 int cmd_record(int argc, const char **argv) 4049 { 4050 int err; 4051 struct record *rec = &record; 4052 char errbuf[BUFSIZ]; 4053 4054 setlocale(LC_ALL, ""); 4055 4056 #ifndef HAVE_BPF_SKEL 4057 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 4058 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 4059 # undef set_nobuild 4060 #endif 4061 4062 /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 4063 symbol_conf.lazy_load_kernel_maps = true; 4064 rec->opts.affinity = PERF_AFFINITY_SYS; 4065 4066 rec->evlist = evlist__new(); 4067 if (rec->evlist == NULL) 4068 return -ENOMEM; 4069 4070 err = perf_config(perf_record_config, rec); 4071 if (err) 4072 return err; 4073 4074 argc = parse_options(argc, argv, record_options, record_usage, 4075 PARSE_OPT_STOP_AT_NON_OPTION); 4076 if (quiet) 4077 perf_quiet_option(); 4078 4079 err = symbol__validate_sym_arguments(); 4080 if (err) 4081 return err; 4082 4083 perf_debuginfod_setup(&record.debuginfod); 4084 4085 /* Make system wide (-a) the default target. */ 4086 if (!argc && target__none(&rec->opts.target)) 4087 rec->opts.target.system_wide = true; 4088 4089 if (nr_cgroups && !rec->opts.target.system_wide) { 4090 usage_with_options_msg(record_usage, record_options, 4091 "cgroup monitoring only available in system-wide mode"); 4092 4093 } 4094 4095 if (record.latency) { 4096 /* 4097 * There is no fundamental reason why latency profiling 4098 * can't work for system-wide mode, but exact semantics 4099 * and details are to be defined. 4100 * See the following thread for details: 4101 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/ 4102 */ 4103 if (record.opts.target.system_wide) { 4104 pr_err("Failed: latency profiling is not supported with system-wide collection.\n"); 4105 err = -EINVAL; 4106 goto out_opts; 4107 } 4108 record.opts.record_switch_events = true; 4109 } 4110 4111 if (rec->buildid_mmap) { 4112 if (!perf_can_record_build_id()) { 4113 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 4114 err = -EINVAL; 4115 goto out_opts; 4116 } 4117 pr_debug("Enabling build id in mmap2 events.\n"); 4118 /* Enable mmap build id synthesizing. */ 4119 symbol_conf.buildid_mmap2 = true; 4120 /* Enable perf_event_attr::build_id bit. */ 4121 rec->opts.build_id = true; 4122 /* Disable build id cache. */ 4123 rec->no_buildid = true; 4124 } 4125 4126 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 4127 pr_err("Kernel has no cgroup sampling support.\n"); 4128 err = -EINVAL; 4129 goto out_opts; 4130 } 4131 4132 if (rec->opts.kcore) 4133 rec->opts.text_poke = true; 4134 4135 if (rec->opts.kcore || record__threads_enabled(rec)) 4136 rec->data.is_dir = true; 4137 4138 if (record__threads_enabled(rec)) { 4139 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 4140 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 4141 goto out_opts; 4142 } 4143 if (record__aio_enabled(rec)) { 4144 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 4145 goto out_opts; 4146 } 4147 } 4148 4149 if (rec->opts.comp_level != 0) { 4150 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 4151 rec->no_buildid = true; 4152 } 4153 4154 if (rec->opts.record_switch_events && 4155 !perf_can_record_switch_events()) { 4156 ui__error("kernel does not support recording context switch events\n"); 4157 parse_options_usage(record_usage, record_options, "switch-events", 0); 4158 err = -EINVAL; 4159 goto out_opts; 4160 } 4161 4162 if (switch_output_setup(rec)) { 4163 parse_options_usage(record_usage, record_options, "switch-output", 0); 4164 err = -EINVAL; 4165 goto out_opts; 4166 } 4167 4168 if (rec->switch_output.time) { 4169 signal(SIGALRM, alarm_sig_handler); 4170 alarm(rec->switch_output.time); 4171 } 4172 4173 if (rec->switch_output.num_files) { 4174 rec->switch_output.filenames = calloc(rec->switch_output.num_files, 4175 sizeof(char *)); 4176 if (!rec->switch_output.filenames) { 4177 err = -EINVAL; 4178 goto out_opts; 4179 } 4180 } 4181 4182 if (rec->timestamp_filename && record__threads_enabled(rec)) { 4183 rec->timestamp_filename = false; 4184 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 4185 } 4186 4187 if (rec->filter_action) { 4188 if (!strcmp(rec->filter_action, "pin")) 4189 err = perf_bpf_filter__pin(); 4190 else if (!strcmp(rec->filter_action, "unpin")) 4191 err = perf_bpf_filter__unpin(); 4192 else { 4193 pr_warning("Unknown BPF filter action: %s\n", rec->filter_action); 4194 err = -EINVAL; 4195 } 4196 goto out_opts; 4197 } 4198 4199 /* For backward compatibility, -d implies --mem-info */ 4200 if (rec->opts.sample_address) 4201 rec->opts.sample_data_src = true; 4202 4203 /* 4204 * Allow aliases to facilitate the lookup of symbols for address 4205 * filters. Refer to auxtrace_parse_filters(). 4206 */ 4207 symbol_conf.allow_aliases = true; 4208 4209 symbol__init(NULL); 4210 4211 err = record__auxtrace_init(rec); 4212 if (err) 4213 goto out; 4214 4215 if (dry_run) 4216 goto out; 4217 4218 err = -ENOMEM; 4219 4220 if (rec->no_buildid_cache || rec->no_buildid) { 4221 disable_buildid_cache(); 4222 } else if (rec->switch_output.enabled) { 4223 /* 4224 * In 'perf record --switch-output', disable buildid 4225 * generation by default to reduce data file switching 4226 * overhead. Still generate buildid if they are required 4227 * explicitly using 4228 * 4229 * perf record --switch-output --no-no-buildid \ 4230 * --no-no-buildid-cache 4231 * 4232 * Following code equals to: 4233 * 4234 * if ((rec->no_buildid || !rec->no_buildid_set) && 4235 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 4236 * disable_buildid_cache(); 4237 */ 4238 bool disable = true; 4239 4240 if (rec->no_buildid_set && !rec->no_buildid) 4241 disable = false; 4242 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 4243 disable = false; 4244 if (disable) { 4245 rec->no_buildid = true; 4246 rec->no_buildid_cache = true; 4247 disable_buildid_cache(); 4248 } 4249 } 4250 4251 if (record.opts.overwrite) 4252 record.opts.tail_synthesize = true; 4253 4254 if (rec->evlist->core.nr_entries == 0) { 4255 err = parse_event(rec->evlist, "cycles:P"); 4256 if (err) 4257 goto out; 4258 } 4259 4260 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4261 rec->opts.no_inherit = true; 4262 4263 err = target__validate(&rec->opts.target); 4264 if (err) { 4265 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4266 ui__warning("%s\n", errbuf); 4267 } 4268 4269 if (rec->uid_str) { 4270 uid_t uid = parse_uid(rec->uid_str); 4271 4272 if (uid == UINT_MAX) { 4273 ui__error("Invalid User: %s", rec->uid_str); 4274 err = -EINVAL; 4275 goto out; 4276 } 4277 err = parse_uid_filter(rec->evlist, uid); 4278 if (err) 4279 goto out; 4280 4281 /* User ID filtering implies system wide. */ 4282 rec->opts.target.system_wide = true; 4283 } 4284 4285 /* Enable ignoring missing threads when -p option is defined. */ 4286 rec->opts.ignore_missing_thread = rec->opts.target.pid; 4287 4288 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4289 4290 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4291 arch__add_leaf_frame_record_opts(&rec->opts); 4292 4293 err = -ENOMEM; 4294 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4295 if (rec->opts.target.pid != NULL) { 4296 pr_err("Couldn't create thread/CPU maps: %s\n", 4297 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4298 goto out; 4299 } 4300 else 4301 usage_with_options(record_usage, record_options); 4302 } 4303 4304 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4305 if (err) 4306 goto out; 4307 4308 /* 4309 * We take all buildids when the file contains 4310 * AUX area tracing data because we do not decode the 4311 * trace because it would take too long. 4312 */ 4313 if (rec->opts.full_auxtrace) 4314 rec->buildid_all = true; 4315 4316 if (rec->opts.text_poke) { 4317 err = record__config_text_poke(rec->evlist); 4318 if (err) { 4319 pr_err("record__config_text_poke failed, error %d\n", err); 4320 goto out; 4321 } 4322 } 4323 4324 if (rec->off_cpu) { 4325 err = record__config_off_cpu(rec); 4326 if (err) { 4327 pr_err("record__config_off_cpu failed, error %d\n", err); 4328 goto out; 4329 } 4330 } 4331 4332 if (record_opts__config(&rec->opts)) { 4333 err = -EINVAL; 4334 goto out; 4335 } 4336 4337 err = record__config_tracking_events(rec); 4338 if (err) { 4339 pr_err("record__config_tracking_events failed, error %d\n", err); 4340 goto out; 4341 } 4342 4343 err = record__init_thread_masks(rec); 4344 if (err) { 4345 pr_err("Failed to initialize parallel data streaming masks\n"); 4346 goto out; 4347 } 4348 4349 if (rec->opts.nr_cblocks > nr_cblocks_max) 4350 rec->opts.nr_cblocks = nr_cblocks_max; 4351 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4352 4353 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4354 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4355 4356 if (rec->opts.comp_level > comp_level_max) 4357 rec->opts.comp_level = comp_level_max; 4358 pr_debug("comp level: %d\n", rec->opts.comp_level); 4359 4360 err = __cmd_record(&record, argc, argv); 4361 out: 4362 record__free_thread_masks(rec, rec->nr_threads); 4363 rec->nr_threads = 0; 4364 symbol__exit(); 4365 auxtrace_record__free(rec->itr); 4366 out_opts: 4367 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4368 evlist__delete(rec->evlist); 4369 return err; 4370 } 4371 4372 static void snapshot_sig_handler(int sig __maybe_unused) 4373 { 4374 struct record *rec = &record; 4375 4376 hit_auxtrace_snapshot_trigger(rec); 4377 4378 if (switch_output_signal(rec)) 4379 trigger_hit(&switch_output_trigger); 4380 } 4381 4382 static void alarm_sig_handler(int sig __maybe_unused) 4383 { 4384 struct record *rec = &record; 4385 4386 if (switch_output_time(rec)) 4387 trigger_hit(&switch_output_trigger); 4388 } 4389