// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/stat.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "util/strbuf.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	u64 thread_bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool latency;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	const char *filter_action;
	const char *uid_str;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
	struct pollfd_index_map *index_map;
	size_t index_map_sz;
	size_t index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct machine *machine);

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}
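/*
 * Editor's note (a sketch of the write path below, not part of the original
 * comments): record__write() funnels both serial and per-thread output
 * through perf_data_file__write(), accumulating rec->bytes_written or the
 * per-thread counters. Those totals then feed
 * record__output_max_size_exceeded(), presumably wired to the --max-size
 * option, which stops the session by setting 'done', and
 * switch_output_size(), which fires switch_output_trigger for the
 * size-based --switch-output mode.
 */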
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are
	 * handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling
	 * data crosses the upper bound of the kernel buffer. In this case we
	 * first move part of the data from map->start till the upper bound
	 * and then the remainder from the beginning of the kernel buffer
	 * till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released earlier than the aio write request started on the
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after
		 * the started aio request completes, or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement the map->refcount incremented in
		 * record__aio_pushfn() if the record__aio_write() operation
		 * failed to start; otherwise map->refcount is decremented in
		 * record__aio_complete() after the aio write operation
		 * finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
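/*
 * Editor's note, a sketch of the AIO data path implemented above (not part
 * of the original comments): record__aio_push() first waits for a free aio
 * control block via record__aio_sync(), copies (or compresses) the
 * ring-buffer chunk into map->aio.data[idx] in record__aio_pushfn(), and
 * then queues it with record__aio_write(). Completion is later reaped in
 * record__aio_complete(), which either drops the mmap reference taken in
 * record__aio_pushfn() or restarts the write for any unwritten remainder.
 */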
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
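/*
 * Editor's sketch of how the flush threshold parser above behaves
 * (illustrative values, presumably reached via the --mmap-flush option):
 *
 *   "--mmap-flush=48K"  -> parse_tag_value() yields 48 * 1024 bytes
 *   "--mmap-flush=1024" -> no tag suffix, strtol() yields 1024 bytes
 *   missing or "0"      -> falls back to MMAP_FLUSH_DEFAULT (1 byte)
 *
 * Whatever results is then clamped to a quarter of the mmap buffer size
 * returned by evlist__mmap_size(opts->mmap_pages).
 */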
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);

	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(const struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from zstd_compress() is not 8-byte aligned,
		 * which would trigger an ASAN error. Align it here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}
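/*
 * Editor's note on the alignment above (worked example, not from the
 * original source): if zstd_compress() returns e.g. 107 bytes, then
 * event->header.size becomes PERF_ALIGN(107, 8) = 112 and 5 bytes of zero
 * padding are written after the compressed payload, so the record stream
 * stays 8-byte aligned.
 */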
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;
	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__tracking_system_wide(struct record *rec)
{
	struct evlist *evlist = rec->evlist;
	struct evsel *evsel;

	/*
	 * If a non-dummy evsel exists, system_wide sideband is needed to
	 * help parse sample information.
	 * For example, the PERF_EVENT_MMAP event helps parse symbols, and
	 * the PERF_EVENT_COMM event helps parse the task executable name.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (!evsel__is_dummy_event(evsel))
			return true;
	}

	return false;
}

static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * a tracking event so that we can track PERF_RECORD_MMAP to cover
	 * the delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {

		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}
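/*
 * Editor's illustration of the mapping built above (hypothetical values,
 * not from the original comments): with --threads and a thread whose maps
 * mask covers CPUs 2 and 3, bitmap_weight() gives nr_mmaps = 2 and the
 * loop wires thread_data->maps[0] and [1] to the evlist mmaps of those two
 * CPUs; in the per-thread (dummy cpu map) case every evlist mmap is handed
 * to the single thread instead.
 */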
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;
		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}
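/*
 * Editor's summary of the layout set up above (not in the original
 * comments): thread_data[0] always describes the main perf thread; it
 * keeps tid = gettid(), duplicates the non-perf-event descriptors into its
 * own pollfd, and records the index mapping so that
 * record__update_evlist_pollfd_from_thread() can copy revents back to the
 * evlist. Entries 1..nr_threads-1 describe worker threads and instead get
 * a msg/ack pipe pair, with the read end of the msg pipe polled at
 * ctlfd_pos to notice termination requests.
 */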
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = process_event_sample_stub;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a dso preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed2 *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED2;
	event->header.size = size;

	return size;
}

static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size)
{
	ssize_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);
	if (compressed < 0)
		return compressed;

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because the per-cpu maps and files have data
	 * sorted by the kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}
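/*
 * Editor's note on the 'synch' path above (summary, not from the original
 * comments): on the final synchronizing pass each map's core.flush value
 * is temporarily forced to 1 so that perf_mmap__push()/record__aio_push()
 * drain even partially filled buffers that have not yet reached the
 * configured flush threshold, and the previous flush value is restored
 * afterwards.
 */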
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}
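/*
 * Editor's summary of the worker protocol above (not part of the original
 * comments): each record__thread() worker writes THREAD_MSG__READY on its
 * ack pipe once at startup and once again after the final synchronizing
 * record__mmap_read_all() pass. A POLLHUP on the msg pipe read end at
 * ctlfd_pos (raised when the other end of that pipe is closed) flips
 * 'terminate', and the loop exits after one more drain of the mmaps.
 */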
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe) {
		/* Just to display an approximate size */
		data->file.size = rec->bytes_written;
		return;
	}

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			perf_session__dsos_hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same size as a timestamp: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which causes the newly created perf.data
		 * to lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}

static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}
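/*
 * Editor's note on the function above (summary, not from the original
 * comments): record__read_lost_samples() reads the lost-event counters for
 * every (cpu, thread) slot of each evsel and emits one synthetic
 * PERF_RECORD_LOST_SAMPLES record per non-zero count; drops made by a BPF
 * filter are reported the same way but tagged with
 * PERF_RECORD_MISC_LOST_SAMPLES_BPF in the header misc field.
 */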
2011 */ 2012 static void workload_exec_failed_signal(int signo __maybe_unused, 2013 siginfo_t *info, 2014 void *ucontext __maybe_unused) 2015 { 2016 workload_exec_errno = info->si_value.sival_int; 2017 done = 1; 2018 child_finished = 1; 2019 } 2020 2021 static void snapshot_sig_handler(int sig); 2022 static void alarm_sig_handler(int sig); 2023 2024 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2025 { 2026 if (evlist) { 2027 if (evlist->mmap && evlist->mmap[0].core.base) 2028 return evlist->mmap[0].core.base; 2029 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2030 return evlist->overwrite_mmap[0].core.base; 2031 } 2032 return NULL; 2033 } 2034 2035 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2036 { 2037 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2038 if (pc) 2039 return pc; 2040 return NULL; 2041 } 2042 2043 static int record__synthesize(struct record *rec, bool tail) 2044 { 2045 struct perf_session *session = rec->session; 2046 struct machine *machine = &session->machines.host; 2047 struct perf_data *data = &rec->data; 2048 struct record_opts *opts = &rec->opts; 2049 struct perf_tool *tool = &rec->tool; 2050 int err = 0; 2051 event_op f = process_synthesized_event; 2052 2053 if (rec->opts.tail_synthesize != tail) 2054 return 0; 2055 2056 if (data->is_pipe) { 2057 err = perf_event__synthesize_for_pipe(tool, session, data, 2058 process_synthesized_event); 2059 if (err < 0) 2060 goto out; 2061 2062 rec->bytes_written += err; 2063 } 2064 2065 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2066 process_synthesized_event, machine); 2067 if (err) 2068 goto out; 2069 2070 /* Synthesize id_index before auxtrace_info */ 2071 err = perf_event__synthesize_id_index(tool, 2072 process_synthesized_event, 2073 session->evlist, machine); 2074 if (err) 2075 goto out; 2076 2077 if (rec->opts.full_auxtrace) { 2078 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2079 session, process_synthesized_event); 2080 if (err) 2081 goto out; 2082 } 2083 2084 if (!evlist__exclude_kernel(rec->evlist)) { 2085 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2086 machine); 2087 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2088 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2089 "Check /proc/kallsyms permission or run as root.\n"); 2090 2091 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2092 machine); 2093 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2094 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2095 "Check /proc/modules permission or run as root.\n"); 2096 } 2097 2098 if (perf_guest) { 2099 machines__process_guests(&session->machines, 2100 perf_event__synthesize_guest_os, tool); 2101 } 2102 2103 err = perf_event__synthesize_extra_attr(&rec->tool, 2104 rec->evlist, 2105 process_synthesized_event, 2106 data->is_pipe); 2107 if (err) 2108 goto out; 2109 2110 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2111 process_synthesized_event, 2112 NULL); 2113 if (err < 0) { 2114 pr_err("Couldn't synthesize thread map.\n"); 2115 return err; 2116 } 2117 2118 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2119 process_synthesized_event, NULL); 2120 if (err < 0) { 2121 pr_err("Couldn't synthesize cpu map.\n"); 2122 return err; 2123 } 2124 2125 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2126 machine, opts); 2127 if (err < 0) { 2128 pr_warning("Couldn't synthesize bpf events.\n"); 2129 err = 0; 2130 } 2131 2132 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2133 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2134 machine); 2135 if (err < 0) { 2136 pr_warning("Couldn't synthesize cgroup events.\n"); 2137 err = 0; 2138 } 2139 } 2140 2141 if (rec->opts.nr_threads_synthesize > 1) { 2142 mutex_init(&synth_lock); 2143 perf_set_multithreaded(); 2144 f = process_locked_synthesized_event; 2145 } 2146 2147 if (rec->opts.synth & PERF_SYNTH_TASK) { 2148 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2149 2150 err = __machine__synthesize_threads(machine, tool, &opts->target, 2151 rec->evlist->core.threads, 2152 f, needs_mmap, opts->sample_address, 2153 rec->opts.nr_threads_synthesize); 2154 } 2155 2156 if (rec->opts.nr_threads_synthesize > 1) { 2157 perf_set_singlethreaded(); 2158 mutex_destroy(&synth_lock); 2159 } 2160 2161 out: 2162 return err; 2163 } 2164 2165 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2166 { 2167 struct record *rec = data; 2168 pthread_kill(rec->thread_id, SIGUSR2); 2169 return 0; 2170 } 2171 2172 static int record__setup_sb_evlist(struct record *rec) 2173 { 2174 struct record_opts *opts = &rec->opts; 2175 2176 if (rec->sb_evlist != NULL) { 2177 /* 2178 * We get here if --switch-output-event populated the 2179 * sb_evlist, so associate a callback that will send a SIGUSR2 2180 * to the main thread. 
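 *
 * Illustrative usage (not taken from this file): a run like
 * 'perf record --switch-output-event=<event> ...' routes that event
 * through the side band thread, and record__process_signal_event()
 * then sends SIGUSR2 to the recorded thread_id so the main loop
 * switches to a new output file.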
2181 */ 2182 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2183 rec->thread_id = pthread_self(); 2184 } 2185 #ifdef HAVE_LIBBPF_SUPPORT 2186 if (!opts->no_bpf_event) { 2187 if (rec->sb_evlist == NULL) { 2188 rec->sb_evlist = evlist__new(); 2189 2190 if (rec->sb_evlist == NULL) { 2191 pr_err("Couldn't create side band evlist.\n."); 2192 return -1; 2193 } 2194 } 2195 2196 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2197 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2198 return -1; 2199 } 2200 } 2201 #endif 2202 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2203 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2204 opts->no_bpf_event = true; 2205 } 2206 2207 return 0; 2208 } 2209 2210 static int record__init_clock(struct record *rec) 2211 { 2212 struct perf_session *session = rec->session; 2213 struct timespec ref_clockid; 2214 struct timeval ref_tod; 2215 u64 ref; 2216 2217 if (!rec->opts.use_clockid) 2218 return 0; 2219 2220 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2221 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2222 2223 session->header.env.clock.clockid = rec->opts.clockid; 2224 2225 if (gettimeofday(&ref_tod, NULL) != 0) { 2226 pr_err("gettimeofday failed, cannot set reference time.\n"); 2227 return -1; 2228 } 2229 2230 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2231 pr_err("clock_gettime failed, cannot set reference time.\n"); 2232 return -1; 2233 } 2234 2235 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2236 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2237 2238 session->header.env.clock.tod_ns = ref; 2239 2240 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2241 (u64) ref_clockid.tv_nsec; 2242 2243 session->header.env.clock.clockid_ns = ref; 2244 return 0; 2245 } 2246 2247 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2248 { 2249 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2250 trigger_hit(&auxtrace_snapshot_trigger); 2251 auxtrace_record__snapshot_started = 1; 2252 if (auxtrace_record__snapshot_start(rec->itr)) 2253 trigger_error(&auxtrace_snapshot_trigger); 2254 } 2255 } 2256 2257 static int record__terminate_thread(struct record_thread *thread_data) 2258 { 2259 int err; 2260 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2261 pid_t tid = thread_data->tid; 2262 2263 close(thread_data->pipes.msg[1]); 2264 thread_data->pipes.msg[1] = -1; 2265 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2266 if (err > 0) 2267 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2268 else 2269 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2270 thread->tid, tid); 2271 2272 return 0; 2273 } 2274 2275 static int record__start_threads(struct record *rec) 2276 { 2277 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2278 struct record_thread *thread_data = rec->thread_data; 2279 sigset_t full, mask; 2280 pthread_t handle; 2281 pthread_attr_t attrs; 2282 2283 thread = &thread_data[0]; 2284 2285 if (!record__threads_enabled(rec)) 2286 return 0; 2287 2288 sigfillset(&full); 2289 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2290 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2291 return -1; 2292 } 2293 2294 pthread_attr_init(&attrs); 2295 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2296 2297 for (t = 1; t < nr_threads; t++) { 2298 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2299 
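/*
 * When pthread_attr_setaffinity_np() is available, each worker is
 * created already pinned to its mask->affinity CPUs; the parent then
 * waits on the worker's ack pipe and only spawns the next thread once
 * the new one has reported in.
 */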
2300 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2301 pthread_attr_setaffinity_np(&attrs, 2302 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2303 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2304 #endif 2305 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2306 for (tt = 1; tt < t; tt++) 2307 record__terminate_thread(&thread_data[t]); 2308 pr_err("Failed to start threads: %s\n", strerror(errno)); 2309 ret = -1; 2310 goto out_err; 2311 } 2312 2313 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2314 if (err > 0) 2315 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2316 thread_msg_tags[msg]); 2317 else 2318 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2319 thread->tid, rec->thread_data[t].tid); 2320 } 2321 2322 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2323 (cpu_set_t *)thread->mask->affinity.bits); 2324 2325 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2326 2327 out_err: 2328 pthread_attr_destroy(&attrs); 2329 2330 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2331 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2332 ret = -1; 2333 } 2334 2335 return ret; 2336 } 2337 2338 static int record__stop_threads(struct record *rec) 2339 { 2340 int t; 2341 struct record_thread *thread_data = rec->thread_data; 2342 2343 for (t = 1; t < rec->nr_threads; t++) 2344 record__terminate_thread(&thread_data[t]); 2345 2346 for (t = 0; t < rec->nr_threads; t++) { 2347 rec->samples += thread_data[t].samples; 2348 if (!record__threads_enabled(rec)) 2349 continue; 2350 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2351 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2352 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2353 thread_data[t].samples, thread_data[t].waking); 2354 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2355 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2356 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2357 else 2358 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2359 } 2360 2361 return 0; 2362 } 2363 2364 static unsigned long record__waking(struct record *rec) 2365 { 2366 int t; 2367 unsigned long waking = 0; 2368 struct record_thread *thread_data = rec->thread_data; 2369 2370 for (t = 0; t < rec->nr_threads; t++) 2371 waking += thread_data[t].waking; 2372 2373 return waking; 2374 } 2375 2376 static int __cmd_record(struct record *rec, int argc, const char **argv) 2377 { 2378 int err; 2379 int status = 0; 2380 const bool forks = argc > 0; 2381 struct perf_tool *tool = &rec->tool; 2382 struct record_opts *opts = &rec->opts; 2383 struct perf_data *data = &rec->data; 2384 struct perf_session *session; 2385 bool disabled = false, draining = false; 2386 int fd; 2387 float ratio = 0; 2388 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2389 2390 atexit(record__sig_exit); 2391 signal(SIGCHLD, sig_handler); 2392 signal(SIGINT, sig_handler); 2393 signal(SIGTERM, sig_handler); 2394 signal(SIGSEGV, sigsegv_handler); 2395 2396 if (rec->opts.record_cgroup) { 2397 #ifndef HAVE_FILE_HANDLE 2398 pr_err("cgroup tracking is not supported\n"); 2399 return -1; 2400 #endif 2401 } 2402 2403 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2404 signal(SIGUSR2, snapshot_sig_handler); 2405 if (rec->opts.auxtrace_snapshot_mode) 2406 trigger_on(&auxtrace_snapshot_trigger); 2407 if 
(rec->switch_output.enabled) 2408 trigger_on(&switch_output_trigger); 2409 } else { 2410 signal(SIGUSR2, SIG_IGN); 2411 } 2412 2413 perf_tool__init(tool, /*ordered_events=*/true); 2414 tool->sample = process_sample_event; 2415 tool->fork = perf_event__process_fork; 2416 tool->exit = perf_event__process_exit; 2417 tool->comm = perf_event__process_comm; 2418 tool->namespaces = perf_event__process_namespaces; 2419 tool->mmap = build_id__process_mmap; 2420 tool->mmap2 = build_id__process_mmap2; 2421 tool->itrace_start = process_timestamp_boundary; 2422 tool->aux = process_timestamp_boundary; 2423 tool->namespace_events = rec->opts.record_namespaces; 2424 tool->cgroup_events = rec->opts.record_cgroup; 2425 session = perf_session__new(data, tool); 2426 if (IS_ERR(session)) { 2427 pr_err("Perf session creation failed.\n"); 2428 return PTR_ERR(session); 2429 } 2430 2431 if (record__threads_enabled(rec)) { 2432 if (perf_data__is_pipe(&rec->data)) { 2433 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2434 return -1; 2435 } 2436 if (rec->opts.full_auxtrace) { 2437 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2438 return -1; 2439 } 2440 } 2441 2442 fd = perf_data__fd(data); 2443 rec->session = session; 2444 2445 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2446 pr_err("Compression initialization failed.\n"); 2447 return -1; 2448 } 2449 #ifdef HAVE_EVENTFD_SUPPORT 2450 done_fd = eventfd(0, EFD_NONBLOCK); 2451 if (done_fd < 0) { 2452 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2453 status = -1; 2454 goto out_delete_session; 2455 } 2456 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2457 if (err < 0) { 2458 pr_err("Failed to add wakeup eventfd to poll list\n"); 2459 status = err; 2460 goto out_delete_session; 2461 } 2462 #endif // HAVE_EVENTFD_SUPPORT 2463 2464 session->header.env.comp_type = PERF_COMP_ZSTD; 2465 session->header.env.comp_level = rec->opts.comp_level; 2466 2467 if (rec->opts.kcore && 2468 !record__kcore_readable(&session->machines.host)) { 2469 pr_err("ERROR: kcore is not readable.\n"); 2470 return -1; 2471 } 2472 2473 if (record__init_clock(rec)) 2474 return -1; 2475 2476 record__init_features(rec); 2477 2478 if (forks) { 2479 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2480 workload_exec_failed_signal); 2481 if (err < 0) { 2482 pr_err("Couldn't run the workload!\n"); 2483 status = err; 2484 goto out_delete_session; 2485 } 2486 } 2487 2488 /* 2489 * If we have just single event and are sending data 2490 * through pipe, we need to force the ids allocation, 2491 * because we synthesize event name through the pipe 2492 * and need the id for that. 2493 */ 2494 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2495 rec->opts.sample_id = true; 2496 2497 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2498 rec->timestamp_filename = false; 2499 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2500 } 2501 2502 /* 2503 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE 2504 * and hybrid_merge is false. 
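 *
 * The effect (illustrative): if two evsels end up with the same name,
 * e.g. "cycles" on both cpu_core and cpu_atom, the call below reworks
 * the names so that each PMU's event remains distinguishable.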
2505 */
2506 evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2507
2508 evlist__config(rec->evlist, opts, &callchain_param);
2509
2510 /* Debug message used by test scripts */
2511 pr_debug3("perf record opening and mmapping events\n");
2512 if (record__open(rec) != 0) {
2513 err = -1;
2514 goto out_free_threads;
2515 }
2516 /* Debug message used by test scripts */
2517 pr_debug3("perf record done opening and mmapping events\n");
2518 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2519
2520 if (rec->opts.kcore) {
2521 err = record__kcore_copy(&session->machines.host, data);
2522 if (err) {
2523 pr_err("ERROR: Failed to copy kcore\n");
2524 goto out_free_threads;
2525 }
2526 }
2527
2528 /*
2529 * Normally perf_session__new would do this, but it doesn't have the
2530 * evlist.
2531 */
2532 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2533 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2534 rec->tool.ordered_events = false;
2535 }
2536
2537 if (evlist__nr_groups(rec->evlist) == 0)
2538 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2539
2540 if (data->is_pipe) {
2541 err = perf_header__write_pipe(fd);
2542 if (err < 0)
2543 goto out_free_threads;
2544 } else {
2545 err = perf_session__write_header(session, rec->evlist, fd, false);
2546 if (err < 0)
2547 goto out_free_threads;
2548 }
2549
2550 err = -1;
2551 if (!rec->no_buildid
2552 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2553 pr_err("Couldn't generate buildids. "
2554 "Use --no-buildid to profile anyway.\n");
2555 goto out_free_threads;
2556 }
2557
2558 if (!evlist__needs_bpf_sb_event(rec->evlist))
2559 opts->no_bpf_event = true;
2560
2561 err = record__setup_sb_evlist(rec);
2562 if (err)
2563 goto out_free_threads;
2564
2565 err = record__synthesize(rec, false);
2566 if (err < 0)
2567 goto out_free_threads;
2568
2569 if (rec->realtime_prio) {
2570 struct sched_param param;
2571
2572 param.sched_priority = rec->realtime_prio;
2573 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2574 pr_err("Could not set realtime priority.\n");
2575 err = -1;
2576 goto out_free_threads;
2577 }
2578 }
2579
2580 if (record__start_threads(rec))
2581 goto out_free_threads;
2582
2583 /*
2584 * When perf is starting the traced process, all the events
2585 * (apart from group members) have enable_on_exec=1 set,
2586 * so don't spoil it by prematurely enabling them.
2587 */
2588 if (!target__none(&opts->target) && !opts->target.initial_delay)
2589 evlist__enable(rec->evlist);
2590
2591 /*
2592 * offcpu-time does not call execve, so enable_on_exec wouldn't work
2593 * when recording a workload, do it manually
2594 */
2595 if (rec->off_cpu)
2596 evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2597
2598 /*
2599 * Let the child rip
2600 */
2601 if (forks) {
2602 struct machine *machine = &session->machines.host;
2603 union perf_event *event;
2604 pid_t tgid;
2605
2606 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2607 if (event == NULL) {
2608 err = -ENOMEM;
2609 goto out_child;
2610 }
2611
2612 /*
2613 * Some H/W events are generated before COMM event
2614 * which is emitted during exec(), so perf script
2615 * cannot see a correct process name for those events.
2616 * Synthesize COMM event to prevent it.
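 *
 * The buffer malloc'ed above is sized for event->comm plus the
 * machine's id_hdr_size, and the tgid returned below is reused for
 * the NAMESPACES synthesis that follows.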
2617 */ 2618 tgid = perf_event__synthesize_comm(tool, event, 2619 rec->evlist->workload.pid, 2620 process_synthesized_event, 2621 machine); 2622 free(event); 2623 2624 if (tgid == -1) 2625 goto out_child; 2626 2627 event = malloc(sizeof(event->namespaces) + 2628 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2629 machine->id_hdr_size); 2630 if (event == NULL) { 2631 err = -ENOMEM; 2632 goto out_child; 2633 } 2634 2635 /* 2636 * Synthesize NAMESPACES event for the command specified. 2637 */ 2638 perf_event__synthesize_namespaces(tool, event, 2639 rec->evlist->workload.pid, 2640 tgid, process_synthesized_event, 2641 machine); 2642 free(event); 2643 2644 evlist__start_workload(rec->evlist); 2645 } 2646 2647 if (opts->target.initial_delay) { 2648 pr_info(EVLIST_DISABLED_MSG); 2649 if (opts->target.initial_delay > 0) { 2650 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2651 evlist__enable(rec->evlist); 2652 pr_info(EVLIST_ENABLED_MSG); 2653 } 2654 } 2655 2656 err = event_enable_timer__start(rec->evlist->eet); 2657 if (err) 2658 goto out_child; 2659 2660 /* Debug message used by test scripts */ 2661 pr_debug3("perf record has started\n"); 2662 fflush(stderr); 2663 2664 trigger_ready(&auxtrace_snapshot_trigger); 2665 trigger_ready(&switch_output_trigger); 2666 perf_hooks__invoke_record_start(); 2667 2668 /* 2669 * Must write FINISHED_INIT so it will be seen after all other 2670 * synthesized user events, but before any regular events. 2671 */ 2672 err = write_finished_init(rec, false); 2673 if (err < 0) 2674 goto out_child; 2675 2676 for (;;) { 2677 unsigned long long hits = thread->samples; 2678 2679 /* 2680 * rec->evlist->bkw_mmap_state is possible to be 2681 * BKW_MMAP_EMPTY here: when done == true and 2682 * hits != rec->samples in previous round. 2683 * 2684 * evlist__toggle_bkw_mmap ensure we never 2685 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2686 */ 2687 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2688 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2689 2690 if (record__mmap_read_all(rec, false) < 0) { 2691 trigger_error(&auxtrace_snapshot_trigger); 2692 trigger_error(&switch_output_trigger); 2693 err = -1; 2694 goto out_child; 2695 } 2696 2697 if (auxtrace_record__snapshot_started) { 2698 auxtrace_record__snapshot_started = 0; 2699 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2700 record__read_auxtrace_snapshot(rec, false); 2701 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2702 pr_err("AUX area tracing snapshot failed\n"); 2703 err = -1; 2704 goto out_child; 2705 } 2706 } 2707 2708 if (trigger_is_hit(&switch_output_trigger)) { 2709 /* 2710 * If switch_output_trigger is hit, the data in 2711 * overwritable ring buffer should have been collected, 2712 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2713 * 2714 * If SIGUSR2 raise after or during record__mmap_read_all(), 2715 * record__mmap_read_all() didn't collect data from 2716 * overwritable ring buffer. Read again. 2717 */ 2718 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2719 continue; 2720 trigger_ready(&switch_output_trigger); 2721 2722 /* 2723 * Reenable events in overwrite ring buffer after 2724 * record__mmap_read_all(): we should have collected 2725 * data from it. 
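 *
 * After the toggle back to BKW_MMAP_RUNNING, record__switch_output()
 * rotates the output file and, for time based switching, alarm() is
 * re-armed with rec->switch_output.time.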
2726 */ 2727 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2728 2729 if (!quiet) 2730 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2731 record__waking(rec)); 2732 thread->waking = 0; 2733 fd = record__switch_output(rec, false); 2734 if (fd < 0) { 2735 pr_err("Failed to switch to new file\n"); 2736 trigger_error(&switch_output_trigger); 2737 err = fd; 2738 goto out_child; 2739 } 2740 2741 /* re-arm the alarm */ 2742 if (rec->switch_output.time) 2743 alarm(rec->switch_output.time); 2744 } 2745 2746 if (hits == thread->samples) { 2747 if (done || draining) 2748 break; 2749 err = fdarray__poll(&thread->pollfd, -1); 2750 /* 2751 * Propagate error, only if there's any. Ignore positive 2752 * number of returned events and interrupt error. 2753 */ 2754 if (err > 0 || (err < 0 && errno == EINTR)) 2755 err = 0; 2756 thread->waking++; 2757 2758 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2759 record__thread_munmap_filtered, NULL) == 0) 2760 draining = true; 2761 2762 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2763 if (err) 2764 goto out_child; 2765 } 2766 2767 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2768 switch (cmd) { 2769 case EVLIST_CTL_CMD_SNAPSHOT: 2770 hit_auxtrace_snapshot_trigger(rec); 2771 evlist__ctlfd_ack(rec->evlist); 2772 break; 2773 case EVLIST_CTL_CMD_STOP: 2774 done = 1; 2775 break; 2776 case EVLIST_CTL_CMD_ACK: 2777 case EVLIST_CTL_CMD_UNSUPPORTED: 2778 case EVLIST_CTL_CMD_ENABLE: 2779 case EVLIST_CTL_CMD_DISABLE: 2780 case EVLIST_CTL_CMD_EVLIST: 2781 case EVLIST_CTL_CMD_PING: 2782 default: 2783 break; 2784 } 2785 } 2786 2787 err = event_enable_timer__process(rec->evlist->eet); 2788 if (err < 0) 2789 goto out_child; 2790 if (err) { 2791 err = 0; 2792 done = 1; 2793 } 2794 2795 /* 2796 * When perf is starting the traced process, at the end events 2797 * die with the process and we wait for that. Thus no need to 2798 * disable events in this case. 
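 *
 * When an attach or system-wide target was given, evlist__disable()
 * is called once 'done' is set, so the remaining ring-buffer data can
 * be drained without new samples being appended.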
2799 */ 2800 if (done && !disabled && !target__none(&opts->target)) { 2801 trigger_off(&auxtrace_snapshot_trigger); 2802 evlist__disable(rec->evlist); 2803 disabled = true; 2804 } 2805 } 2806 2807 trigger_off(&auxtrace_snapshot_trigger); 2808 trigger_off(&switch_output_trigger); 2809 2810 if (opts->auxtrace_snapshot_on_exit) 2811 record__auxtrace_snapshot_exit(rec); 2812 2813 if (forks && workload_exec_errno) { 2814 char msg[STRERR_BUFSIZE]; 2815 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2816 struct strbuf sb = STRBUF_INIT; 2817 2818 evlist__format_evsels(rec->evlist, &sb, 2048); 2819 2820 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2821 sb.buf, argv[0], emsg); 2822 strbuf_release(&sb); 2823 err = -1; 2824 goto out_child; 2825 } 2826 2827 if (!quiet) 2828 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2829 record__waking(rec)); 2830 2831 write_finished_init(rec, true); 2832 2833 if (target__none(&rec->opts.target)) 2834 record__synthesize_workload(rec, true); 2835 2836 out_child: 2837 record__stop_threads(rec); 2838 record__mmap_read_all(rec, true); 2839 out_free_threads: 2840 record__free_thread_data(rec); 2841 evlist__finalize_ctlfd(rec->evlist); 2842 record__aio_mmap_read_sync(rec); 2843 2844 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2845 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2846 session->header.env.comp_ratio = ratio + 0.5; 2847 } 2848 2849 if (forks) { 2850 int exit_status; 2851 2852 if (!child_finished) 2853 kill(rec->evlist->workload.pid, SIGTERM); 2854 2855 wait(&exit_status); 2856 2857 if (err < 0) 2858 status = err; 2859 else if (WIFEXITED(exit_status)) 2860 status = WEXITSTATUS(exit_status); 2861 else if (WIFSIGNALED(exit_status)) 2862 signr = WTERMSIG(exit_status); 2863 } else 2864 status = err; 2865 2866 if (rec->off_cpu) 2867 rec->bytes_written += off_cpu_write(rec->session); 2868 2869 record__read_lost_samples(rec); 2870 record__synthesize(rec, true); 2871 /* this will be recalculated during process_buildids() */ 2872 rec->samples = 0; 2873 2874 if (!err) { 2875 if (!rec->timestamp_filename) { 2876 record__finish_output(rec); 2877 } else { 2878 fd = record__switch_output(rec, true); 2879 if (fd < 0) { 2880 status = fd; 2881 goto out_delete_session; 2882 } 2883 } 2884 } 2885 2886 perf_hooks__invoke_record_end(); 2887 2888 if (!err && !quiet) { 2889 char samples[128]; 2890 const char *postfix = rec->timestamp_filename ? 
2891 ".<timestamp>" : ""; 2892 2893 if (rec->samples && !rec->opts.full_auxtrace) 2894 scnprintf(samples, sizeof(samples), 2895 " (%" PRIu64 " samples)", rec->samples); 2896 else 2897 samples[0] = '\0'; 2898 2899 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2900 perf_data__size(data) / 1024.0 / 1024.0, 2901 data->path, postfix, samples); 2902 if (ratio) { 2903 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2904 rec->session->bytes_transferred / 1024.0 / 1024.0, 2905 ratio); 2906 } 2907 fprintf(stderr, " ]\n"); 2908 } 2909 2910 out_delete_session: 2911 #ifdef HAVE_EVENTFD_SUPPORT 2912 if (done_fd >= 0) { 2913 fd = done_fd; 2914 done_fd = -1; 2915 2916 close(fd); 2917 } 2918 #endif 2919 zstd_fini(&session->zstd_data); 2920 if (!opts->no_bpf_event) 2921 evlist__stop_sb_thread(rec->sb_evlist); 2922 2923 perf_session__delete(session); 2924 return status; 2925 } 2926 2927 static void callchain_debug(struct callchain_param *callchain) 2928 { 2929 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2930 2931 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2932 2933 if (callchain->record_mode == CALLCHAIN_DWARF) 2934 pr_debug("callchain: stack dump size %d\n", 2935 callchain->dump_size); 2936 } 2937 2938 int record_opts__parse_callchain(struct record_opts *record, 2939 struct callchain_param *callchain, 2940 const char *arg, bool unset) 2941 { 2942 int ret; 2943 callchain->enabled = !unset; 2944 2945 /* --no-call-graph */ 2946 if (unset) { 2947 callchain->record_mode = CALLCHAIN_NONE; 2948 pr_debug("callchain: disabled\n"); 2949 return 0; 2950 } 2951 2952 ret = parse_callchain_record_opt(arg, callchain); 2953 if (!ret) { 2954 /* Enable data address sampling for DWARF unwind. */ 2955 if (callchain->record_mode == CALLCHAIN_DWARF) 2956 record->sample_address = true; 2957 callchain_debug(callchain); 2958 } 2959 2960 return ret; 2961 } 2962 2963 int record_parse_callchain_opt(const struct option *opt, 2964 const char *arg, 2965 int unset) 2966 { 2967 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2968 } 2969 2970 int record_callchain_opt(const struct option *opt, 2971 const char *arg __maybe_unused, 2972 int unset __maybe_unused) 2973 { 2974 struct callchain_param *callchain = opt->value; 2975 2976 callchain->enabled = true; 2977 2978 if (callchain->record_mode == CALLCHAIN_NONE) 2979 callchain->record_mode = CALLCHAIN_FP; 2980 2981 callchain_debug(callchain); 2982 return 0; 2983 } 2984 2985 static int perf_record_config(const char *var, const char *value, void *cb) 2986 { 2987 struct record *rec = cb; 2988 2989 if (!strcmp(var, "record.build-id")) { 2990 if (!strcmp(value, "cache")) 2991 rec->no_buildid_cache = false; 2992 else if (!strcmp(value, "no-cache")) 2993 rec->no_buildid_cache = true; 2994 else if (!strcmp(value, "skip")) 2995 rec->no_buildid = true; 2996 else if (!strcmp(value, "mmap")) 2997 rec->buildid_mmap = true; 2998 else 2999 return -1; 3000 return 0; 3001 } 3002 if (!strcmp(var, "record.call-graph")) { 3003 var = "call-graph.record-mode"; 3004 return perf_default_config(var, value, cb); 3005 } 3006 #ifdef HAVE_AIO_SUPPORT 3007 if (!strcmp(var, "record.aio")) { 3008 rec->opts.nr_cblocks = strtol(value, NULL, 0); 3009 if (!rec->opts.nr_cblocks) 3010 rec->opts.nr_cblocks = nr_cblocks_default; 3011 } 3012 #endif 3013 if (!strcmp(var, "record.debuginfod")) { 3014 rec->debuginfod.urls = strdup(value); 3015 if (!rec->debuginfod.urls) 3016 return -ENOMEM; 3017 rec->debuginfod.set = 
true; 3018 } 3019 3020 return 0; 3021 } 3022 3023 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 3024 { 3025 struct record *rec = (struct record *)opt->value; 3026 3027 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 3028 } 3029 3030 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 3031 { 3032 struct record_opts *opts = (struct record_opts *)opt->value; 3033 3034 if (unset || !str) 3035 return 0; 3036 3037 if (!strcasecmp(str, "node")) 3038 opts->affinity = PERF_AFFINITY_NODE; 3039 else if (!strcasecmp(str, "cpu")) 3040 opts->affinity = PERF_AFFINITY_CPU; 3041 3042 return 0; 3043 } 3044 3045 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3046 { 3047 mask->nbits = nr_bits; 3048 mask->bits = bitmap_zalloc(mask->nbits); 3049 if (!mask->bits) 3050 return -ENOMEM; 3051 3052 return 0; 3053 } 3054 3055 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3056 { 3057 bitmap_free(mask->bits); 3058 mask->nbits = 0; 3059 } 3060 3061 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3062 { 3063 int ret; 3064 3065 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3066 if (ret) { 3067 mask->affinity.bits = NULL; 3068 return ret; 3069 } 3070 3071 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3072 if (ret) { 3073 record__mmap_cpu_mask_free(&mask->maps); 3074 mask->maps.bits = NULL; 3075 } 3076 3077 return ret; 3078 } 3079 3080 static void record__thread_mask_free(struct thread_mask *mask) 3081 { 3082 record__mmap_cpu_mask_free(&mask->maps); 3083 record__mmap_cpu_mask_free(&mask->affinity); 3084 } 3085 3086 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3087 { 3088 int s; 3089 struct record_opts *opts = opt->value; 3090 3091 if (unset || !str || !strlen(str)) { 3092 opts->threads_spec = THREAD_SPEC__CPU; 3093 } else { 3094 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3095 if (s == THREAD_SPEC__USER) { 3096 opts->threads_user_spec = strdup(str); 3097 if (!opts->threads_user_spec) 3098 return -ENOMEM; 3099 opts->threads_spec = THREAD_SPEC__USER; 3100 break; 3101 } 3102 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3103 opts->threads_spec = s; 3104 break; 3105 } 3106 } 3107 } 3108 3109 if (opts->threads_spec == THREAD_SPEC__USER) 3110 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3111 else 3112 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3113 3114 return 0; 3115 } 3116 3117 static int parse_output_max_size(const struct option *opt, 3118 const char *str, int unset) 3119 { 3120 unsigned long *s = (unsigned long *)opt->value; 3121 static struct parse_tag tags_size[] = { 3122 { .tag = 'B', .mult = 1 }, 3123 { .tag = 'K', .mult = 1 << 10 }, 3124 { .tag = 'M', .mult = 1 << 20 }, 3125 { .tag = 'G', .mult = 1 << 30 }, 3126 { .tag = 0 }, 3127 }; 3128 unsigned long val; 3129 3130 if (unset) { 3131 *s = 0; 3132 return 0; 3133 } 3134 3135 val = parse_tag_value(str, tags_size); 3136 if (val != (unsigned long) -1) { 3137 *s = val; 3138 return 0; 3139 } 3140 3141 return -1; 3142 } 3143 3144 static int record__parse_mmap_pages(const struct option *opt, 3145 const char *str, 3146 int unset __maybe_unused) 3147 { 3148 struct record_opts *opts = opt->value; 3149 char *s, *p; 3150 unsigned int mmap_pages; 3151 int ret; 3152 3153 if (!str) 3154 return -EINVAL; 3155 3156 s = strdup(str); 3157 if (!s) 3158 return -ENOMEM; 
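/*
 * The argument is "pages[,pages]": the part before the comma sizes
 * the data mmaps, the part after it the AUX area mmaps. Illustrative
 * values (not taken from this file) would be "-m 512,128" or a size
 * such as "-m 16M".
 */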
3159 3160 p = strchr(s, ','); 3161 if (p) 3162 *p = '\0'; 3163 3164 if (*s) { 3165 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3166 if (ret) 3167 goto out_free; 3168 opts->mmap_pages = mmap_pages; 3169 } 3170 3171 if (!p) { 3172 ret = 0; 3173 goto out_free; 3174 } 3175 3176 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3177 if (ret) 3178 goto out_free; 3179 3180 opts->auxtrace_mmap_pages = mmap_pages; 3181 3182 out_free: 3183 free(s); 3184 return ret; 3185 } 3186 3187 static int record__parse_off_cpu_thresh(const struct option *opt, 3188 const char *str, 3189 int unset __maybe_unused) 3190 { 3191 struct record_opts *opts = opt->value; 3192 char *endptr; 3193 u64 off_cpu_thresh_ms; 3194 3195 if (!str) 3196 return -EINVAL; 3197 3198 off_cpu_thresh_ms = strtoull(str, &endptr, 10); 3199 3200 /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */ 3201 if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0"))) 3202 return -EINVAL; 3203 else 3204 opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC; 3205 3206 return 0; 3207 } 3208 3209 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3210 { 3211 } 3212 3213 static int parse_control_option(const struct option *opt, 3214 const char *str, 3215 int unset __maybe_unused) 3216 { 3217 struct record_opts *opts = opt->value; 3218 3219 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3220 } 3221 3222 static void switch_output_size_warn(struct record *rec) 3223 { 3224 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3225 struct switch_output *s = &rec->switch_output; 3226 3227 wakeup_size /= 2; 3228 3229 if (s->size < wakeup_size) { 3230 char buf[100]; 3231 3232 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3233 pr_warning("WARNING: switch-output data size lower than " 3234 "wakeup kernel buffer size (%s) " 3235 "expect bigger perf.data sizes\n", buf); 3236 } 3237 } 3238 3239 static int switch_output_setup(struct record *rec) 3240 { 3241 struct switch_output *s = &rec->switch_output; 3242 static struct parse_tag tags_size[] = { 3243 { .tag = 'B', .mult = 1 }, 3244 { .tag = 'K', .mult = 1 << 10 }, 3245 { .tag = 'M', .mult = 1 << 20 }, 3246 { .tag = 'G', .mult = 1 << 30 }, 3247 { .tag = 0 }, 3248 }; 3249 static struct parse_tag tags_time[] = { 3250 { .tag = 's', .mult = 1 }, 3251 { .tag = 'm', .mult = 60 }, 3252 { .tag = 'h', .mult = 60*60 }, 3253 { .tag = 'd', .mult = 60*60*24 }, 3254 { .tag = 0 }, 3255 }; 3256 unsigned long val; 3257 3258 /* 3259 * If we're using --switch-output-events, then we imply its 3260 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3261 * thread to its parent. 
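 *
 * Otherwise s->str is parsed below as the literal "signal", a size
 * tag (B/K/M/G) or a time tag (s/m/h/d); whichever form is used,
 * enabling switch-output also forces timestamp-suffixed output
 * filenames.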
3262 */ 3263 if (rec->switch_output_event_set) { 3264 if (record__threads_enabled(rec)) { 3265 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3266 return 0; 3267 } 3268 goto do_signal; 3269 } 3270 3271 if (!s->set) 3272 return 0; 3273 3274 if (record__threads_enabled(rec)) { 3275 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3276 return 0; 3277 } 3278 3279 if (!strcmp(s->str, "signal")) { 3280 do_signal: 3281 s->signal = true; 3282 pr_debug("switch-output with SIGUSR2 signal\n"); 3283 goto enabled; 3284 } 3285 3286 val = parse_tag_value(s->str, tags_size); 3287 if (val != (unsigned long) -1) { 3288 s->size = val; 3289 pr_debug("switch-output with %s size threshold\n", s->str); 3290 goto enabled; 3291 } 3292 3293 val = parse_tag_value(s->str, tags_time); 3294 if (val != (unsigned long) -1) { 3295 s->time = val; 3296 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3297 s->str, s->time); 3298 goto enabled; 3299 } 3300 3301 return -1; 3302 3303 enabled: 3304 rec->timestamp_filename = true; 3305 s->enabled = true; 3306 3307 if (s->size && !rec->opts.no_buffering) 3308 switch_output_size_warn(rec); 3309 3310 return 0; 3311 } 3312 3313 static const char * const __record_usage[] = { 3314 "perf record [<options>] [<command>]", 3315 "perf record [<options>] -- <command> [<options>]", 3316 NULL 3317 }; 3318 const char * const *record_usage = __record_usage; 3319 3320 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 3321 struct perf_sample *sample, struct machine *machine) 3322 { 3323 /* 3324 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3325 * no need to add them twice. 3326 */ 3327 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3328 return 0; 3329 return perf_event__process_mmap(tool, event, sample, machine); 3330 } 3331 3332 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 3333 struct perf_sample *sample, struct machine *machine) 3334 { 3335 /* 3336 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3337 * no need to add them twice. 3338 */ 3339 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3340 return 0; 3341 3342 return perf_event__process_mmap2(tool, event, sample, machine); 3343 } 3344 3345 static int process_timestamp_boundary(const struct perf_tool *tool, 3346 union perf_event *event __maybe_unused, 3347 struct perf_sample *sample, 3348 struct machine *machine __maybe_unused) 3349 { 3350 struct record *rec = container_of(tool, struct record, tool); 3351 3352 set_timestamp_boundary(rec, sample->time); 3353 return 0; 3354 } 3355 3356 static int parse_record_synth_option(const struct option *opt, 3357 const char *str, 3358 int unset __maybe_unused) 3359 { 3360 struct record_opts *opts = opt->value; 3361 char *p = strdup(str); 3362 3363 if (p == NULL) 3364 return -1; 3365 3366 opts->synth = parse_synth_opt(p); 3367 free(p); 3368 3369 if (opts->synth < 0) { 3370 pr_err("Invalid synth option: %s\n", str); 3371 return -1; 3372 } 3373 return 0; 3374 } 3375 3376 /* 3377 * XXX Ideally would be local to cmd_record() and passed to a record__new 3378 * because we need to have access to it in record__exit, that is called 3379 * after cmd_record() exits, but since record_options need to be accessible to 3380 * builtin-script, leave it here. 3381 * 3382 * At least we don't ouch it in all the other functions here directly. 
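 *
 * The initializer below also documents the user-visible defaults:
 * 4000 Hz sampling, mmap_pages left at UINT_MAX (i.e. not user-set),
 * a single synthesis thread and the PERF_SYNTH_ALL synthesis mask.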
3383 * 3384 * Just say no to tons of global variables, sigh. 3385 */ 3386 static struct record record = { 3387 .opts = { 3388 .sample_time = true, 3389 .mmap_pages = UINT_MAX, 3390 .user_freq = UINT_MAX, 3391 .user_interval = ULLONG_MAX, 3392 .freq = 4000, 3393 .target = { 3394 .uses_mmap = true, 3395 .default_per_cpu = true, 3396 }, 3397 .mmap_flush = MMAP_FLUSH_DEFAULT, 3398 .nr_threads_synthesize = 1, 3399 .ctl_fd = -1, 3400 .ctl_fd_ack = -1, 3401 .synth = PERF_SYNTH_ALL, 3402 .off_cpu_thresh_ns = OFFCPU_THRESH, 3403 }, 3404 }; 3405 3406 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3407 "\n\t\t\t\tDefault: fp"; 3408 3409 static bool dry_run; 3410 3411 static struct parse_events_option_args parse_events_option_args = { 3412 .evlistp = &record.evlist, 3413 }; 3414 3415 static struct parse_events_option_args switch_output_parse_events_option_args = { 3416 .evlistp = &record.sb_evlist, 3417 }; 3418 3419 /* 3420 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3421 * with it and switch to use the library functions in perf_evlist that came 3422 * from builtin-record.c, i.e. use record_opts, 3423 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3424 * using pipes, etc. 3425 */ 3426 static struct option __record_options[] = { 3427 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3428 "event selector. use 'perf list' to list available events", 3429 parse_events_option), 3430 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3431 "event filter", parse_filter), 3432 OPT_BOOLEAN(0, "latency", &record.latency, 3433 "Enable data collection for latency profiling.\n" 3434 "\t\t\t Use perf report --latency for latency-centric profile."), 3435 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3436 NULL, "don't record events from perf itself", 3437 exclude_perf), 3438 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3439 "record events on existing process id"), 3440 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3441 "record events on existing thread id"), 3442 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3443 "collect data with this RT SCHED_FIFO priority"), 3444 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3445 "collect data without buffering"), 3446 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3447 "collect raw sample records from all opened counters"), 3448 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3449 "system-wide collection from all CPUs"), 3450 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3451 "list of cpus to monitor"), 3452 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3453 OPT_STRING('o', "output", &record.data.path, "file", 3454 "output file name"), 3455 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3456 &record.opts.no_inherit_set, 3457 "child tasks do not inherit counters"), 3458 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3459 "synthesize non-sample events at the end of output"), 3460 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3461 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3462 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3463 "Fail if the specified frequency can't be used"), 3464 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3465 "profile at this frequency", 3466 record__parse_freq), 3467 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3468 "number 
of mmap data pages and AUX area tracing mmap pages", 3469 record__parse_mmap_pages), 3470 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3471 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3472 record__mmap_flush_parse), 3473 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3474 NULL, "enables call-graph recording" , 3475 &record_callchain_opt), 3476 OPT_CALLBACK(0, "call-graph", &record.opts, 3477 "record_mode[,record_size]", record_callchain_help, 3478 &record_parse_callchain_opt), 3479 OPT_INCR('v', "verbose", &verbose, 3480 "be more verbose (show counter open errors, etc)"), 3481 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3482 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3483 "per thread counts"), 3484 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3485 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3486 "Record the sample physical addresses"), 3487 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3488 "Record the sampled data address data page size"), 3489 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3490 "Record the sampled code address (ip) page size"), 3491 OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, 3492 "Record the data source for memory operations"), 3493 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3494 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3495 "Record the sample identifier"), 3496 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3497 &record.opts.sample_time_set, 3498 "Record the sample timestamps"), 3499 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3500 "Record the sample period"), 3501 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3502 "don't sample"), 3503 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3504 &record.no_buildid_cache_set, 3505 "do not update the buildid cache"), 3506 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3507 &record.no_buildid_set, 3508 "do not collect buildids in perf.data"), 3509 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3510 "monitor event in cgroup name only", 3511 parse_cgroups), 3512 OPT_CALLBACK('D', "delay", &record, "ms", 3513 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3514 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3515 record__parse_event_enable_time), 3516 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3517 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), 3518 3519 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3520 "branch any", "sample any taken branches", 3521 parse_branch_stack), 3522 3523 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3524 "branch filter mask", "branch stack filter modes", 3525 parse_branch_stack), 3526 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3527 "sample by weight (on special events only)"), 3528 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3529 "sample transaction flags (special events only)"), 3530 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3531 "use per-thread mmaps"), 3532 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3533 "sample selected machine registers on interrupt," 3534 " use '-I?' 
to list register names", parse_intr_regs), 3535 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3536 "sample selected machine registers in user space," 3537 " use '--user-regs=?' to list register names", parse_user_regs), 3538 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3539 "Record running/enabled time of read (:S) events"), 3540 OPT_CALLBACK('k', "clockid", &record.opts, 3541 "clockid", "clockid to use for events, see clock_gettime()", 3542 parse_clockid), 3543 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3544 "opts", "AUX area tracing Snapshot Mode", ""), 3545 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3546 "opts", "sample AUX area", ""), 3547 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3548 "per thread proc mmap processing timeout in ms"), 3549 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3550 "Record namespaces events"), 3551 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3552 "Record cgroup events"), 3553 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3554 &record.opts.record_switch_events_set, 3555 "Record context switch events"), 3556 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3557 "Configure all used events to run in kernel space.", 3558 PARSE_OPT_EXCLUSIVE), 3559 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3560 "Configure all used events to run in user space.", 3561 PARSE_OPT_EXCLUSIVE), 3562 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3563 "collect kernel callchains"), 3564 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3565 "collect user callchains"), 3566 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3567 "file", "vmlinux pathname"), 3568 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3569 "Record build-id of all DSOs regardless of hits"), 3570 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3571 "Record build-id in map events"), 3572 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3573 "append timestamp to output filename"), 3574 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3575 "Record timestamp boundary (time of first/last samples)"), 3576 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3577 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3578 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3579 "signal"), 3580 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3581 &record.switch_output_event_set, "switch output event", 3582 "switch output event selector. 
use 'perf list' to list available events", 3583 parse_events_option_new_evlist), 3584 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3585 "Limit number of switch output generated files"), 3586 OPT_BOOLEAN(0, "dry-run", &dry_run, 3587 "Parse options then exit"), 3588 #ifdef HAVE_AIO_SUPPORT 3589 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3590 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3591 record__aio_parse), 3592 #endif 3593 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3594 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3595 record__parse_affinity), 3596 #ifdef HAVE_ZSTD_SUPPORT 3597 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3598 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3599 record__parse_comp_level), 3600 #endif 3601 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3602 "size", "Limit the maximum size of the output file", parse_output_max_size), 3603 OPT_UINTEGER(0, "num-thread-synthesize", 3604 &record.opts.nr_threads_synthesize, 3605 "number of threads to run for event synthesis"), 3606 #ifdef HAVE_LIBPFM 3607 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3608 "libpfm4 event selector. use 'perf list' to list available events", 3609 parse_libpfm_events_option), 3610 #endif 3611 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3612 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3613 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3614 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3615 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3616 parse_control_option), 3617 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3618 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3619 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3620 &record.debuginfod.set, "debuginfod urls", 3621 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3622 "system"), 3623 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3624 "write collected trace data into several data files using parallel threads", 3625 record__parse_threads), 3626 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3627 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3628 "BPF filter action"), 3629 OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", 3630 "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", 3631 record__parse_off_cpu_thresh), 3632 OPT_END() 3633 }; 3634 3635 struct option *record_options = __record_options; 3636 3637 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3638 { 3639 struct perf_cpu cpu; 3640 int idx; 3641 3642 if (cpu_map__is_dummy(cpus)) 3643 return 0; 3644 3645 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3646 /* Return ENODEV is input cpu is greater than max cpu */ 3647 if ((unsigned long)cpu.cpu > mask->nbits) 3648 return -ENODEV; 3649 __set_bit(cpu.cpu, mask->bits); 3650 } 3651 3652 return 0; 3653 } 3654 3655 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3656 { 3657 struct perf_cpu_map *cpus; 3658 3659 cpus = perf_cpu_map__new(mask_spec); 3660 if (!cpus) 3661 return -ENOMEM; 3662 3663 bitmap_zero(mask->bits, mask->nbits); 3664 if (record__mmap_cpu_mask_init(mask, cpus)) 3665 return -ENODEV; 3666 3667 perf_cpu_map__put(cpus); 3668 3669 return 0; 3670 } 3671 3672 static void record__free_thread_masks(struct record *rec, int nr_threads) 3673 { 3674 int t; 3675 3676 if (rec->thread_masks) 3677 for (t = 0; t < nr_threads; t++) 3678 record__thread_mask_free(&rec->thread_masks[t]); 3679 3680 zfree(&rec->thread_masks); 3681 } 3682 3683 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3684 { 3685 int t, ret; 3686 3687 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3688 if (!rec->thread_masks) { 3689 pr_err("Failed to allocate thread masks\n"); 3690 return -ENOMEM; 3691 } 3692 3693 for (t = 0; t < nr_threads; t++) { 3694 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3695 if (ret) { 3696 pr_err("Failed to allocate thread masks[%d]\n", t); 3697 goto out_free; 3698 } 3699 } 3700 3701 return 0; 3702 3703 out_free: 3704 record__free_thread_masks(rec, nr_threads); 3705 3706 return ret; 3707 } 3708 3709 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3710 { 3711 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3712 3713 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3714 if (ret) 3715 return ret; 3716 3717 rec->nr_threads = nr_cpus; 3718 pr_debug("nr_threads: %d\n", rec->nr_threads); 3719 3720 for (t = 0; t < rec->nr_threads; t++) { 3721 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3722 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3723 if (verbose > 0) { 3724 pr_debug("thread_masks[%d]: ", t); 3725 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3726 pr_debug("thread_masks[%d]: ", t); 3727 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3728 } 3729 } 3730 3731 return 0; 3732 } 3733 3734 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3735 const char **maps_spec, const char **affinity_spec, 3736 u32 nr_spec) 3737 { 3738 u32 s; 3739 int ret = 0, t = 0; 3740 struct mmap_cpu_mask cpus_mask; 3741 struct thread_mask thread_mask, full_mask, *thread_masks; 3742 3743 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3744 if (ret) { 3745 pr_err("Failed to allocate CPUs mask\n"); 3746 return ret; 3747 } 3748 3749 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3750 if (ret) { 3751 pr_err("Failed to init cpu mask\n"); 3752 goto out_free_cpu_mask; 3753 } 3754 3755 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3756 if (ret) { 3757 pr_err("Failed to allocate 
full mask\n"); 3758 goto out_free_cpu_mask; 3759 } 3760 3761 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3762 if (ret) { 3763 pr_err("Failed to allocate thread mask\n"); 3764 goto out_free_full_and_cpu_masks; 3765 } 3766 3767 for (s = 0; s < nr_spec; s++) { 3768 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3769 if (ret) { 3770 pr_err("Failed to initialize maps thread mask\n"); 3771 goto out_free; 3772 } 3773 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3774 if (ret) { 3775 pr_err("Failed to initialize affinity thread mask\n"); 3776 goto out_free; 3777 } 3778 3779 /* ignore invalid CPUs but do not allow empty masks */ 3780 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3781 cpus_mask.bits, thread_mask.maps.nbits)) { 3782 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3783 ret = -EINVAL; 3784 goto out_free; 3785 } 3786 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3787 cpus_mask.bits, thread_mask.affinity.nbits)) { 3788 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3789 ret = -EINVAL; 3790 goto out_free; 3791 } 3792 3793 /* do not allow intersection with other masks (full_mask) */ 3794 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3795 thread_mask.maps.nbits)) { 3796 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3797 ret = -EINVAL; 3798 goto out_free; 3799 } 3800 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3801 thread_mask.affinity.nbits)) { 3802 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3803 ret = -EINVAL; 3804 goto out_free; 3805 } 3806 3807 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3808 thread_mask.maps.bits, full_mask.maps.nbits); 3809 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3810 thread_mask.affinity.bits, full_mask.maps.nbits); 3811 3812 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3813 if (!thread_masks) { 3814 pr_err("Failed to reallocate thread masks\n"); 3815 ret = -ENOMEM; 3816 goto out_free; 3817 } 3818 rec->thread_masks = thread_masks; 3819 rec->thread_masks[t] = thread_mask; 3820 if (verbose > 0) { 3821 pr_debug("thread_masks[%d]: ", t); 3822 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3823 pr_debug("thread_masks[%d]: ", t); 3824 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3825 } 3826 t++; 3827 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3828 if (ret) { 3829 pr_err("Failed to allocate thread mask\n"); 3830 goto out_free_full_and_cpu_masks; 3831 } 3832 } 3833 rec->nr_threads = t; 3834 pr_debug("nr_threads: %d\n", rec->nr_threads); 3835 if (!rec->nr_threads) 3836 ret = -EINVAL; 3837 3838 out_free: 3839 record__thread_mask_free(&thread_mask); 3840 out_free_full_and_cpu_masks: 3841 record__thread_mask_free(&full_mask); 3842 out_free_cpu_mask: 3843 record__mmap_cpu_mask_free(&cpus_mask); 3844 3845 return ret; 3846 } 3847 3848 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3849 { 3850 int ret; 3851 struct cpu_topology *topo; 3852 3853 topo = cpu_topology__new(); 3854 if (!topo) { 3855 pr_err("Failed to allocate CPU topology\n"); 3856 return -ENOMEM; 3857 } 3858 3859 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3860 topo->core_cpus_list, topo->core_cpus_lists); 3861 cpu_topology__delete(topo); 3862 3863 return ret; 3864 } 3865 3866 static int record__init_thread_package_masks(struct 

static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}
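
/*
 * Illustrative note, not part of the original source: the user-defined spec
 * parsed below is a ':'-separated list of "<maps cpus>/<affinity cpus>"
 * pairs, e.g. a hypothetical "--threads=0,2-4/2-4:5,6/5,6" asks for two
 * streaming threads: the first reads the mmaps of CPUs 0,2-4 with its
 * affinity set to CPUs 2-4, the second reads CPUs 5,6 with affinity 5,6.
 */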

static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}
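
/*
 * Illustrative summary, not part of the original source: --threads given
 * with no value or with "cpu" streams per monitored CPU, while "core",
 * "package" and "numa" group streaming threads by the corresponding
 * topology level; any other value is treated as a user-defined
 * maps/affinity list handled by record__init_thread_user_masks().
 */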

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
	symbol_conf.lazy_load_kernel_maps = true;
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (record.latency) {
		/*
		 * There is no fundamental reason why latency profiling
		 * can't work for system-wide mode, but exact semantics
		 * and details are to be defined.
		 * See the following thread for details:
		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
		 */
		if (record.opts.target.system_wide) {
			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
			err = -EINVAL;
			goto out_opts;
		}
		record.opts.record_switch_events = true;
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}
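
	/*
	 * Illustrative note, not part of the original source: with a
	 * time-based switch value (e.g. a hypothetical "--switch-output=30s"),
	 * the SIGALRM handler installed above hits switch_output_trigger so
	 * that the record loop can rotate the output to a new file roughly
	 * every 30 seconds.
	 */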

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	if (rec->filter_action) {
		if (!strcmp(rec->filter_action, "pin"))
			err = perf_bpf_filter__pin();
		else if (!strcmp(rec->filter_action, "unpin"))
			err = perf_bpf_filter__unpin();
		else {
			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
			err = -EINVAL;
		}
		goto out_opts;
	}

	/* For backward compatibility, -d implies --mem-info */
	if (rec->opts.sample_address)
		rec->opts.sample_data_src = true;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * requested using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *          disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		err = parse_event(rec->evlist, "cycles:P");
		if (err)
			goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	if (rec->uid_str) {
		uid_t uid = parse_uid(rec->uid_str);

		if (uid == UINT_MAX) {
			ui__error("Invalid User: %s", rec->uid_str);
			err = -EINVAL;
			goto out;
		}
		err = parse_uid_filter(rec->evlist, uid);
		if (err)
			goto out;

		/* User ID filtering implies system wide. */
		rec->opts.target.system_wide = true;
	}

	/* Enable ignoring missing threads when -p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
			       errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data,
	 * because decoding the trace to find out which buildids are actually
	 * needed would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__config_tracking_events(rec);
	if (err) {
		pr_err("record__config_tracking_events failed, error %d\n", err);
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	evlist__delete(rec->evlist);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}