// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/stat.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "util/strbuf.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

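/*
 * Maps an entry in the evlist's pollfd array to the corresponding entry
 * duplicated into a record thread's pollfd array, so that revents observed
 * by the thread can be copied back to the evlist (see
 * record__update_evlist_pollfd_from_thread()).
 */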
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	u64 thread_bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool latency;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	const char *filter_action;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
	struct pollfd_index_map *index_map;
	size_t index_map_sz;
	size_t index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct machine *machine);

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from perf_mmap__push().
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of
	 * the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released earlier than the aio write request started on the
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if the record__aio_write() operation failed to start,
		 * otherwise map->refcount is decremented in record__aio_complete()
		 * after the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

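/*
 * aio_write() writes at an explicit offset and does not advance the file
 * position, so the current output offset is tracked manually via lseek().
 */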
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(const struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from zstd_compress() is not 8-byte aligned, which
		 * would trigger an asan error.  Make it aligned here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

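/*
 * Set up AUX area tracing: allocate the auxtrace record and parse the
 * snapshot/sampling options, aux actions and filters.  AUX area tracing
 * is not supported together with parallel trace streaming (--threads).
 */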
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__tracking_system_wide(struct record *rec)
{
	struct evlist *evlist = rec->evlist;
	struct evsel *evsel;

	/*
	 * If a non-dummy evsel exists, system-wide sideband is needed to
	 * help parse sample information.
	 * For example, PERF_RECORD_MMAP events help parse symbols, and
	 * PERF_RECORD_COMM events help parse task executable names.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (!evsel__is_dummy_event(evsel))
			return true;
	}

	return false;
}

static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * a tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {

		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;

		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = process_event_sample_stub;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange module mmaps prior to the guest kernel mmap
	 * and trigger a preload dso, because default guest module symbols are
	 * loaded from guest kallsyms instead of /lib/modules/XXX/XXX.  This
	 * method is used to avoid missing symbols when the first addr is in a
	 * module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

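/*
 * Synthetic records written by record itself: PERF_RECORD_FINISHED_ROUND is
 * emitted after each pass over the mmaps so that the report side can sort and
 * flush everything buffered up to that point, and PERF_RECORD_FINISHED_INIT
 * marks the end of the initial synthesized events.
 */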
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed2 *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED2;
	event->header.size = size;

	return size;
}

static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size)
{
	ssize_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);
	if (compressed < 0)
		return compressed;

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by the kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe) {
		/* Just to display the approx. size */
		data->file.size = rec->bytes_written;
		return;
	}

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			perf_session__dsos_hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist.  This causes the newly created perf.data to
		 * not contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

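/*
 * Emit a synthetic PERF_RECORD_LOST_SAMPLES event for @evsel carrying
 * @lost_count, followed by an id sample so that the report side can attribute
 * the lost samples to the right event.
 */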
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}

static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}

static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
2010 */ 2011 static void workload_exec_failed_signal(int signo __maybe_unused, 2012 siginfo_t *info, 2013 void *ucontext __maybe_unused) 2014 { 2015 workload_exec_errno = info->si_value.sival_int; 2016 done = 1; 2017 child_finished = 1; 2018 } 2019 2020 static void snapshot_sig_handler(int sig); 2021 static void alarm_sig_handler(int sig); 2022 2023 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2024 { 2025 if (evlist) { 2026 if (evlist->mmap && evlist->mmap[0].core.base) 2027 return evlist->mmap[0].core.base; 2028 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2029 return evlist->overwrite_mmap[0].core.base; 2030 } 2031 return NULL; 2032 } 2033 2034 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2035 { 2036 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2037 if (pc) 2038 return pc; 2039 return NULL; 2040 } 2041 2042 static int record__synthesize(struct record *rec, bool tail) 2043 { 2044 struct perf_session *session = rec->session; 2045 struct machine *machine = &session->machines.host; 2046 struct perf_data *data = &rec->data; 2047 struct record_opts *opts = &rec->opts; 2048 struct perf_tool *tool = &rec->tool; 2049 int err = 0; 2050 event_op f = process_synthesized_event; 2051 2052 if (rec->opts.tail_synthesize != tail) 2053 return 0; 2054 2055 if (data->is_pipe) { 2056 err = perf_event__synthesize_for_pipe(tool, session, data, 2057 process_synthesized_event); 2058 if (err < 0) 2059 goto out; 2060 2061 rec->bytes_written += err; 2062 } 2063 2064 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2065 process_synthesized_event, machine); 2066 if (err) 2067 goto out; 2068 2069 /* Synthesize id_index before auxtrace_info */ 2070 err = perf_event__synthesize_id_index(tool, 2071 process_synthesized_event, 2072 session->evlist, machine); 2073 if (err) 2074 goto out; 2075 2076 if (rec->opts.full_auxtrace) { 2077 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2078 session, process_synthesized_event); 2079 if (err) 2080 goto out; 2081 } 2082 2083 if (!evlist__exclude_kernel(rec->evlist)) { 2084 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2085 machine); 2086 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2087 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2088 "Check /proc/kallsyms permission or run as root.\n"); 2089 2090 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2091 machine); 2092 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2093 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2094 "Check /proc/modules permission or run as root.\n"); 2095 } 2096 2097 if (perf_guest) { 2098 machines__process_guests(&session->machines, 2099 perf_event__synthesize_guest_os, tool); 2100 } 2101 2102 err = perf_event__synthesize_extra_attr(&rec->tool, 2103 rec->evlist, 2104 process_synthesized_event, 2105 data->is_pipe); 2106 if (err) 2107 goto out; 2108 2109 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2110 process_synthesized_event, 2111 NULL); 2112 if (err < 0) { 2113 pr_err("Couldn't synthesize thread map.\n"); 2114 return err; 2115 } 2116 2117 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2118 process_synthesized_event, NULL); 2119 if (err < 0) { 2120 pr_err("Couldn't synthesize cpu map.\n"); 2121 return err; 2122 } 2123 2124 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2125 machine, opts); 2126 if (err < 0) { 2127 pr_warning("Couldn't synthesize bpf events.\n"); 2128 err = 0; 2129 } 2130 2131 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2132 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2133 machine); 2134 if (err < 0) { 2135 pr_warning("Couldn't synthesize cgroup events.\n"); 2136 err = 0; 2137 } 2138 } 2139 2140 if (rec->opts.nr_threads_synthesize > 1) { 2141 mutex_init(&synth_lock); 2142 perf_set_multithreaded(); 2143 f = process_locked_synthesized_event; 2144 } 2145 2146 if (rec->opts.synth & PERF_SYNTH_TASK) { 2147 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2148 2149 err = __machine__synthesize_threads(machine, tool, &opts->target, 2150 rec->evlist->core.threads, 2151 f, needs_mmap, opts->sample_address, 2152 rec->opts.nr_threads_synthesize); 2153 } 2154 2155 if (rec->opts.nr_threads_synthesize > 1) { 2156 perf_set_singlethreaded(); 2157 mutex_destroy(&synth_lock); 2158 } 2159 2160 out: 2161 return err; 2162 } 2163 2164 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2165 { 2166 struct record *rec = data; 2167 pthread_kill(rec->thread_id, SIGUSR2); 2168 return 0; 2169 } 2170 2171 static int record__setup_sb_evlist(struct record *rec) 2172 { 2173 struct record_opts *opts = &rec->opts; 2174 2175 if (rec->sb_evlist != NULL) { 2176 /* 2177 * We get here if --switch-output-event populated the 2178 * sb_evlist, so associate a callback that will send a SIGUSR2 2179 * to the main thread. 
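 * That SIGUSR2 is then handled like a --switch-output=signal request,
 * i.e. the main thread switches to a new perf.data output file.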
2180 */ 2181 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2182 rec->thread_id = pthread_self(); 2183 } 2184 #ifdef HAVE_LIBBPF_SUPPORT 2185 if (!opts->no_bpf_event) { 2186 if (rec->sb_evlist == NULL) { 2187 rec->sb_evlist = evlist__new(); 2188 2189 if (rec->sb_evlist == NULL) { 2190 pr_err("Couldn't create side band evlist.\n."); 2191 return -1; 2192 } 2193 } 2194 2195 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2196 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2197 return -1; 2198 } 2199 } 2200 #endif 2201 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2202 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2203 opts->no_bpf_event = true; 2204 } 2205 2206 return 0; 2207 } 2208 2209 static int record__init_clock(struct record *rec) 2210 { 2211 struct perf_session *session = rec->session; 2212 struct timespec ref_clockid; 2213 struct timeval ref_tod; 2214 u64 ref; 2215 2216 if (!rec->opts.use_clockid) 2217 return 0; 2218 2219 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2220 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2221 2222 session->header.env.clock.clockid = rec->opts.clockid; 2223 2224 if (gettimeofday(&ref_tod, NULL) != 0) { 2225 pr_err("gettimeofday failed, cannot set reference time.\n"); 2226 return -1; 2227 } 2228 2229 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2230 pr_err("clock_gettime failed, cannot set reference time.\n"); 2231 return -1; 2232 } 2233 2234 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2235 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2236 2237 session->header.env.clock.tod_ns = ref; 2238 2239 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2240 (u64) ref_clockid.tv_nsec; 2241 2242 session->header.env.clock.clockid_ns = ref; 2243 return 0; 2244 } 2245 2246 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2247 { 2248 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2249 trigger_hit(&auxtrace_snapshot_trigger); 2250 auxtrace_record__snapshot_started = 1; 2251 if (auxtrace_record__snapshot_start(rec->itr)) 2252 trigger_error(&auxtrace_snapshot_trigger); 2253 } 2254 } 2255 2256 static int record__terminate_thread(struct record_thread *thread_data) 2257 { 2258 int err; 2259 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2260 pid_t tid = thread_data->tid; 2261 2262 close(thread_data->pipes.msg[1]); 2263 thread_data->pipes.msg[1] = -1; 2264 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2265 if (err > 0) 2266 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2267 else 2268 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2269 thread->tid, tid); 2270 2271 return 0; 2272 } 2273 2274 static int record__start_threads(struct record *rec) 2275 { 2276 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2277 struct record_thread *thread_data = rec->thread_data; 2278 sigset_t full, mask; 2279 pthread_t handle; 2280 pthread_attr_t attrs; 2281 2282 thread = &thread_data[0]; 2283 2284 if (!record__threads_enabled(rec)) 2285 return 0; 2286 2287 sigfillset(&full); 2288 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2289 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2290 return -1; 2291 } 2292 2293 pthread_attr_init(&attrs); 2294 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2295 2296 for (t = 1; t < nr_threads; t++) { 2297 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2298 
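		/*
		 * Pin the worker to its affinity mask (when
		 * pthread_attr_setaffinity_np() is available) before creating
		 * it, then block on the ack pipe until the thread reports
		 * READY.
		 */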
2299 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2300 pthread_attr_setaffinity_np(&attrs, 2301 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2302 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2303 #endif 2304 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2305 for (tt = 1; tt < t; tt++) 2306 record__terminate_thread(&thread_data[t]); 2307 pr_err("Failed to start threads: %s\n", strerror(errno)); 2308 ret = -1; 2309 goto out_err; 2310 } 2311 2312 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2313 if (err > 0) 2314 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2315 thread_msg_tags[msg]); 2316 else 2317 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2318 thread->tid, rec->thread_data[t].tid); 2319 } 2320 2321 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2322 (cpu_set_t *)thread->mask->affinity.bits); 2323 2324 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2325 2326 out_err: 2327 pthread_attr_destroy(&attrs); 2328 2329 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2330 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2331 ret = -1; 2332 } 2333 2334 return ret; 2335 } 2336 2337 static int record__stop_threads(struct record *rec) 2338 { 2339 int t; 2340 struct record_thread *thread_data = rec->thread_data; 2341 2342 for (t = 1; t < rec->nr_threads; t++) 2343 record__terminate_thread(&thread_data[t]); 2344 2345 for (t = 0; t < rec->nr_threads; t++) { 2346 rec->samples += thread_data[t].samples; 2347 if (!record__threads_enabled(rec)) 2348 continue; 2349 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2350 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2351 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2352 thread_data[t].samples, thread_data[t].waking); 2353 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2354 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2355 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2356 else 2357 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2358 } 2359 2360 return 0; 2361 } 2362 2363 static unsigned long record__waking(struct record *rec) 2364 { 2365 int t; 2366 unsigned long waking = 0; 2367 struct record_thread *thread_data = rec->thread_data; 2368 2369 for (t = 0; t < rec->nr_threads; t++) 2370 waking += thread_data[t].waking; 2371 2372 return waking; 2373 } 2374 2375 static int __cmd_record(struct record *rec, int argc, const char **argv) 2376 { 2377 int err; 2378 int status = 0; 2379 const bool forks = argc > 0; 2380 struct perf_tool *tool = &rec->tool; 2381 struct record_opts *opts = &rec->opts; 2382 struct perf_data *data = &rec->data; 2383 struct perf_session *session; 2384 bool disabled = false, draining = false; 2385 int fd; 2386 float ratio = 0; 2387 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2388 2389 atexit(record__sig_exit); 2390 signal(SIGCHLD, sig_handler); 2391 signal(SIGINT, sig_handler); 2392 signal(SIGTERM, sig_handler); 2393 signal(SIGSEGV, sigsegv_handler); 2394 2395 if (rec->opts.record_cgroup) { 2396 #ifndef HAVE_FILE_HANDLE 2397 pr_err("cgroup tracking is not supported\n"); 2398 return -1; 2399 #endif 2400 } 2401 2402 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2403 signal(SIGUSR2, snapshot_sig_handler); 2404 if (rec->opts.auxtrace_snapshot_mode) 2405 trigger_on(&auxtrace_snapshot_trigger); 2406 if 
(rec->switch_output.enabled) 2407 trigger_on(&switch_output_trigger); 2408 } else { 2409 signal(SIGUSR2, SIG_IGN); 2410 } 2411 2412 perf_tool__init(tool, /*ordered_events=*/true); 2413 tool->sample = process_sample_event; 2414 tool->fork = perf_event__process_fork; 2415 tool->exit = perf_event__process_exit; 2416 tool->comm = perf_event__process_comm; 2417 tool->namespaces = perf_event__process_namespaces; 2418 tool->mmap = build_id__process_mmap; 2419 tool->mmap2 = build_id__process_mmap2; 2420 tool->itrace_start = process_timestamp_boundary; 2421 tool->aux = process_timestamp_boundary; 2422 tool->namespace_events = rec->opts.record_namespaces; 2423 tool->cgroup_events = rec->opts.record_cgroup; 2424 session = perf_session__new(data, tool); 2425 if (IS_ERR(session)) { 2426 pr_err("Perf session creation failed.\n"); 2427 return PTR_ERR(session); 2428 } 2429 2430 if (record__threads_enabled(rec)) { 2431 if (perf_data__is_pipe(&rec->data)) { 2432 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2433 return -1; 2434 } 2435 if (rec->opts.full_auxtrace) { 2436 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2437 return -1; 2438 } 2439 } 2440 2441 fd = perf_data__fd(data); 2442 rec->session = session; 2443 2444 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2445 pr_err("Compression initialization failed.\n"); 2446 return -1; 2447 } 2448 #ifdef HAVE_EVENTFD_SUPPORT 2449 done_fd = eventfd(0, EFD_NONBLOCK); 2450 if (done_fd < 0) { 2451 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2452 status = -1; 2453 goto out_delete_session; 2454 } 2455 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2456 if (err < 0) { 2457 pr_err("Failed to add wakeup eventfd to poll list\n"); 2458 status = err; 2459 goto out_delete_session; 2460 } 2461 #endif // HAVE_EVENTFD_SUPPORT 2462 2463 session->header.env.comp_type = PERF_COMP_ZSTD; 2464 session->header.env.comp_level = rec->opts.comp_level; 2465 2466 if (rec->opts.kcore && 2467 !record__kcore_readable(&session->machines.host)) { 2468 pr_err("ERROR: kcore is not readable.\n"); 2469 return -1; 2470 } 2471 2472 if (record__init_clock(rec)) 2473 return -1; 2474 2475 record__init_features(rec); 2476 2477 if (forks) { 2478 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2479 workload_exec_failed_signal); 2480 if (err < 0) { 2481 pr_err("Couldn't run the workload!\n"); 2482 status = err; 2483 goto out_delete_session; 2484 } 2485 } 2486 2487 /* 2488 * If we have just single event and are sending data 2489 * through pipe, we need to force the ids allocation, 2490 * because we synthesize event name through the pipe 2491 * and need the id for that. 2492 */ 2493 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2494 rec->opts.sample_id = true; 2495 2496 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2497 rec->timestamp_filename = false; 2498 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2499 } 2500 2501 /* 2502 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE 2503 * and hybrid_merge is false. 
2504 */
2505 evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2506
2507 evlist__config(rec->evlist, opts, &callchain_param);
2508
2509 /* Debug message used by test scripts */
2510 pr_debug3("perf record opening and mmapping events\n");
2511 if (record__open(rec) != 0) {
2512 err = -1;
2513 goto out_free_threads;
2514 }
2515 /* Debug message used by test scripts */
2516 pr_debug3("perf record done opening and mmapping events\n");
2517 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2518
2519 if (rec->opts.kcore) {
2520 err = record__kcore_copy(&session->machines.host, data);
2521 if (err) {
2522 pr_err("ERROR: Failed to copy kcore\n");
2523 goto out_free_threads;
2524 }
2525 }
2526
2527 /*
2528 * Normally perf_session__new would do this, but it doesn't have the
2529 * evlist.
2530 */
2531 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2532 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2533 rec->tool.ordered_events = false;
2534 }
2535
2536 if (evlist__nr_groups(rec->evlist) == 0)
2537 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2538
2539 if (data->is_pipe) {
2540 err = perf_header__write_pipe(fd);
2541 if (err < 0)
2542 goto out_free_threads;
2543 } else {
2544 err = perf_session__write_header(session, rec->evlist, fd, false);
2545 if (err < 0)
2546 goto out_free_threads;
2547 }
2548
2549 err = -1;
2550 if (!rec->no_buildid
2551 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2552 pr_err("Couldn't generate buildids. "
2553 "Use --no-buildid to profile anyway.\n");
2554 goto out_free_threads;
2555 }
2556
2557 if (!evlist__needs_bpf_sb_event(rec->evlist))
2558 opts->no_bpf_event = true;
2559
2560 err = record__setup_sb_evlist(rec);
2561 if (err)
2562 goto out_free_threads;
2563
2564 err = record__synthesize(rec, false);
2565 if (err < 0)
2566 goto out_free_threads;
2567
2568 if (rec->realtime_prio) {
2569 struct sched_param param;
2570
2571 param.sched_priority = rec->realtime_prio;
2572 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2573 pr_err("Could not set realtime priority.\n");
2574 err = -1;
2575 goto out_free_threads;
2576 }
2577 }
2578
2579 if (record__start_threads(rec))
2580 goto out_free_threads;
2581
2582 /*
2583 * When perf is starting the traced process, all the events
2584 * (apart from group members) have enable_on_exec=1 set,
2585 * so don't spoil it by prematurely enabling them.
2586 */
2587 if (!target__none(&opts->target) && !opts->target.initial_delay)
2588 evlist__enable(rec->evlist);
2589
2590 /*
2591 * offcpu-time does not call execve, so enable_on_exec wouldn't work
2592 * when recording a workload; do it manually
2593 */
2594 if (rec->off_cpu)
2595 evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2596
2597 /*
2598 * Let the child rip
2599 */
2600 if (forks) {
2601 struct machine *machine = &session->machines.host;
2602 union perf_event *event;
2603 pid_t tgid;
2604
2605 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2606 if (event == NULL) {
2607 err = -ENOMEM;
2608 goto out_child;
2609 }
2610
2611 /*
2612 * Some H/W events are generated before the COMM event,
2613 * which is emitted during exec(), so perf script
2614 * cannot see a correct process name for those events.
2615 * Synthesize COMM event to prevent it.
2616 */ 2617 tgid = perf_event__synthesize_comm(tool, event, 2618 rec->evlist->workload.pid, 2619 process_synthesized_event, 2620 machine); 2621 free(event); 2622 2623 if (tgid == -1) 2624 goto out_child; 2625 2626 event = malloc(sizeof(event->namespaces) + 2627 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2628 machine->id_hdr_size); 2629 if (event == NULL) { 2630 err = -ENOMEM; 2631 goto out_child; 2632 } 2633 2634 /* 2635 * Synthesize NAMESPACES event for the command specified. 2636 */ 2637 perf_event__synthesize_namespaces(tool, event, 2638 rec->evlist->workload.pid, 2639 tgid, process_synthesized_event, 2640 machine); 2641 free(event); 2642 2643 evlist__start_workload(rec->evlist); 2644 } 2645 2646 if (opts->target.initial_delay) { 2647 pr_info(EVLIST_DISABLED_MSG); 2648 if (opts->target.initial_delay > 0) { 2649 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2650 evlist__enable(rec->evlist); 2651 pr_info(EVLIST_ENABLED_MSG); 2652 } 2653 } 2654 2655 err = event_enable_timer__start(rec->evlist->eet); 2656 if (err) 2657 goto out_child; 2658 2659 /* Debug message used by test scripts */ 2660 pr_debug3("perf record has started\n"); 2661 fflush(stderr); 2662 2663 trigger_ready(&auxtrace_snapshot_trigger); 2664 trigger_ready(&switch_output_trigger); 2665 perf_hooks__invoke_record_start(); 2666 2667 /* 2668 * Must write FINISHED_INIT so it will be seen after all other 2669 * synthesized user events, but before any regular events. 2670 */ 2671 err = write_finished_init(rec, false); 2672 if (err < 0) 2673 goto out_child; 2674 2675 for (;;) { 2676 unsigned long long hits = thread->samples; 2677 2678 /* 2679 * rec->evlist->bkw_mmap_state is possible to be 2680 * BKW_MMAP_EMPTY here: when done == true and 2681 * hits != rec->samples in previous round. 2682 * 2683 * evlist__toggle_bkw_mmap ensure we never 2684 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2685 */ 2686 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2687 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2688 2689 if (record__mmap_read_all(rec, false) < 0) { 2690 trigger_error(&auxtrace_snapshot_trigger); 2691 trigger_error(&switch_output_trigger); 2692 err = -1; 2693 goto out_child; 2694 } 2695 2696 if (auxtrace_record__snapshot_started) { 2697 auxtrace_record__snapshot_started = 0; 2698 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2699 record__read_auxtrace_snapshot(rec, false); 2700 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2701 pr_err("AUX area tracing snapshot failed\n"); 2702 err = -1; 2703 goto out_child; 2704 } 2705 } 2706 2707 if (trigger_is_hit(&switch_output_trigger)) { 2708 /* 2709 * If switch_output_trigger is hit, the data in 2710 * overwritable ring buffer should have been collected, 2711 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2712 * 2713 * If SIGUSR2 raise after or during record__mmap_read_all(), 2714 * record__mmap_read_all() didn't collect data from 2715 * overwritable ring buffer. Read again. 2716 */ 2717 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2718 continue; 2719 trigger_ready(&switch_output_trigger); 2720 2721 /* 2722 * Reenable events in overwrite ring buffer after 2723 * record__mmap_read_all(): we should have collected 2724 * data from it. 
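 * Switching back to BKW_MMAP_RUNNING lets the overwritable mmaps
 * collect fresh data until the next switch-output trigger.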
2725 */ 2726 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2727 2728 if (!quiet) 2729 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2730 record__waking(rec)); 2731 thread->waking = 0; 2732 fd = record__switch_output(rec, false); 2733 if (fd < 0) { 2734 pr_err("Failed to switch to new file\n"); 2735 trigger_error(&switch_output_trigger); 2736 err = fd; 2737 goto out_child; 2738 } 2739 2740 /* re-arm the alarm */ 2741 if (rec->switch_output.time) 2742 alarm(rec->switch_output.time); 2743 } 2744 2745 if (hits == thread->samples) { 2746 if (done || draining) 2747 break; 2748 err = fdarray__poll(&thread->pollfd, -1); 2749 /* 2750 * Propagate error, only if there's any. Ignore positive 2751 * number of returned events and interrupt error. 2752 */ 2753 if (err > 0 || (err < 0 && errno == EINTR)) 2754 err = 0; 2755 thread->waking++; 2756 2757 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2758 record__thread_munmap_filtered, NULL) == 0) 2759 draining = true; 2760 2761 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2762 if (err) 2763 goto out_child; 2764 } 2765 2766 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2767 switch (cmd) { 2768 case EVLIST_CTL_CMD_SNAPSHOT: 2769 hit_auxtrace_snapshot_trigger(rec); 2770 evlist__ctlfd_ack(rec->evlist); 2771 break; 2772 case EVLIST_CTL_CMD_STOP: 2773 done = 1; 2774 break; 2775 case EVLIST_CTL_CMD_ACK: 2776 case EVLIST_CTL_CMD_UNSUPPORTED: 2777 case EVLIST_CTL_CMD_ENABLE: 2778 case EVLIST_CTL_CMD_DISABLE: 2779 case EVLIST_CTL_CMD_EVLIST: 2780 case EVLIST_CTL_CMD_PING: 2781 default: 2782 break; 2783 } 2784 } 2785 2786 err = event_enable_timer__process(rec->evlist->eet); 2787 if (err < 0) 2788 goto out_child; 2789 if (err) { 2790 err = 0; 2791 done = 1; 2792 } 2793 2794 /* 2795 * When perf is starting the traced process, at the end events 2796 * die with the process and we wait for that. Thus no need to 2797 * disable events in this case. 
2798 */ 2799 if (done && !disabled && !target__none(&opts->target)) { 2800 trigger_off(&auxtrace_snapshot_trigger); 2801 evlist__disable(rec->evlist); 2802 disabled = true; 2803 } 2804 } 2805 2806 trigger_off(&auxtrace_snapshot_trigger); 2807 trigger_off(&switch_output_trigger); 2808 2809 if (opts->auxtrace_snapshot_on_exit) 2810 record__auxtrace_snapshot_exit(rec); 2811 2812 if (forks && workload_exec_errno) { 2813 char msg[STRERR_BUFSIZE]; 2814 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2815 struct strbuf sb = STRBUF_INIT; 2816 2817 evlist__format_evsels(rec->evlist, &sb, 2048); 2818 2819 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2820 sb.buf, argv[0], emsg); 2821 strbuf_release(&sb); 2822 err = -1; 2823 goto out_child; 2824 } 2825 2826 if (!quiet) 2827 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2828 record__waking(rec)); 2829 2830 write_finished_init(rec, true); 2831 2832 if (target__none(&rec->opts.target)) 2833 record__synthesize_workload(rec, true); 2834 2835 out_child: 2836 record__stop_threads(rec); 2837 record__mmap_read_all(rec, true); 2838 out_free_threads: 2839 record__free_thread_data(rec); 2840 evlist__finalize_ctlfd(rec->evlist); 2841 record__aio_mmap_read_sync(rec); 2842 2843 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2844 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2845 session->header.env.comp_ratio = ratio + 0.5; 2846 } 2847 2848 if (forks) { 2849 int exit_status; 2850 2851 if (!child_finished) 2852 kill(rec->evlist->workload.pid, SIGTERM); 2853 2854 wait(&exit_status); 2855 2856 if (err < 0) 2857 status = err; 2858 else if (WIFEXITED(exit_status)) 2859 status = WEXITSTATUS(exit_status); 2860 else if (WIFSIGNALED(exit_status)) 2861 signr = WTERMSIG(exit_status); 2862 } else 2863 status = err; 2864 2865 if (rec->off_cpu) 2866 rec->bytes_written += off_cpu_write(rec->session); 2867 2868 record__read_lost_samples(rec); 2869 record__synthesize(rec, true); 2870 /* this will be recalculated during process_buildids() */ 2871 rec->samples = 0; 2872 2873 if (!err) { 2874 if (!rec->timestamp_filename) { 2875 record__finish_output(rec); 2876 } else { 2877 fd = record__switch_output(rec, true); 2878 if (fd < 0) { 2879 status = fd; 2880 goto out_delete_session; 2881 } 2882 } 2883 } 2884 2885 perf_hooks__invoke_record_end(); 2886 2887 if (!err && !quiet) { 2888 char samples[128]; 2889 const char *postfix = rec->timestamp_filename ? 
2890 ".<timestamp>" : ""; 2891 2892 if (rec->samples && !rec->opts.full_auxtrace) 2893 scnprintf(samples, sizeof(samples), 2894 " (%" PRIu64 " samples)", rec->samples); 2895 else 2896 samples[0] = '\0'; 2897 2898 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2899 perf_data__size(data) / 1024.0 / 1024.0, 2900 data->path, postfix, samples); 2901 if (ratio) { 2902 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2903 rec->session->bytes_transferred / 1024.0 / 1024.0, 2904 ratio); 2905 } 2906 fprintf(stderr, " ]\n"); 2907 } 2908 2909 out_delete_session: 2910 #ifdef HAVE_EVENTFD_SUPPORT 2911 if (done_fd >= 0) { 2912 fd = done_fd; 2913 done_fd = -1; 2914 2915 close(fd); 2916 } 2917 #endif 2918 zstd_fini(&session->zstd_data); 2919 if (!opts->no_bpf_event) 2920 evlist__stop_sb_thread(rec->sb_evlist); 2921 2922 perf_session__delete(session); 2923 return status; 2924 } 2925 2926 static void callchain_debug(struct callchain_param *callchain) 2927 { 2928 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2929 2930 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2931 2932 if (callchain->record_mode == CALLCHAIN_DWARF) 2933 pr_debug("callchain: stack dump size %d\n", 2934 callchain->dump_size); 2935 } 2936 2937 int record_opts__parse_callchain(struct record_opts *record, 2938 struct callchain_param *callchain, 2939 const char *arg, bool unset) 2940 { 2941 int ret; 2942 callchain->enabled = !unset; 2943 2944 /* --no-call-graph */ 2945 if (unset) { 2946 callchain->record_mode = CALLCHAIN_NONE; 2947 pr_debug("callchain: disabled\n"); 2948 return 0; 2949 } 2950 2951 ret = parse_callchain_record_opt(arg, callchain); 2952 if (!ret) { 2953 /* Enable data address sampling for DWARF unwind. */ 2954 if (callchain->record_mode == CALLCHAIN_DWARF) 2955 record->sample_address = true; 2956 callchain_debug(callchain); 2957 } 2958 2959 return ret; 2960 } 2961 2962 int record_parse_callchain_opt(const struct option *opt, 2963 const char *arg, 2964 int unset) 2965 { 2966 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2967 } 2968 2969 int record_callchain_opt(const struct option *opt, 2970 const char *arg __maybe_unused, 2971 int unset __maybe_unused) 2972 { 2973 struct callchain_param *callchain = opt->value; 2974 2975 callchain->enabled = true; 2976 2977 if (callchain->record_mode == CALLCHAIN_NONE) 2978 callchain->record_mode = CALLCHAIN_FP; 2979 2980 callchain_debug(callchain); 2981 return 0; 2982 } 2983 2984 static int perf_record_config(const char *var, const char *value, void *cb) 2985 { 2986 struct record *rec = cb; 2987 2988 if (!strcmp(var, "record.build-id")) { 2989 if (!strcmp(value, "cache")) 2990 rec->no_buildid_cache = false; 2991 else if (!strcmp(value, "no-cache")) 2992 rec->no_buildid_cache = true; 2993 else if (!strcmp(value, "skip")) 2994 rec->no_buildid = true; 2995 else if (!strcmp(value, "mmap")) 2996 rec->buildid_mmap = true; 2997 else 2998 return -1; 2999 return 0; 3000 } 3001 if (!strcmp(var, "record.call-graph")) { 3002 var = "call-graph.record-mode"; 3003 return perf_default_config(var, value, cb); 3004 } 3005 #ifdef HAVE_AIO_SUPPORT 3006 if (!strcmp(var, "record.aio")) { 3007 rec->opts.nr_cblocks = strtol(value, NULL, 0); 3008 if (!rec->opts.nr_cblocks) 3009 rec->opts.nr_cblocks = nr_cblocks_default; 3010 } 3011 #endif 3012 if (!strcmp(var, "record.debuginfod")) { 3013 rec->debuginfod.urls = strdup(value); 3014 if (!rec->debuginfod.urls) 3015 return -ENOMEM; 3016 rec->debuginfod.set = 
true; 3017 } 3018 3019 return 0; 3020 } 3021 3022 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 3023 { 3024 struct record *rec = (struct record *)opt->value; 3025 3026 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 3027 } 3028 3029 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 3030 { 3031 struct record_opts *opts = (struct record_opts *)opt->value; 3032 3033 if (unset || !str) 3034 return 0; 3035 3036 if (!strcasecmp(str, "node")) 3037 opts->affinity = PERF_AFFINITY_NODE; 3038 else if (!strcasecmp(str, "cpu")) 3039 opts->affinity = PERF_AFFINITY_CPU; 3040 3041 return 0; 3042 } 3043 3044 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3045 { 3046 mask->nbits = nr_bits; 3047 mask->bits = bitmap_zalloc(mask->nbits); 3048 if (!mask->bits) 3049 return -ENOMEM; 3050 3051 return 0; 3052 } 3053 3054 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3055 { 3056 bitmap_free(mask->bits); 3057 mask->nbits = 0; 3058 } 3059 3060 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3061 { 3062 int ret; 3063 3064 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3065 if (ret) { 3066 mask->affinity.bits = NULL; 3067 return ret; 3068 } 3069 3070 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3071 if (ret) { 3072 record__mmap_cpu_mask_free(&mask->maps); 3073 mask->maps.bits = NULL; 3074 } 3075 3076 return ret; 3077 } 3078 3079 static void record__thread_mask_free(struct thread_mask *mask) 3080 { 3081 record__mmap_cpu_mask_free(&mask->maps); 3082 record__mmap_cpu_mask_free(&mask->affinity); 3083 } 3084 3085 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3086 { 3087 int s; 3088 struct record_opts *opts = opt->value; 3089 3090 if (unset || !str || !strlen(str)) { 3091 opts->threads_spec = THREAD_SPEC__CPU; 3092 } else { 3093 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3094 if (s == THREAD_SPEC__USER) { 3095 opts->threads_user_spec = strdup(str); 3096 if (!opts->threads_user_spec) 3097 return -ENOMEM; 3098 opts->threads_spec = THREAD_SPEC__USER; 3099 break; 3100 } 3101 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3102 opts->threads_spec = s; 3103 break; 3104 } 3105 } 3106 } 3107 3108 if (opts->threads_spec == THREAD_SPEC__USER) 3109 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3110 else 3111 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3112 3113 return 0; 3114 } 3115 3116 static int parse_output_max_size(const struct option *opt, 3117 const char *str, int unset) 3118 { 3119 unsigned long *s = (unsigned long *)opt->value; 3120 static struct parse_tag tags_size[] = { 3121 { .tag = 'B', .mult = 1 }, 3122 { .tag = 'K', .mult = 1 << 10 }, 3123 { .tag = 'M', .mult = 1 << 20 }, 3124 { .tag = 'G', .mult = 1 << 30 }, 3125 { .tag = 0 }, 3126 }; 3127 unsigned long val; 3128 3129 if (unset) { 3130 *s = 0; 3131 return 0; 3132 } 3133 3134 val = parse_tag_value(str, tags_size); 3135 if (val != (unsigned long) -1) { 3136 *s = val; 3137 return 0; 3138 } 3139 3140 return -1; 3141 } 3142 3143 static int record__parse_mmap_pages(const struct option *opt, 3144 const char *str, 3145 int unset __maybe_unused) 3146 { 3147 struct record_opts *opts = opt->value; 3148 char *s, *p; 3149 unsigned int mmap_pages; 3150 int ret; 3151 3152 if (!str) 3153 return -EINVAL; 3154 3155 s = strdup(str); 3156 if (!s) 3157 return -ENOMEM; 
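	/*
	 * The option takes "pages[,pages]": the value before the comma sizes
	 * the regular mmap data pages, the optional value after it sizes the
	 * AUX area tracing mmap.
	 */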
3158 3159 p = strchr(s, ','); 3160 if (p) 3161 *p = '\0'; 3162 3163 if (*s) { 3164 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3165 if (ret) 3166 goto out_free; 3167 opts->mmap_pages = mmap_pages; 3168 } 3169 3170 if (!p) { 3171 ret = 0; 3172 goto out_free; 3173 } 3174 3175 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3176 if (ret) 3177 goto out_free; 3178 3179 opts->auxtrace_mmap_pages = mmap_pages; 3180 3181 out_free: 3182 free(s); 3183 return ret; 3184 } 3185 3186 static int record__parse_off_cpu_thresh(const struct option *opt, 3187 const char *str, 3188 int unset __maybe_unused) 3189 { 3190 struct record_opts *opts = opt->value; 3191 char *endptr; 3192 u64 off_cpu_thresh_ms; 3193 3194 if (!str) 3195 return -EINVAL; 3196 3197 off_cpu_thresh_ms = strtoull(str, &endptr, 10); 3198 3199 /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */ 3200 if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0"))) 3201 return -EINVAL; 3202 else 3203 opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC; 3204 3205 return 0; 3206 } 3207 3208 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3209 { 3210 } 3211 3212 static int parse_control_option(const struct option *opt, 3213 const char *str, 3214 int unset __maybe_unused) 3215 { 3216 struct record_opts *opts = opt->value; 3217 3218 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3219 } 3220 3221 static void switch_output_size_warn(struct record *rec) 3222 { 3223 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3224 struct switch_output *s = &rec->switch_output; 3225 3226 wakeup_size /= 2; 3227 3228 if (s->size < wakeup_size) { 3229 char buf[100]; 3230 3231 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3232 pr_warning("WARNING: switch-output data size lower than " 3233 "wakeup kernel buffer size (%s) " 3234 "expect bigger perf.data sizes\n", buf); 3235 } 3236 } 3237 3238 static int switch_output_setup(struct record *rec) 3239 { 3240 struct switch_output *s = &rec->switch_output; 3241 static struct parse_tag tags_size[] = { 3242 { .tag = 'B', .mult = 1 }, 3243 { .tag = 'K', .mult = 1 << 10 }, 3244 { .tag = 'M', .mult = 1 << 20 }, 3245 { .tag = 'G', .mult = 1 << 30 }, 3246 { .tag = 0 }, 3247 }; 3248 static struct parse_tag tags_time[] = { 3249 { .tag = 's', .mult = 1 }, 3250 { .tag = 'm', .mult = 60 }, 3251 { .tag = 'h', .mult = 60*60 }, 3252 { .tag = 'd', .mult = 60*60*24 }, 3253 { .tag = 0 }, 3254 }; 3255 unsigned long val; 3256 3257 /* 3258 * If we're using --switch-output-events, then we imply its 3259 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3260 * thread to its parent. 
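	 * In that case rec->switch_output_event_set is true and the requested
	 * events end up in rec->sb_evlist (see record__setup_sb_evlist()).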
3261 */ 3262 if (rec->switch_output_event_set) { 3263 if (record__threads_enabled(rec)) { 3264 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3265 return 0; 3266 } 3267 goto do_signal; 3268 } 3269 3270 if (!s->set) 3271 return 0; 3272 3273 if (record__threads_enabled(rec)) { 3274 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3275 return 0; 3276 } 3277 3278 if (!strcmp(s->str, "signal")) { 3279 do_signal: 3280 s->signal = true; 3281 pr_debug("switch-output with SIGUSR2 signal\n"); 3282 goto enabled; 3283 } 3284 3285 val = parse_tag_value(s->str, tags_size); 3286 if (val != (unsigned long) -1) { 3287 s->size = val; 3288 pr_debug("switch-output with %s size threshold\n", s->str); 3289 goto enabled; 3290 } 3291 3292 val = parse_tag_value(s->str, tags_time); 3293 if (val != (unsigned long) -1) { 3294 s->time = val; 3295 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3296 s->str, s->time); 3297 goto enabled; 3298 } 3299 3300 return -1; 3301 3302 enabled: 3303 rec->timestamp_filename = true; 3304 s->enabled = true; 3305 3306 if (s->size && !rec->opts.no_buffering) 3307 switch_output_size_warn(rec); 3308 3309 return 0; 3310 } 3311 3312 static const char * const __record_usage[] = { 3313 "perf record [<options>] [<command>]", 3314 "perf record [<options>] -- <command> [<options>]", 3315 NULL 3316 }; 3317 const char * const *record_usage = __record_usage; 3318 3319 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 3320 struct perf_sample *sample, struct machine *machine) 3321 { 3322 /* 3323 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3324 * no need to add them twice. 3325 */ 3326 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3327 return 0; 3328 return perf_event__process_mmap(tool, event, sample, machine); 3329 } 3330 3331 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 3332 struct perf_sample *sample, struct machine *machine) 3333 { 3334 /* 3335 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3336 * no need to add them twice. 3337 */ 3338 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3339 return 0; 3340 3341 return perf_event__process_mmap2(tool, event, sample, machine); 3342 } 3343 3344 static int process_timestamp_boundary(const struct perf_tool *tool, 3345 union perf_event *event __maybe_unused, 3346 struct perf_sample *sample, 3347 struct machine *machine __maybe_unused) 3348 { 3349 struct record *rec = container_of(tool, struct record, tool); 3350 3351 set_timestamp_boundary(rec, sample->time); 3352 return 0; 3353 } 3354 3355 static int parse_record_synth_option(const struct option *opt, 3356 const char *str, 3357 int unset __maybe_unused) 3358 { 3359 struct record_opts *opts = opt->value; 3360 char *p = strdup(str); 3361 3362 if (p == NULL) 3363 return -1; 3364 3365 opts->synth = parse_synth_opt(p); 3366 free(p); 3367 3368 if (opts->synth < 0) { 3369 pr_err("Invalid synth option: %s\n", str); 3370 return -1; 3371 } 3372 return 0; 3373 } 3374 3375 /* 3376 * XXX Ideally would be local to cmd_record() and passed to a record__new 3377 * because we need to have access to it in record__exit, that is called 3378 * after cmd_record() exits, but since record_options need to be accessible to 3379 * builtin-script, leave it here. 3380 * 3381 * At least we don't ouch it in all the other functions here directly. 
3382 * 3383 * Just say no to tons of global variables, sigh. 3384 */ 3385 static struct record record = { 3386 .opts = { 3387 .sample_time = true, 3388 .mmap_pages = UINT_MAX, 3389 .user_freq = UINT_MAX, 3390 .user_interval = ULLONG_MAX, 3391 .freq = 4000, 3392 .target = { 3393 .uses_mmap = true, 3394 .default_per_cpu = true, 3395 }, 3396 .mmap_flush = MMAP_FLUSH_DEFAULT, 3397 .nr_threads_synthesize = 1, 3398 .ctl_fd = -1, 3399 .ctl_fd_ack = -1, 3400 .synth = PERF_SYNTH_ALL, 3401 .off_cpu_thresh_ns = OFFCPU_THRESH, 3402 }, 3403 }; 3404 3405 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3406 "\n\t\t\t\tDefault: fp"; 3407 3408 static bool dry_run; 3409 3410 static struct parse_events_option_args parse_events_option_args = { 3411 .evlistp = &record.evlist, 3412 }; 3413 3414 static struct parse_events_option_args switch_output_parse_events_option_args = { 3415 .evlistp = &record.sb_evlist, 3416 }; 3417 3418 /* 3419 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3420 * with it and switch to use the library functions in perf_evlist that came 3421 * from builtin-record.c, i.e. use record_opts, 3422 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3423 * using pipes, etc. 3424 */ 3425 static struct option __record_options[] = { 3426 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3427 "event selector. use 'perf list' to list available events", 3428 parse_events_option), 3429 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3430 "event filter", parse_filter), 3431 OPT_BOOLEAN(0, "latency", &record.latency, 3432 "Enable data collection for latency profiling.\n" 3433 "\t\t\t Use perf report --latency for latency-centric profile."), 3434 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3435 NULL, "don't record events from perf itself", 3436 exclude_perf), 3437 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3438 "record events on existing process id"), 3439 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3440 "record events on existing thread id"), 3441 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3442 "collect data with this RT SCHED_FIFO priority"), 3443 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3444 "collect data without buffering"), 3445 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3446 "collect raw sample records from all opened counters"), 3447 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3448 "system-wide collection from all CPUs"), 3449 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3450 "list of cpus to monitor"), 3451 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3452 OPT_STRING('o', "output", &record.data.path, "file", 3453 "output file name"), 3454 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3455 &record.opts.no_inherit_set, 3456 "child tasks do not inherit counters"), 3457 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3458 "synthesize non-sample events at the end of output"), 3459 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3460 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3461 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3462 "Fail if the specified frequency can't be used"), 3463 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3464 "profile at this frequency", 3465 record__parse_freq), 3466 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3467 "number 
of mmap data pages and AUX area tracing mmap pages", 3468 record__parse_mmap_pages), 3469 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3470 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3471 record__mmap_flush_parse), 3472 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3473 NULL, "enables call-graph recording" , 3474 &record_callchain_opt), 3475 OPT_CALLBACK(0, "call-graph", &record.opts, 3476 "record_mode[,record_size]", record_callchain_help, 3477 &record_parse_callchain_opt), 3478 OPT_INCR('v', "verbose", &verbose, 3479 "be more verbose (show counter open errors, etc)"), 3480 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3481 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3482 "per thread counts"), 3483 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3484 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3485 "Record the sample physical addresses"), 3486 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3487 "Record the sampled data address data page size"), 3488 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3489 "Record the sampled code address (ip) page size"), 3490 OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, 3491 "Record the data source for memory operations"), 3492 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3493 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3494 "Record the sample identifier"), 3495 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3496 &record.opts.sample_time_set, 3497 "Record the sample timestamps"), 3498 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3499 "Record the sample period"), 3500 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3501 "don't sample"), 3502 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3503 &record.no_buildid_cache_set, 3504 "do not update the buildid cache"), 3505 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3506 &record.no_buildid_set, 3507 "do not collect buildids in perf.data"), 3508 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3509 "monitor event in cgroup name only", 3510 parse_cgroups), 3511 OPT_CALLBACK('D', "delay", &record, "ms", 3512 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3513 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3514 record__parse_event_enable_time), 3515 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3516 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3517 "user to profile"), 3518 3519 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3520 "branch any", "sample any taken branches", 3521 parse_branch_stack), 3522 3523 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3524 "branch filter mask", "branch stack filter modes", 3525 parse_branch_stack), 3526 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3527 "sample by weight (on special events only)"), 3528 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3529 "sample transaction flags (special events only)"), 3530 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3531 "use per-thread mmaps"), 3532 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3533 "sample selected machine registers on interrupt," 3534 " use '-I?' 
to list register names", parse_intr_regs), 3535 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3536 "sample selected machine registers in user space," 3537 " use '--user-regs=?' to list register names", parse_user_regs), 3538 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3539 "Record running/enabled time of read (:S) events"), 3540 OPT_CALLBACK('k', "clockid", &record.opts, 3541 "clockid", "clockid to use for events, see clock_gettime()", 3542 parse_clockid), 3543 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3544 "opts", "AUX area tracing Snapshot Mode", ""), 3545 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3546 "opts", "sample AUX area", ""), 3547 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3548 "per thread proc mmap processing timeout in ms"), 3549 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3550 "Record namespaces events"), 3551 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3552 "Record cgroup events"), 3553 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3554 &record.opts.record_switch_events_set, 3555 "Record context switch events"), 3556 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3557 "Configure all used events to run in kernel space.", 3558 PARSE_OPT_EXCLUSIVE), 3559 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3560 "Configure all used events to run in user space.", 3561 PARSE_OPT_EXCLUSIVE), 3562 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3563 "collect kernel callchains"), 3564 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3565 "collect user callchains"), 3566 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3567 "file", "vmlinux pathname"), 3568 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3569 "Record build-id of all DSOs regardless of hits"), 3570 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3571 "Record build-id in map events"), 3572 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3573 "append timestamp to output filename"), 3574 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3575 "Record timestamp boundary (time of first/last samples)"), 3576 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3577 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3578 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3579 "signal"), 3580 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3581 &record.switch_output_event_set, "switch output event", 3582 "switch output event selector. 
use 'perf list' to list available events", 3583 parse_events_option_new_evlist), 3584 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3585 "Limit number of switch output generated files"), 3586 OPT_BOOLEAN(0, "dry-run", &dry_run, 3587 "Parse options then exit"), 3588 #ifdef HAVE_AIO_SUPPORT 3589 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3590 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3591 record__aio_parse), 3592 #endif 3593 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3594 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3595 record__parse_affinity), 3596 #ifdef HAVE_ZSTD_SUPPORT 3597 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3598 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3599 record__parse_comp_level), 3600 #endif 3601 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3602 "size", "Limit the maximum size of the output file", parse_output_max_size), 3603 OPT_UINTEGER(0, "num-thread-synthesize", 3604 &record.opts.nr_threads_synthesize, 3605 "number of threads to run for event synthesis"), 3606 #ifdef HAVE_LIBPFM 3607 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3608 "libpfm4 event selector. use 'perf list' to list available events", 3609 parse_libpfm_events_option), 3610 #endif 3611 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3612 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3613 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3614 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3615 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3616 parse_control_option), 3617 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3618 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3619 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3620 &record.debuginfod.set, "debuginfod urls", 3621 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3622 "system"), 3623 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3624 "write collected trace data into several data files using parallel threads", 3625 record__parse_threads), 3626 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3627 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3628 "BPF filter action"), 3629 OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", 3630 "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", 3631 record__parse_off_cpu_thresh), 3632 OPT_END() 3633 }; 3634 3635 struct option *record_options = __record_options; 3636 3637 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3638 { 3639 struct perf_cpu cpu; 3640 int idx; 3641 3642 if (cpu_map__is_dummy(cpus)) 3643 return 0; 3644 3645 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3646 /* Return ENODEV is input cpu is greater than max cpu */ 3647 if ((unsigned long)cpu.cpu > mask->nbits) 3648 return -ENODEV; 3649 __set_bit(cpu.cpu, mask->bits); 3650 } 3651 3652 return 0; 3653 } 3654 3655 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3656 { 3657 struct perf_cpu_map *cpus; 3658 3659 cpus = perf_cpu_map__new(mask_spec); 3660 if (!cpus) 3661 return -ENOMEM; 3662 3663 bitmap_zero(mask->bits, mask->nbits); 3664 if (record__mmap_cpu_mask_init(mask, cpus)) 3665 return -ENODEV; 3666 3667 perf_cpu_map__put(cpus); 3668 3669 return 0; 3670 } 3671 3672 static void record__free_thread_masks(struct record *rec, int nr_threads) 3673 { 3674 int t; 3675 3676 if (rec->thread_masks) 3677 for (t = 0; t < nr_threads; t++) 3678 record__thread_mask_free(&rec->thread_masks[t]); 3679 3680 zfree(&rec->thread_masks); 3681 } 3682 3683 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3684 { 3685 int t, ret; 3686 3687 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3688 if (!rec->thread_masks) { 3689 pr_err("Failed to allocate thread masks\n"); 3690 return -ENOMEM; 3691 } 3692 3693 for (t = 0; t < nr_threads; t++) { 3694 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3695 if (ret) { 3696 pr_err("Failed to allocate thread masks[%d]\n", t); 3697 goto out_free; 3698 } 3699 } 3700 3701 return 0; 3702 3703 out_free: 3704 record__free_thread_masks(rec, nr_threads); 3705 3706 return ret; 3707 } 3708 3709 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3710 { 3711 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3712 3713 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3714 if (ret) 3715 return ret; 3716 3717 rec->nr_threads = nr_cpus; 3718 pr_debug("nr_threads: %d\n", rec->nr_threads); 3719 3720 for (t = 0; t < rec->nr_threads; t++) { 3721 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3722 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3723 if (verbose > 0) { 3724 pr_debug("thread_masks[%d]: ", t); 3725 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3726 pr_debug("thread_masks[%d]: ", t); 3727 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3728 } 3729 } 3730 3731 return 0; 3732 } 3733 3734 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3735 const char **maps_spec, const char **affinity_spec, 3736 u32 nr_spec) 3737 { 3738 u32 s; 3739 int ret = 0, t = 0; 3740 struct mmap_cpu_mask cpus_mask; 3741 struct thread_mask thread_mask, full_mask, *thread_masks; 3742 3743 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3744 if (ret) { 3745 pr_err("Failed to allocate CPUs mask\n"); 3746 return ret; 3747 } 3748 3749 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3750 if (ret) { 3751 pr_err("Failed to init cpu mask\n"); 3752 goto out_free_cpu_mask; 3753 } 3754 3755 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3756 if (ret) { 3757 pr_err("Failed to allocate 
full mask\n"); 3758 goto out_free_cpu_mask; 3759 } 3760 3761 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3762 if (ret) { 3763 pr_err("Failed to allocate thread mask\n"); 3764 goto out_free_full_and_cpu_masks; 3765 } 3766 3767 for (s = 0; s < nr_spec; s++) { 3768 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3769 if (ret) { 3770 pr_err("Failed to initialize maps thread mask\n"); 3771 goto out_free; 3772 } 3773 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3774 if (ret) { 3775 pr_err("Failed to initialize affinity thread mask\n"); 3776 goto out_free; 3777 } 3778 3779 /* ignore invalid CPUs but do not allow empty masks */ 3780 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3781 cpus_mask.bits, thread_mask.maps.nbits)) { 3782 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3783 ret = -EINVAL; 3784 goto out_free; 3785 } 3786 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3787 cpus_mask.bits, thread_mask.affinity.nbits)) { 3788 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3789 ret = -EINVAL; 3790 goto out_free; 3791 } 3792 3793 /* do not allow intersection with other masks (full_mask) */ 3794 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3795 thread_mask.maps.nbits)) { 3796 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3797 ret = -EINVAL; 3798 goto out_free; 3799 } 3800 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3801 thread_mask.affinity.nbits)) { 3802 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3803 ret = -EINVAL; 3804 goto out_free; 3805 } 3806 3807 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3808 thread_mask.maps.bits, full_mask.maps.nbits); 3809 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3810 thread_mask.affinity.bits, full_mask.maps.nbits); 3811 3812 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3813 if (!thread_masks) { 3814 pr_err("Failed to reallocate thread masks\n"); 3815 ret = -ENOMEM; 3816 goto out_free; 3817 } 3818 rec->thread_masks = thread_masks; 3819 rec->thread_masks[t] = thread_mask; 3820 if (verbose > 0) { 3821 pr_debug("thread_masks[%d]: ", t); 3822 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3823 pr_debug("thread_masks[%d]: ", t); 3824 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3825 } 3826 t++; 3827 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3828 if (ret) { 3829 pr_err("Failed to allocate thread mask\n"); 3830 goto out_free_full_and_cpu_masks; 3831 } 3832 } 3833 rec->nr_threads = t; 3834 pr_debug("nr_threads: %d\n", rec->nr_threads); 3835 if (!rec->nr_threads) 3836 ret = -EINVAL; 3837 3838 out_free: 3839 record__thread_mask_free(&thread_mask); 3840 out_free_full_and_cpu_masks: 3841 record__thread_mask_free(&full_mask); 3842 out_free_cpu_mask: 3843 record__mmap_cpu_mask_free(&cpus_mask); 3844 3845 return ret; 3846 } 3847 3848 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3849 { 3850 int ret; 3851 struct cpu_topology *topo; 3852 3853 topo = cpu_topology__new(); 3854 if (!topo) { 3855 pr_err("Failed to allocate CPU topology\n"); 3856 return -ENOMEM; 3857 } 3858 3859 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3860 topo->core_cpus_list, topo->core_cpus_lists); 3861 cpu_topology__delete(topo); 3862 3863 return ret; 3864 } 3865 3866 static int record__init_thread_package_masks(struct 
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

/*
 * Parse the user supplied --threads specification: a ':' separated list of
 * <maps mask>/<affinity mask> pairs, each mask being a CPU specification
 * later handled by record__mmap_cpu_mask_init_spec().
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

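/*
 * Default case (no --threads): a single data streaming thread is used and
 * only its maps mask is initialized, covering all CPUs of the evlist.
 */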
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

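/*
 * Entry point of 'perf record': parse the command line, validate the target
 * and the event list, set up the output data file and the data streaming
 * thread masks, then hand off to __cmd_record() for the recording session.
 */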
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
	symbol_conf.lazy_load_kernel_maps = true;
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	if (record.latency) {
		/*
		 * There is no fundamental reason why latency profiling
		 * can't work for system-wide mode, but exact semantics
		 * and details are to be defined.
		 * See the following thread for details:
		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
		 */
		if (record.opts.target.system_wide) {
			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
			err = -EINVAL;
			goto out_opts;
		}
		record.opts.record_switch_events = true;
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	if (rec->filter_action) {
		if (!strcmp(rec->filter_action, "pin"))
			err = perf_bpf_filter__pin();
		else if (!strcmp(rec->filter_action, "unpin"))
			err = perf_bpf_filter__unpin();
		else {
			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
			err = -EINVAL;
		}
		goto out_opts;
	}

	/* For backward compatibility, -d implies --mem-info */
	if (rec->opts.sample_address)
		rec->opts.sample_data_src = true;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead.
		 * Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		err = parse_event(rec->evlist, "cycles:P");
		if (err)
			goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		}
		else
			usage_with_options(record_usage, record_options);
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__config_tracking_events(rec);
	if (err) {
		pr_err("record__config_tracking_events failed, error %d\n", err);
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	evlist__delete(rec->evlist);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}