// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	u64 thread_bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
	struct pollfd_index_map *index_map;
	size_t index_map_sz;
	size_t index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * reminder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel to proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Coping can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the reminder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag = 'B', .mult = 1 },
			{ .tag = 'K', .mult = 1 << 10 },
			{ .tag = 'M', .mult = 1 << 20 },
			{ .tag = 'G', .mult = 1 << 30 },
			{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__tracking_system_wide(struct record *rec)
{
	struct evlist *evlist = rec->evlist;
	struct evsel *evsel;

	/*
	 * If non-dummy evsel exists, system_wide sideband is need to
	 * help parse sample information.
	 * For example, PERF_EVENT_MMAP event to help parse symbol,
	 * and PERF_EVENT_COMM event to help parse task executable name.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (!evsel__is_dummy_event(evsel))
			return true;
	}

	return false;
}

static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {

		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}
static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;

		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 * /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

#ifdef HAVE_LIBTRACEEVENT
	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
#endif

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}
static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample = {};
	int id_hdr_size;

	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
}

static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples *lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
	if (lost == NULL) {
		pr_debug("Memory allocation failed\n");
		return;
	}

	lost->header.type = PERF_RECORD_LOST_SAMPLES;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					goto out;
				}

				if (count.lost) {
					__record__save_lost_samples(rec, evsel, lost,
								    x, y, count.lost, 0);
				}
			}
		}

		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count)
			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
	}
out:
	free(lost);
}

static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
kexec).\n" 2057 "Check /proc/modules permission or run as root.\n"); 2058 } 2059 2060 if (perf_guest) { 2061 machines__process_guests(&session->machines, 2062 perf_event__synthesize_guest_os, tool); 2063 } 2064 2065 err = perf_event__synthesize_extra_attr(&rec->tool, 2066 rec->evlist, 2067 process_synthesized_event, 2068 data->is_pipe); 2069 if (err) 2070 goto out; 2071 2072 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2073 process_synthesized_event, 2074 NULL); 2075 if (err < 0) { 2076 pr_err("Couldn't synthesize thread map.\n"); 2077 return err; 2078 } 2079 2080 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2081 process_synthesized_event, NULL); 2082 if (err < 0) { 2083 pr_err("Couldn't synthesize cpu map.\n"); 2084 return err; 2085 } 2086 2087 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2088 machine, opts); 2089 if (err < 0) { 2090 pr_warning("Couldn't synthesize bpf events.\n"); 2091 err = 0; 2092 } 2093 2094 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2095 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2096 machine); 2097 if (err < 0) { 2098 pr_warning("Couldn't synthesize cgroup events.\n"); 2099 err = 0; 2100 } 2101 } 2102 2103 if (rec->opts.nr_threads_synthesize > 1) { 2104 mutex_init(&synth_lock); 2105 perf_set_multithreaded(); 2106 f = process_locked_synthesized_event; 2107 } 2108 2109 if (rec->opts.synth & PERF_SYNTH_TASK) { 2110 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2111 2112 err = __machine__synthesize_threads(machine, tool, &opts->target, 2113 rec->evlist->core.threads, 2114 f, needs_mmap, opts->sample_address, 2115 rec->opts.nr_threads_synthesize); 2116 } 2117 2118 if (rec->opts.nr_threads_synthesize > 1) { 2119 perf_set_singlethreaded(); 2120 mutex_destroy(&synth_lock); 2121 } 2122 2123 out: 2124 return err; 2125 } 2126 2127 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2128 { 2129 struct record *rec = data; 2130 pthread_kill(rec->thread_id, SIGUSR2); 2131 return 0; 2132 } 2133 2134 static int record__setup_sb_evlist(struct record *rec) 2135 { 2136 struct record_opts *opts = &rec->opts; 2137 2138 if (rec->sb_evlist != NULL) { 2139 /* 2140 * We get here if --switch-output-event populated the 2141 * sb_evlist, so associate a callback that will send a SIGUSR2 2142 * to the main thread. 
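 *
 * e.g. (event name illustrative):
 *   perf record --switch-output-event syscalls:sys_enter_pause -a -- sleep 600
 * makes the side band thread rotate the output file (via that SIGUSR2)
 * whenever such an event is seen.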
2143 */ 2144 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2145 rec->thread_id = pthread_self(); 2146 } 2147 #ifdef HAVE_LIBBPF_SUPPORT 2148 if (!opts->no_bpf_event) { 2149 if (rec->sb_evlist == NULL) { 2150 rec->sb_evlist = evlist__new(); 2151 2152 if (rec->sb_evlist == NULL) { 2153 pr_err("Couldn't create side band evlist.\n"); 2154 return -1; 2155 } 2156 } 2157 2158 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2159 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n"); 2160 return -1; 2161 } 2162 } 2163 #endif 2164 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2165 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2166 opts->no_bpf_event = true; 2167 } 2168 2169 return 0; 2170 } 2171 2172 static int record__init_clock(struct record *rec) 2173 { 2174 struct perf_session *session = rec->session; 2175 struct timespec ref_clockid; 2176 struct timeval ref_tod; 2177 u64 ref; 2178 2179 if (!rec->opts.use_clockid) 2180 return 0; 2181 2182 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2183 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2184 2185 session->header.env.clock.clockid = rec->opts.clockid; 2186 2187 if (gettimeofday(&ref_tod, NULL) != 0) { 2188 pr_err("gettimeofday failed, cannot set reference time.\n"); 2189 return -1; 2190 } 2191 2192 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2193 pr_err("clock_gettime failed, cannot set reference time.\n"); 2194 return -1; 2195 } 2196 2197 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2198 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2199 2200 session->header.env.clock.tod_ns = ref; 2201 2202 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2203 (u64) ref_clockid.tv_nsec; 2204 2205 session->header.env.clock.clockid_ns = ref; 2206 return 0; 2207 } 2208 2209 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2210 { 2211 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2212 trigger_hit(&auxtrace_snapshot_trigger); 2213 auxtrace_record__snapshot_started = 1; 2214 if (auxtrace_record__snapshot_start(rec->itr)) 2215 trigger_error(&auxtrace_snapshot_trigger); 2216 } 2217 } 2218 2219 static void record__uniquify_name(struct record *rec) 2220 { 2221 struct evsel *pos; 2222 struct evlist *evlist = rec->evlist; 2223 char *new_name; 2224 int ret; 2225 2226 if (perf_pmus__num_core_pmus() == 1) 2227 return; 2228 2229 evlist__for_each_entry(evlist, pos) { 2230 if (!evsel__is_hybrid(pos)) 2231 continue; 2232 2233 if (strchr(pos->name, '/')) 2234 continue; 2235 2236 ret = asprintf(&new_name, "%s/%s/", 2237 pos->pmu_name, pos->name); 2238 if (ret > 0) { 2239 free(pos->name); 2240 pos->name = new_name; 2241 } 2242 } 2243 } 2244 2245 static int record__terminate_thread(struct record_thread *thread_data) 2246 { 2247 int err; 2248 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2249 pid_t tid = thread_data->tid; 2250 2251 close(thread_data->pipes.msg[1]); 2252 thread_data->pipes.msg[1] = -1; 2253 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2254 if (err > 0) 2255 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2256 else 2257 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2258 thread->tid, tid); 2259 2260 return 0; 2261 } 2262 2263 static int record__start_threads(struct record *rec) 2264 { 2265 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2266 struct record_thread *thread_data = rec->thread_data; 2267 sigset_t
full, mask; 2268 pthread_t handle; 2269 pthread_attr_t attrs; 2270 2271 thread = &thread_data[0]; 2272 2273 if (!record__threads_enabled(rec)) 2274 return 0; 2275 2276 sigfillset(&full); 2277 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2278 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2279 return -1; 2280 } 2281 2282 pthread_attr_init(&attrs); 2283 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2284 2285 for (t = 1; t < nr_threads; t++) { 2286 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2287 2288 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2289 pthread_attr_setaffinity_np(&attrs, 2290 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2291 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2292 #endif 2293 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2294 for (tt = 1; tt < t; tt++) 2295 record__terminate_thread(&thread_data[tt]); 2296 pr_err("Failed to start threads: %s\n", strerror(errno)); 2297 ret = -1; 2298 goto out_err; 2299 } 2300 2301 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2302 if (err > 0) 2303 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2304 thread_msg_tags[msg]); 2305 else 2306 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2307 thread->tid, rec->thread_data[t].tid); 2308 } 2309 2310 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2311 (cpu_set_t *)thread->mask->affinity.bits); 2312 2313 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2314 2315 out_err: 2316 pthread_attr_destroy(&attrs); 2317 2318 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2319 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2320 ret = -1; 2321 } 2322 2323 return ret; 2324 } 2325 2326 static int record__stop_threads(struct record *rec) 2327 { 2328 int t; 2329 struct record_thread *thread_data = rec->thread_data; 2330 2331 for (t = 1; t < rec->nr_threads; t++) 2332 record__terminate_thread(&thread_data[t]); 2333 2334 for (t = 0; t < rec->nr_threads; t++) { 2335 rec->samples += thread_data[t].samples; 2336 if (!record__threads_enabled(rec)) 2337 continue; 2338 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2339 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2340 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2341 thread_data[t].samples, thread_data[t].waking); 2342 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2343 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2344 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2345 else 2346 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2347 } 2348 2349 return 0; 2350 } 2351 2352 static unsigned long record__waking(struct record *rec) 2353 { 2354 int t; 2355 unsigned long waking = 0; 2356 struct record_thread *thread_data = rec->thread_data; 2357 2358 for (t = 0; t < rec->nr_threads; t++) 2359 waking += thread_data[t].waking; 2360 2361 return waking; 2362 } 2363 2364 static int __cmd_record(struct record *rec, int argc, const char **argv) 2365 { 2366 int err; 2367 int status = 0; 2368 const bool forks = argc > 0; 2369 struct perf_tool *tool = &rec->tool; 2370 struct record_opts *opts = &rec->opts; 2371 struct perf_data *data = &rec->data; 2372 struct perf_session *session; 2373 bool disabled = false, draining = false; 2374 int fd; 2375 float ratio = 0; 2376 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2377 2378
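/*
 * Rough flow from here: install signal handlers, create the perf session,
 * open and mmap the events, write the file header, synthesize metadata
 * events, start the worker threads (if --threads), then loop reading the
 * mmaps until 'done', and finally stop the threads, read the lost-sample
 * counts and finish or switch the output file.
 */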
atexit(record__sig_exit); 2379 signal(SIGCHLD, sig_handler); 2380 signal(SIGINT, sig_handler); 2381 signal(SIGTERM, sig_handler); 2382 signal(SIGSEGV, sigsegv_handler); 2383 2384 if (rec->opts.record_namespaces) 2385 tool->namespace_events = true; 2386 2387 if (rec->opts.record_cgroup) { 2388 #ifdef HAVE_FILE_HANDLE 2389 tool->cgroup_events = true; 2390 #else 2391 pr_err("cgroup tracking is not supported\n"); 2392 return -1; 2393 #endif 2394 } 2395 2396 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2397 signal(SIGUSR2, snapshot_sig_handler); 2398 if (rec->opts.auxtrace_snapshot_mode) 2399 trigger_on(&auxtrace_snapshot_trigger); 2400 if (rec->switch_output.enabled) 2401 trigger_on(&switch_output_trigger); 2402 } else { 2403 signal(SIGUSR2, SIG_IGN); 2404 } 2405 2406 session = perf_session__new(data, tool); 2407 if (IS_ERR(session)) { 2408 pr_err("Perf session creation failed.\n"); 2409 return PTR_ERR(session); 2410 } 2411 2412 if (record__threads_enabled(rec)) { 2413 if (perf_data__is_pipe(&rec->data)) { 2414 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2415 return -1; 2416 } 2417 if (rec->opts.full_auxtrace) { 2418 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2419 return -1; 2420 } 2421 } 2422 2423 fd = perf_data__fd(data); 2424 rec->session = session; 2425 2426 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2427 pr_err("Compression initialization failed.\n"); 2428 return -1; 2429 } 2430 #ifdef HAVE_EVENTFD_SUPPORT 2431 done_fd = eventfd(0, EFD_NONBLOCK); 2432 if (done_fd < 0) { 2433 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2434 status = -1; 2435 goto out_delete_session; 2436 } 2437 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2438 if (err < 0) { 2439 pr_err("Failed to add wakeup eventfd to poll list\n"); 2440 status = err; 2441 goto out_delete_session; 2442 } 2443 #endif // HAVE_EVENTFD_SUPPORT 2444 2445 session->header.env.comp_type = PERF_COMP_ZSTD; 2446 session->header.env.comp_level = rec->opts.comp_level; 2447 2448 if (rec->opts.kcore && 2449 !record__kcore_readable(&session->machines.host)) { 2450 pr_err("ERROR: kcore is not readable.\n"); 2451 return -1; 2452 } 2453 2454 if (record__init_clock(rec)) 2455 return -1; 2456 2457 record__init_features(rec); 2458 2459 if (forks) { 2460 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2461 workload_exec_failed_signal); 2462 if (err < 0) { 2463 pr_err("Couldn't run the workload!\n"); 2464 status = err; 2465 goto out_delete_session; 2466 } 2467 } 2468 2469 /* 2470 * If we have just single event and are sending data 2471 * through pipe, we need to force the ids allocation, 2472 * because we synthesize event name through the pipe 2473 * and need the id for that. 
2474 */ 2475 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2476 rec->opts.sample_id = true; 2477 2478 record__uniquify_name(rec); 2479 2480 /* Debug message used by test scripts */ 2481 pr_debug3("perf record opening and mmapping events\n"); 2482 if (record__open(rec) != 0) { 2483 err = -1; 2484 goto out_free_threads; 2485 } 2486 /* Debug message used by test scripts */ 2487 pr_debug3("perf record done opening and mmapping events\n"); 2488 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2489 2490 if (rec->opts.kcore) { 2491 err = record__kcore_copy(&session->machines.host, data); 2492 if (err) { 2493 pr_err("ERROR: Failed to copy kcore\n"); 2494 goto out_free_threads; 2495 } 2496 } 2497 2498 /* 2499 * Normally perf_session__new would do this, but it doesn't have the 2500 * evlist. 2501 */ 2502 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2503 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2504 rec->tool.ordered_events = false; 2505 } 2506 2507 if (evlist__nr_groups(rec->evlist) == 0) 2508 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2509 2510 if (data->is_pipe) { 2511 err = perf_header__write_pipe(fd); 2512 if (err < 0) 2513 goto out_free_threads; 2514 } else { 2515 err = perf_session__write_header(session, rec->evlist, fd, false); 2516 if (err < 0) 2517 goto out_free_threads; 2518 } 2519 2520 err = -1; 2521 if (!rec->no_buildid 2522 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2523 pr_err("Couldn't generate buildids. " 2524 "Use --no-buildid to profile anyway.\n"); 2525 goto out_free_threads; 2526 } 2527 2528 err = record__setup_sb_evlist(rec); 2529 if (err) 2530 goto out_free_threads; 2531 2532 err = record__synthesize(rec, false); 2533 if (err < 0) 2534 goto out_free_threads; 2535 2536 if (rec->realtime_prio) { 2537 struct sched_param param; 2538 2539 param.sched_priority = rec->realtime_prio; 2540 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2541 pr_err("Could not set realtime priority.\n"); 2542 err = -1; 2543 goto out_free_threads; 2544 } 2545 } 2546 2547 if (record__start_threads(rec)) 2548 goto out_free_threads; 2549 2550 /* 2551 * When perf is starting the traced process, all the events 2552 * (apart from group members) have enable_on_exec=1 set, 2553 * so don't spoil it by prematurely enabling them. 2554 */ 2555 if (!target__none(&opts->target) && !opts->target.initial_delay) 2556 evlist__enable(rec->evlist); 2557 2558 /* 2559 * Let the child rip 2560 */ 2561 if (forks) { 2562 struct machine *machine = &session->machines.host; 2563 union perf_event *event; 2564 pid_t tgid; 2565 2566 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2567 if (event == NULL) { 2568 err = -ENOMEM; 2569 goto out_child; 2570 } 2571 2572 /* 2573 * Some H/W events are generated before COMM event 2574 * which is emitted during exec(), so perf script 2575 * cannot see a correct process name for those events. 2576 * Synthesize COMM event to prevent it. 2577 */ 2578 tgid = perf_event__synthesize_comm(tool, event, 2579 rec->evlist->workload.pid, 2580 process_synthesized_event, 2581 machine); 2582 free(event); 2583 2584 if (tgid == -1) 2585 goto out_child; 2586 2587 event = malloc(sizeof(event->namespaces) + 2588 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2589 machine->id_hdr_size); 2590 if (event == NULL) { 2591 err = -ENOMEM; 2592 goto out_child; 2593 } 2594 2595 /* 2596 * Synthesize NAMESPACES event for the command specified.
2597 */ 2598 perf_event__synthesize_namespaces(tool, event, 2599 rec->evlist->workload.pid, 2600 tgid, process_synthesized_event, 2601 machine); 2602 free(event); 2603 2604 evlist__start_workload(rec->evlist); 2605 } 2606 2607 if (opts->target.initial_delay) { 2608 pr_info(EVLIST_DISABLED_MSG); 2609 if (opts->target.initial_delay > 0) { 2610 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2611 evlist__enable(rec->evlist); 2612 pr_info(EVLIST_ENABLED_MSG); 2613 } 2614 } 2615 2616 err = event_enable_timer__start(rec->evlist->eet); 2617 if (err) 2618 goto out_child; 2619 2620 /* Debug message used by test scripts */ 2621 pr_debug3("perf record has started\n"); 2622 fflush(stderr); 2623 2624 trigger_ready(&auxtrace_snapshot_trigger); 2625 trigger_ready(&switch_output_trigger); 2626 perf_hooks__invoke_record_start(); 2627 2628 /* 2629 * Must write FINISHED_INIT so it will be seen after all other 2630 * synthesized user events, but before any regular events. 2631 */ 2632 err = write_finished_init(rec, false); 2633 if (err < 0) 2634 goto out_child; 2635 2636 for (;;) { 2637 unsigned long long hits = thread->samples; 2638 2639 /* 2640 * rec->evlist->bkw_mmap_state is possible to be 2641 * BKW_MMAP_EMPTY here: when done == true and 2642 * hits != rec->samples in previous round. 2643 * 2644 * evlist__toggle_bkw_mmap ensure we never 2645 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2646 */ 2647 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2648 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2649 2650 if (record__mmap_read_all(rec, false) < 0) { 2651 trigger_error(&auxtrace_snapshot_trigger); 2652 trigger_error(&switch_output_trigger); 2653 err = -1; 2654 goto out_child; 2655 } 2656 2657 if (auxtrace_record__snapshot_started) { 2658 auxtrace_record__snapshot_started = 0; 2659 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2660 record__read_auxtrace_snapshot(rec, false); 2661 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2662 pr_err("AUX area tracing snapshot failed\n"); 2663 err = -1; 2664 goto out_child; 2665 } 2666 } 2667 2668 if (trigger_is_hit(&switch_output_trigger)) { 2669 /* 2670 * If switch_output_trigger is hit, the data in 2671 * overwritable ring buffer should have been collected, 2672 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2673 * 2674 * If SIGUSR2 raise after or during record__mmap_read_all(), 2675 * record__mmap_read_all() didn't collect data from 2676 * overwritable ring buffer. Read again. 2677 */ 2678 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2679 continue; 2680 trigger_ready(&switch_output_trigger); 2681 2682 /* 2683 * Reenable events in overwrite ring buffer after 2684 * record__mmap_read_all(): we should have collected 2685 * data from it. 2686 */ 2687 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2688 2689 if (!quiet) 2690 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2691 record__waking(rec)); 2692 thread->waking = 0; 2693 fd = record__switch_output(rec, false); 2694 if (fd < 0) { 2695 pr_err("Failed to switch to new file\n"); 2696 trigger_error(&switch_output_trigger); 2697 err = fd; 2698 goto out_child; 2699 } 2700 2701 /* re-arm the alarm */ 2702 if (rec->switch_output.time) 2703 alarm(rec->switch_output.time); 2704 } 2705 2706 if (hits == thread->samples) { 2707 if (done || draining) 2708 break; 2709 err = fdarray__poll(&thread->pollfd, -1); 2710 /* 2711 * Propagate error, only if there's any. Ignore positive 2712 * number of returned events and interrupt error. 
2713 */ 2714 if (err > 0 || (err < 0 && errno == EINTR)) 2715 err = 0; 2716 thread->waking++; 2717 2718 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2719 record__thread_munmap_filtered, NULL) == 0) 2720 draining = true; 2721 2722 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2723 if (err) 2724 goto out_child; 2725 } 2726 2727 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2728 switch (cmd) { 2729 case EVLIST_CTL_CMD_SNAPSHOT: 2730 hit_auxtrace_snapshot_trigger(rec); 2731 evlist__ctlfd_ack(rec->evlist); 2732 break; 2733 case EVLIST_CTL_CMD_STOP: 2734 done = 1; 2735 break; 2736 case EVLIST_CTL_CMD_ACK: 2737 case EVLIST_CTL_CMD_UNSUPPORTED: 2738 case EVLIST_CTL_CMD_ENABLE: 2739 case EVLIST_CTL_CMD_DISABLE: 2740 case EVLIST_CTL_CMD_EVLIST: 2741 case EVLIST_CTL_CMD_PING: 2742 default: 2743 break; 2744 } 2745 } 2746 2747 err = event_enable_timer__process(rec->evlist->eet); 2748 if (err < 0) 2749 goto out_child; 2750 if (err) { 2751 err = 0; 2752 done = 1; 2753 } 2754 2755 /* 2756 * When perf is starting the traced process, at the end events 2757 * die with the process and we wait for that. Thus no need to 2758 * disable events in this case. 2759 */ 2760 if (done && !disabled && !target__none(&opts->target)) { 2761 trigger_off(&auxtrace_snapshot_trigger); 2762 evlist__disable(rec->evlist); 2763 disabled = true; 2764 } 2765 } 2766 2767 trigger_off(&auxtrace_snapshot_trigger); 2768 trigger_off(&switch_output_trigger); 2769 2770 if (opts->auxtrace_snapshot_on_exit) 2771 record__auxtrace_snapshot_exit(rec); 2772 2773 if (forks && workload_exec_errno) { 2774 char msg[STRERR_BUFSIZE], strevsels[2048]; 2775 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2776 2777 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2778 2779 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2780 strevsels, argv[0], emsg); 2781 err = -1; 2782 goto out_child; 2783 } 2784 2785 if (!quiet) 2786 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2787 record__waking(rec)); 2788 2789 write_finished_init(rec, true); 2790 2791 if (target__none(&rec->opts.target)) 2792 record__synthesize_workload(rec, true); 2793 2794 out_child: 2795 record__stop_threads(rec); 2796 record__mmap_read_all(rec, true); 2797 out_free_threads: 2798 record__free_thread_data(rec); 2799 evlist__finalize_ctlfd(rec->evlist); 2800 record__aio_mmap_read_sync(rec); 2801 2802 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2803 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2804 session->header.env.comp_ratio = ratio + 0.5; 2805 } 2806 2807 if (forks) { 2808 int exit_status; 2809 2810 if (!child_finished) 2811 kill(rec->evlist->workload.pid, SIGTERM); 2812 2813 wait(&exit_status); 2814 2815 if (err < 0) 2816 status = err; 2817 else if (WIFEXITED(exit_status)) 2818 status = WEXITSTATUS(exit_status); 2819 else if (WIFSIGNALED(exit_status)) 2820 signr = WTERMSIG(exit_status); 2821 } else 2822 status = err; 2823 2824 if (rec->off_cpu) 2825 rec->bytes_written += off_cpu_write(rec->session); 2826 2827 record__read_lost_samples(rec); 2828 record__synthesize(rec, true); 2829 /* this will be recalculated during process_buildids() */ 2830 rec->samples = 0; 2831 2832 if (!err) { 2833 if (!rec->timestamp_filename) { 2834 record__finish_output(rec); 2835 } else { 2836 fd = record__switch_output(rec, true); 2837 if (fd < 0) { 2838 status = fd; 2839 goto out_delete_session; 2840 } 2841 } 2842 } 2843 
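/*
 * The summary printed further below has the shape (numbers illustrative):
 *   [ perf record: Captured and wrote 1.234 MB perf.data (567 samples) ]
 * plus a ", compressed (original ... MB, ratio is ...)" note when zstd
 * compression was used; the ratio is bytes_transferred / bytes_compressed.
 */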
2844 perf_hooks__invoke_record_end(); 2845 2846 if (!err && !quiet) { 2847 char samples[128]; 2848 const char *postfix = rec->timestamp_filename ? 2849 ".<timestamp>" : ""; 2850 2851 if (rec->samples && !rec->opts.full_auxtrace) 2852 scnprintf(samples, sizeof(samples), 2853 " (%" PRIu64 " samples)", rec->samples); 2854 else 2855 samples[0] = '\0'; 2856 2857 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2858 perf_data__size(data) / 1024.0 / 1024.0, 2859 data->path, postfix, samples); 2860 if (ratio) { 2861 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2862 rec->session->bytes_transferred / 1024.0 / 1024.0, 2863 ratio); 2864 } 2865 fprintf(stderr, " ]\n"); 2866 } 2867 2868 out_delete_session: 2869 #ifdef HAVE_EVENTFD_SUPPORT 2870 if (done_fd >= 0) { 2871 fd = done_fd; 2872 done_fd = -1; 2873 2874 close(fd); 2875 } 2876 #endif 2877 zstd_fini(&session->zstd_data); 2878 perf_session__delete(session); 2879 2880 if (!opts->no_bpf_event) 2881 evlist__stop_sb_thread(rec->sb_evlist); 2882 return status; 2883 } 2884 2885 static void callchain_debug(struct callchain_param *callchain) 2886 { 2887 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2888 2889 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2890 2891 if (callchain->record_mode == CALLCHAIN_DWARF) 2892 pr_debug("callchain: stack dump size %d\n", 2893 callchain->dump_size); 2894 } 2895 2896 int record_opts__parse_callchain(struct record_opts *record, 2897 struct callchain_param *callchain, 2898 const char *arg, bool unset) 2899 { 2900 int ret; 2901 callchain->enabled = !unset; 2902 2903 /* --no-call-graph */ 2904 if (unset) { 2905 callchain->record_mode = CALLCHAIN_NONE; 2906 pr_debug("callchain: disabled\n"); 2907 return 0; 2908 } 2909 2910 ret = parse_callchain_record_opt(arg, callchain); 2911 if (!ret) { 2912 /* Enable data address sampling for DWARF unwind. 
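 * e.g. 'perf record --call-graph dwarf,8192 ...' selects DWARF mode with an
 * 8192 byte stack dump per sample (the optional record_size after the comma,
 * printed by callchain_debug() as the "stack dump size").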
*/ 2913 if (callchain->record_mode == CALLCHAIN_DWARF) 2914 record->sample_address = true; 2915 callchain_debug(callchain); 2916 } 2917 2918 return ret; 2919 } 2920 2921 int record_parse_callchain_opt(const struct option *opt, 2922 const char *arg, 2923 int unset) 2924 { 2925 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2926 } 2927 2928 int record_callchain_opt(const struct option *opt, 2929 const char *arg __maybe_unused, 2930 int unset __maybe_unused) 2931 { 2932 struct callchain_param *callchain = opt->value; 2933 2934 callchain->enabled = true; 2935 2936 if (callchain->record_mode == CALLCHAIN_NONE) 2937 callchain->record_mode = CALLCHAIN_FP; 2938 2939 callchain_debug(callchain); 2940 return 0; 2941 } 2942 2943 static int perf_record_config(const char *var, const char *value, void *cb) 2944 { 2945 struct record *rec = cb; 2946 2947 if (!strcmp(var, "record.build-id")) { 2948 if (!strcmp(value, "cache")) 2949 rec->no_buildid_cache = false; 2950 else if (!strcmp(value, "no-cache")) 2951 rec->no_buildid_cache = true; 2952 else if (!strcmp(value, "skip")) 2953 rec->no_buildid = true; 2954 else if (!strcmp(value, "mmap")) 2955 rec->buildid_mmap = true; 2956 else 2957 return -1; 2958 return 0; 2959 } 2960 if (!strcmp(var, "record.call-graph")) { 2961 var = "call-graph.record-mode"; 2962 return perf_default_config(var, value, cb); 2963 } 2964 #ifdef HAVE_AIO_SUPPORT 2965 if (!strcmp(var, "record.aio")) { 2966 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2967 if (!rec->opts.nr_cblocks) 2968 rec->opts.nr_cblocks = nr_cblocks_default; 2969 } 2970 #endif 2971 if (!strcmp(var, "record.debuginfod")) { 2972 rec->debuginfod.urls = strdup(value); 2973 if (!rec->debuginfod.urls) 2974 return -ENOMEM; 2975 rec->debuginfod.set = true; 2976 } 2977 2978 return 0; 2979 } 2980 2981 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2982 { 2983 struct record *rec = (struct record *)opt->value; 2984 2985 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2986 } 2987 2988 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2989 { 2990 struct record_opts *opts = (struct record_opts *)opt->value; 2991 2992 if (unset || !str) 2993 return 0; 2994 2995 if (!strcasecmp(str, "node")) 2996 opts->affinity = PERF_AFFINITY_NODE; 2997 else if (!strcasecmp(str, "cpu")) 2998 opts->affinity = PERF_AFFINITY_CPU; 2999 3000 return 0; 3001 } 3002 3003 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3004 { 3005 mask->nbits = nr_bits; 3006 mask->bits = bitmap_zalloc(mask->nbits); 3007 if (!mask->bits) 3008 return -ENOMEM; 3009 3010 return 0; 3011 } 3012 3013 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3014 { 3015 bitmap_free(mask->bits); 3016 mask->nbits = 0; 3017 } 3018 3019 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3020 { 3021 int ret; 3022 3023 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3024 if (ret) { 3025 mask->affinity.bits = NULL; 3026 return ret; 3027 } 3028 3029 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3030 if (ret) { 3031 record__mmap_cpu_mask_free(&mask->maps); 3032 mask->maps.bits = NULL; 3033 } 3034 3035 return ret; 3036 } 3037 3038 static void record__thread_mask_free(struct thread_mask *mask) 3039 { 3040 record__mmap_cpu_mask_free(&mask->maps); 3041 record__mmap_cpu_mask_free(&mask->affinity); 3042 } 3043 3044 static int record__parse_threads(const struct 
option *opt, const char *str, int unset) 3045 { 3046 int s; 3047 struct record_opts *opts = opt->value; 3048 3049 if (unset || !str || !strlen(str)) { 3050 opts->threads_spec = THREAD_SPEC__CPU; 3051 } else { 3052 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3053 if (s == THREAD_SPEC__USER) { 3054 opts->threads_user_spec = strdup(str); 3055 if (!opts->threads_user_spec) 3056 return -ENOMEM; 3057 opts->threads_spec = THREAD_SPEC__USER; 3058 break; 3059 } 3060 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3061 opts->threads_spec = s; 3062 break; 3063 } 3064 } 3065 } 3066 3067 if (opts->threads_spec == THREAD_SPEC__USER) 3068 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3069 else 3070 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3071 3072 return 0; 3073 } 3074 3075 static int parse_output_max_size(const struct option *opt, 3076 const char *str, int unset) 3077 { 3078 unsigned long *s = (unsigned long *)opt->value; 3079 static struct parse_tag tags_size[] = { 3080 { .tag = 'B', .mult = 1 }, 3081 { .tag = 'K', .mult = 1 << 10 }, 3082 { .tag = 'M', .mult = 1 << 20 }, 3083 { .tag = 'G', .mult = 1 << 30 }, 3084 { .tag = 0 }, 3085 }; 3086 unsigned long val; 3087 3088 if (unset) { 3089 *s = 0; 3090 return 0; 3091 } 3092 3093 val = parse_tag_value(str, tags_size); 3094 if (val != (unsigned long) -1) { 3095 *s = val; 3096 return 0; 3097 } 3098 3099 return -1; 3100 } 3101 3102 static int record__parse_mmap_pages(const struct option *opt, 3103 const char *str, 3104 int unset __maybe_unused) 3105 { 3106 struct record_opts *opts = opt->value; 3107 char *s, *p; 3108 unsigned int mmap_pages; 3109 int ret; 3110 3111 if (!str) 3112 return -EINVAL; 3113 3114 s = strdup(str); 3115 if (!s) 3116 return -ENOMEM; 3117 3118 p = strchr(s, ','); 3119 if (p) 3120 *p = '\0'; 3121 3122 if (*s) { 3123 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3124 if (ret) 3125 goto out_free; 3126 opts->mmap_pages = mmap_pages; 3127 } 3128 3129 if (!p) { 3130 ret = 0; 3131 goto out_free; 3132 } 3133 3134 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3135 if (ret) 3136 goto out_free; 3137 3138 opts->auxtrace_mmap_pages = mmap_pages; 3139 3140 out_free: 3141 free(s); 3142 return ret; 3143 } 3144 3145 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3146 { 3147 } 3148 3149 static int parse_control_option(const struct option *opt, 3150 const char *str, 3151 int unset __maybe_unused) 3152 { 3153 struct record_opts *opts = opt->value; 3154 3155 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3156 } 3157 3158 static void switch_output_size_warn(struct record *rec) 3159 { 3160 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3161 struct switch_output *s = &rec->switch_output; 3162 3163 wakeup_size /= 2; 3164 3165 if (s->size < wakeup_size) { 3166 char buf[100]; 3167 3168 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3169 pr_warning("WARNING: switch-output data size lower than " 3170 "wakeup kernel buffer size (%s) " 3171 "expect bigger perf.data sizes\n", buf); 3172 } 3173 } 3174 3175 static int switch_output_setup(struct record *rec) 3176 { 3177 struct switch_output *s = &rec->switch_output; 3178 static struct parse_tag tags_size[] = { 3179 { .tag = 'B', .mult = 1 }, 3180 { .tag = 'K', .mult = 1 << 10 }, 3181 { .tag = 'M', .mult = 1 << 20 }, 3182 { .tag = 'G', .mult = 1 << 30 }, 3183 { .tag = 0 }, 3184 }; 3185 static struct parse_tag tags_time[] = { 3186 { .tag = 's', 
.mult = 1 }, 3187 { .tag = 'm', .mult = 60 }, 3188 { .tag = 'h', .mult = 60*60 }, 3189 { .tag = 'd', .mult = 60*60*24 }, 3190 { .tag = 0 }, 3191 }; 3192 unsigned long val; 3193 3194 /* 3195 * If we're using --switch-output-events, then we imply its 3196 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3197 * thread to its parent. 3198 */ 3199 if (rec->switch_output_event_set) { 3200 if (record__threads_enabled(rec)) { 3201 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3202 return 0; 3203 } 3204 goto do_signal; 3205 } 3206 3207 if (!s->set) 3208 return 0; 3209 3210 if (record__threads_enabled(rec)) { 3211 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3212 return 0; 3213 } 3214 3215 if (!strcmp(s->str, "signal")) { 3216 do_signal: 3217 s->signal = true; 3218 pr_debug("switch-output with SIGUSR2 signal\n"); 3219 goto enabled; 3220 } 3221 3222 val = parse_tag_value(s->str, tags_size); 3223 if (val != (unsigned long) -1) { 3224 s->size = val; 3225 pr_debug("switch-output with %s size threshold\n", s->str); 3226 goto enabled; 3227 } 3228 3229 val = parse_tag_value(s->str, tags_time); 3230 if (val != (unsigned long) -1) { 3231 s->time = val; 3232 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3233 s->str, s->time); 3234 goto enabled; 3235 } 3236 3237 return -1; 3238 3239 enabled: 3240 rec->timestamp_filename = true; 3241 s->enabled = true; 3242 3243 if (s->size && !rec->opts.no_buffering) 3244 switch_output_size_warn(rec); 3245 3246 return 0; 3247 } 3248 3249 static const char * const __record_usage[] = { 3250 "perf record [<options>] [<command>]", 3251 "perf record [<options>] -- <command> [<options>]", 3252 NULL 3253 }; 3254 const char * const *record_usage = __record_usage; 3255 3256 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3257 struct perf_sample *sample, struct machine *machine) 3258 { 3259 /* 3260 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3261 * no need to add them twice. 3262 */ 3263 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3264 return 0; 3265 return perf_event__process_mmap(tool, event, sample, machine); 3266 } 3267 3268 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3269 struct perf_sample *sample, struct machine *machine) 3270 { 3271 /* 3272 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3273 * no need to add them twice. 
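 * Only MMAP2 events with PERF_RECORD_MISC_USER set are forwarded to
 * perf_event__process_mmap2() below; kernel mmap events are skipped.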
3274 */ 3275 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3276 return 0; 3277 3278 return perf_event__process_mmap2(tool, event, sample, machine); 3279 } 3280 3281 static int process_timestamp_boundary(struct perf_tool *tool, 3282 union perf_event *event __maybe_unused, 3283 struct perf_sample *sample, 3284 struct machine *machine __maybe_unused) 3285 { 3286 struct record *rec = container_of(tool, struct record, tool); 3287 3288 set_timestamp_boundary(rec, sample->time); 3289 return 0; 3290 } 3291 3292 static int parse_record_synth_option(const struct option *opt, 3293 const char *str, 3294 int unset __maybe_unused) 3295 { 3296 struct record_opts *opts = opt->value; 3297 char *p = strdup(str); 3298 3299 if (p == NULL) 3300 return -1; 3301 3302 opts->synth = parse_synth_opt(p); 3303 free(p); 3304 3305 if (opts->synth < 0) { 3306 pr_err("Invalid synth option: %s\n", str); 3307 return -1; 3308 } 3309 return 0; 3310 } 3311 3312 /* 3313 * XXX Ideally would be local to cmd_record() and passed to a record__new 3314 * because we need to have access to it in record__exit, that is called 3315 * after cmd_record() exits, but since record_options need to be accessible to 3316 * builtin-script, leave it here. 3317 * 3318 * At least we don't ouch it in all the other functions here directly. 3319 * 3320 * Just say no to tons of global variables, sigh. 3321 */ 3322 static struct record record = { 3323 .opts = { 3324 .sample_time = true, 3325 .mmap_pages = UINT_MAX, 3326 .user_freq = UINT_MAX, 3327 .user_interval = ULLONG_MAX, 3328 .freq = 4000, 3329 .target = { 3330 .uses_mmap = true, 3331 .default_per_cpu = true, 3332 }, 3333 .mmap_flush = MMAP_FLUSH_DEFAULT, 3334 .nr_threads_synthesize = 1, 3335 .ctl_fd = -1, 3336 .ctl_fd_ack = -1, 3337 .synth = PERF_SYNTH_ALL, 3338 }, 3339 .tool = { 3340 .sample = process_sample_event, 3341 .fork = perf_event__process_fork, 3342 .exit = perf_event__process_exit, 3343 .comm = perf_event__process_comm, 3344 .namespaces = perf_event__process_namespaces, 3345 .mmap = build_id__process_mmap, 3346 .mmap2 = build_id__process_mmap2, 3347 .itrace_start = process_timestamp_boundary, 3348 .aux = process_timestamp_boundary, 3349 .ordered_events = true, 3350 }, 3351 }; 3352 3353 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3354 "\n\t\t\t\tDefault: fp"; 3355 3356 static bool dry_run; 3357 3358 static struct parse_events_option_args parse_events_option_args = { 3359 .evlistp = &record.evlist, 3360 }; 3361 3362 static struct parse_events_option_args switch_output_parse_events_option_args = { 3363 .evlistp = &record.sb_evlist, 3364 }; 3365 3366 /* 3367 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3368 * with it and switch to use the library functions in perf_evlist that came 3369 * from builtin-record.c, i.e. use record_opts, 3370 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3371 * using pipes, etc. 3372 */ 3373 static struct option __record_options[] = { 3374 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3375 "event selector. 
use 'perf list' to list available events", 3376 parse_events_option), 3377 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3378 "event filter", parse_filter), 3379 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3380 NULL, "don't record events from perf itself", 3381 exclude_perf), 3382 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3383 "record events on existing process id"), 3384 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3385 "record events on existing thread id"), 3386 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3387 "collect data with this RT SCHED_FIFO priority"), 3388 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3389 "collect data without buffering"), 3390 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3391 "collect raw sample records from all opened counters"), 3392 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3393 "system-wide collection from all CPUs"), 3394 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3395 "list of cpus to monitor"), 3396 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3397 OPT_STRING('o', "output", &record.data.path, "file", 3398 "output file name"), 3399 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3400 &record.opts.no_inherit_set, 3401 "child tasks do not inherit counters"), 3402 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3403 "synthesize non-sample events at the end of output"), 3404 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3405 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3406 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3407 "Fail if the specified frequency can't be used"), 3408 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3409 "profile at this frequency", 3410 record__parse_freq), 3411 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3412 "number of mmap data pages and AUX area tracing mmap pages", 3413 record__parse_mmap_pages), 3414 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3415 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3416 record__mmap_flush_parse), 3417 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3418 NULL, "enables call-graph recording" , 3419 &record_callchain_opt), 3420 OPT_CALLBACK(0, "call-graph", &record.opts, 3421 "record_mode[,record_size]", record_callchain_help, 3422 &record_parse_callchain_opt), 3423 OPT_INCR('v', "verbose", &verbose, 3424 "be more verbose (show counter open errors, etc)"), 3425 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3426 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3427 "per thread counts"), 3428 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3429 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3430 "Record the sample physical addresses"), 3431 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3432 "Record the sampled data address data page size"), 3433 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3434 "Record the sampled code address (ip) page size"), 3435 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3436 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3437 "Record the sample identifier"), 3438 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3439 &record.opts.sample_time_set, 3440 "Record the sample 
timestamps"), 3441 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3442 "Record the sample period"), 3443 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3444 "don't sample"), 3445 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3446 &record.no_buildid_cache_set, 3447 "do not update the buildid cache"), 3448 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3449 &record.no_buildid_set, 3450 "do not collect buildids in perf.data"), 3451 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3452 "monitor event in cgroup name only", 3453 parse_cgroups), 3454 OPT_CALLBACK('D', "delay", &record, "ms", 3455 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3456 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3457 record__parse_event_enable_time), 3458 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3459 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3460 "user to profile"), 3461 3462 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3463 "branch any", "sample any taken branches", 3464 parse_branch_stack), 3465 3466 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3467 "branch filter mask", "branch stack filter modes", 3468 parse_branch_stack), 3469 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3470 "sample by weight (on special events only)"), 3471 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3472 "sample transaction flags (special events only)"), 3473 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3474 "use per-thread mmaps"), 3475 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3476 "sample selected machine registers on interrupt," 3477 " use '-I?' to list register names", parse_intr_regs), 3478 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3479 "sample selected machine registers on interrupt," 3480 " use '--user-regs=?' 
to list register names", parse_user_regs), 3481 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3482 "Record running/enabled time of read (:S) events"), 3483 OPT_CALLBACK('k', "clockid", &record.opts, 3484 "clockid", "clockid to use for events, see clock_gettime()", 3485 parse_clockid), 3486 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3487 "opts", "AUX area tracing Snapshot Mode", ""), 3488 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3489 "opts", "sample AUX area", ""), 3490 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3491 "per thread proc mmap processing timeout in ms"), 3492 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3493 "Record namespaces events"), 3494 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3495 "Record cgroup events"), 3496 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3497 &record.opts.record_switch_events_set, 3498 "Record context switch events"), 3499 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3500 "Configure all used events to run in kernel space.", 3501 PARSE_OPT_EXCLUSIVE), 3502 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3503 "Configure all used events to run in user space.", 3504 PARSE_OPT_EXCLUSIVE), 3505 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3506 "collect kernel callchains"), 3507 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3508 "collect user callchains"), 3509 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3510 "file", "vmlinux pathname"), 3511 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3512 "Record build-id of all DSOs regardless of hits"), 3513 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3514 "Record build-id in map events"), 3515 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3516 "append timestamp to output filename"), 3517 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3518 "Record timestamp boundary (time of first/last samples)"), 3519 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3520 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3521 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3522 "signal"), 3523 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3524 &record.switch_output_event_set, "switch output event", 3525 "switch output event selector. 
use 'perf list' to list available events", 3526 parse_events_option_new_evlist), 3527 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3528 "Limit number of switch output generated files"), 3529 OPT_BOOLEAN(0, "dry-run", &dry_run, 3530 "Parse options then exit"), 3531 #ifdef HAVE_AIO_SUPPORT 3532 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3533 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3534 record__aio_parse), 3535 #endif 3536 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3537 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3538 record__parse_affinity), 3539 #ifdef HAVE_ZSTD_SUPPORT 3540 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3541 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3542 record__parse_comp_level), 3543 #endif 3544 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3545 "size", "Limit the maximum size of the output file", parse_output_max_size), 3546 OPT_UINTEGER(0, "num-thread-synthesize", 3547 &record.opts.nr_threads_synthesize, 3548 "number of threads to run for event synthesis"), 3549 #ifdef HAVE_LIBPFM 3550 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3551 "libpfm4 event selector. use 'perf list' to list available events", 3552 parse_libpfm_events_option), 3553 #endif 3554 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3555 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3556 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3557 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3558 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3559 parse_control_option), 3560 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3561 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3562 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3563 &record.debuginfod.set, "debuginfod urls", 3564 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3565 "system"), 3566 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3567 "write collected trace data into several data files using parallel threads", 3568 record__parse_threads), 3569 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3570 OPT_END() 3571 }; 3572 3573 struct option *record_options = __record_options; 3574 3575 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3576 { 3577 struct perf_cpu cpu; 3578 int idx; 3579 3580 if (cpu_map__is_dummy(cpus)) 3581 return 0; 3582 3583 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3584 if (cpu.cpu == -1) 3585 continue; 3586 /* Return ENODEV is input cpu is greater than max cpu */ 3587 if ((unsigned long)cpu.cpu > mask->nbits) 3588 return -ENODEV; 3589 __set_bit(cpu.cpu, mask->bits); 3590 } 3591 3592 return 0; 3593 } 3594 3595 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3596 { 3597 struct perf_cpu_map *cpus; 3598 3599 cpus = perf_cpu_map__new(mask_spec); 3600 if (!cpus) 3601 return -ENOMEM; 3602 3603 bitmap_zero(mask->bits, mask->nbits); 3604 if (record__mmap_cpu_mask_init(mask, cpus)) 3605 return -ENODEV; 3606 3607 perf_cpu_map__put(cpus); 3608 3609 return 0; 3610 } 
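/*
 * A minimal usage sketch of the mask helpers above (the spec string is
 * illustrative); mask_spec is a cpu list as understood by perf_cpu_map__new():
 *
 *	struct mmap_cpu_mask mask;
 *
 *	record__mmap_cpu_mask_alloc(&mask, cpu__max_cpu().cpu);
 *	record__mmap_cpu_mask_init_spec(&mask, "0-3,8"); // bits 0..3 and 8
 *	...
 *	record__mmap_cpu_mask_free(&mask);
 */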
3611 3612 static void record__free_thread_masks(struct record *rec, int nr_threads) 3613 { 3614 int t; 3615 3616 if (rec->thread_masks) 3617 for (t = 0; t < nr_threads; t++) 3618 record__thread_mask_free(&rec->thread_masks[t]); 3619 3620 zfree(&rec->thread_masks); 3621 } 3622 3623 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3624 { 3625 int t, ret; 3626 3627 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3628 if (!rec->thread_masks) { 3629 pr_err("Failed to allocate thread masks\n"); 3630 return -ENOMEM; 3631 } 3632 3633 for (t = 0; t < nr_threads; t++) { 3634 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3635 if (ret) { 3636 pr_err("Failed to allocate thread masks[%d]\n", t); 3637 goto out_free; 3638 } 3639 } 3640 3641 return 0; 3642 3643 out_free: 3644 record__free_thread_masks(rec, nr_threads); 3645 3646 return ret; 3647 } 3648 3649 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3650 { 3651 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3652 3653 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3654 if (ret) 3655 return ret; 3656 3657 rec->nr_threads = nr_cpus; 3658 pr_debug("nr_threads: %d\n", rec->nr_threads); 3659 3660 for (t = 0; t < rec->nr_threads; t++) { 3661 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3662 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3663 if (verbose > 0) { 3664 pr_debug("thread_masks[%d]: ", t); 3665 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3666 pr_debug("thread_masks[%d]: ", t); 3667 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3668 } 3669 } 3670 3671 return 0; 3672 } 3673 3674 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3675 const char **maps_spec, const char **affinity_spec, 3676 u32 nr_spec) 3677 { 3678 u32 s; 3679 int ret = 0, t = 0; 3680 struct mmap_cpu_mask cpus_mask; 3681 struct thread_mask thread_mask, full_mask, *thread_masks; 3682 3683 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3684 if (ret) { 3685 pr_err("Failed to allocate CPUs mask\n"); 3686 return ret; 3687 } 3688 3689 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3690 if (ret) { 3691 pr_err("Failed to init cpu mask\n"); 3692 goto out_free_cpu_mask; 3693 } 3694 3695 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3696 if (ret) { 3697 pr_err("Failed to allocate full mask\n"); 3698 goto out_free_cpu_mask; 3699 } 3700 3701 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3702 if (ret) { 3703 pr_err("Failed to allocate thread mask\n"); 3704 goto out_free_full_and_cpu_masks; 3705 } 3706 3707 for (s = 0; s < nr_spec; s++) { 3708 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3709 if (ret) { 3710 pr_err("Failed to initialize maps thread mask\n"); 3711 goto out_free; 3712 } 3713 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3714 if (ret) { 3715 pr_err("Failed to initialize affinity thread mask\n"); 3716 goto out_free; 3717 } 3718 3719 /* ignore invalid CPUs but do not allow empty masks */ 3720 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3721 cpus_mask.bits, thread_mask.maps.nbits)) { 3722 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3723 ret = -EINVAL; 3724 goto out_free; 3725 } 3726 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3727 
cpus_mask.bits, thread_mask.affinity.nbits)) { 3728 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3729 ret = -EINVAL; 3730 goto out_free; 3731 } 3732 3733 /* do not allow intersection with other masks (full_mask) */ 3734 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3735 thread_mask.maps.nbits)) { 3736 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3737 ret = -EINVAL; 3738 goto out_free; 3739 } 3740 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3741 thread_mask.affinity.nbits)) { 3742 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3743 ret = -EINVAL; 3744 goto out_free; 3745 } 3746 3747 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3748 thread_mask.maps.bits, full_mask.maps.nbits); 3749 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3750 thread_mask.affinity.bits, full_mask.maps.nbits); 3751 3752 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3753 if (!thread_masks) { 3754 pr_err("Failed to reallocate thread masks\n"); 3755 ret = -ENOMEM; 3756 goto out_free; 3757 } 3758 rec->thread_masks = thread_masks; 3759 rec->thread_masks[t] = thread_mask; 3760 if (verbose > 0) { 3761 pr_debug("thread_masks[%d]: ", t); 3762 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3763 pr_debug("thread_masks[%d]: ", t); 3764 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3765 } 3766 t++; 3767 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3768 if (ret) { 3769 pr_err("Failed to allocate thread mask\n"); 3770 goto out_free_full_and_cpu_masks; 3771 } 3772 } 3773 rec->nr_threads = t; 3774 pr_debug("nr_threads: %d\n", rec->nr_threads); 3775 if (!rec->nr_threads) 3776 ret = -EINVAL; 3777 3778 out_free: 3779 record__thread_mask_free(&thread_mask); 3780 out_free_full_and_cpu_masks: 3781 record__thread_mask_free(&full_mask); 3782 out_free_cpu_mask: 3783 record__mmap_cpu_mask_free(&cpus_mask); 3784 3785 return ret; 3786 } 3787 3788 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3789 { 3790 int ret; 3791 struct cpu_topology *topo; 3792 3793 topo = cpu_topology__new(); 3794 if (!topo) { 3795 pr_err("Failed to allocate CPU topology\n"); 3796 return -ENOMEM; 3797 } 3798 3799 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3800 topo->core_cpus_list, topo->core_cpus_lists); 3801 cpu_topology__delete(topo); 3802 3803 return ret; 3804 } 3805 3806 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3807 { 3808 int ret; 3809 struct cpu_topology *topo; 3810 3811 topo = cpu_topology__new(); 3812 if (!topo) { 3813 pr_err("Failed to allocate CPU topology\n"); 3814 return -ENOMEM; 3815 } 3816 3817 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3818 topo->package_cpus_list, topo->package_cpus_lists); 3819 cpu_topology__delete(topo); 3820 3821 return ret; 3822 } 3823 3824 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3825 { 3826 u32 s; 3827 int ret; 3828 const char **spec; 3829 struct numa_topology *topo; 3830 3831 topo = numa_topology__new(); 3832 if (!topo) { 3833 pr_err("Failed to allocate NUMA topology\n"); 3834 return -ENOMEM; 3835 } 3836 3837 spec = zalloc(topo->nr * sizeof(char *)); 3838 if (!spec) { 3839 pr_err("Failed to allocate NUMA spec\n"); 3840 ret = -ENOMEM; 3841 goto out_delete_topo; 3842 } 3843 for (s = 0; s < topo->nr; s++) 3844 spec[s] = topo->nodes[s].cpus; 3845 
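/*
 * Each NUMA node contributes its cpu list (e.g. "0-15,64-79", illustrative)
 * as both the maps and the affinity spec, so one recording thread is set up
 * per node by the call below.
 */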
3846 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3847 3848 zfree(&spec); 3849 3850 out_delete_topo: 3851 numa_topology__delete(topo); 3852 3853 return ret; 3854 } 3855 3856 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3857 { 3858 int t, ret; 3859 u32 s, nr_spec = 0; 3860 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3861 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3862 3863 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3864 spec = strtok_r(user_spec, ":", &spec_ptr); 3865 if (spec == NULL) 3866 break; 3867 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3868 mask = strtok_r(spec, "/", &mask_ptr); 3869 if (mask == NULL) 3870 break; 3871 pr_debug2(" maps mask: %s\n", mask); 3872 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3873 if (!tmp_spec) { 3874 pr_err("Failed to reallocate maps spec\n"); 3875 ret = -ENOMEM; 3876 goto out_free; 3877 } 3878 maps_spec = tmp_spec; 3879 maps_spec[nr_spec] = dup_mask = strdup(mask); 3880 if (!maps_spec[nr_spec]) { 3881 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3882 ret = -ENOMEM; 3883 goto out_free; 3884 } 3885 mask = strtok_r(NULL, "/", &mask_ptr); 3886 if (mask == NULL) { 3887 pr_err("Invalid thread maps or affinity specs\n"); 3888 ret = -EINVAL; 3889 goto out_free; 3890 } 3891 pr_debug2(" affinity mask: %s\n", mask); 3892 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3893 if (!tmp_spec) { 3894 pr_err("Failed to reallocate affinity spec\n"); 3895 ret = -ENOMEM; 3896 goto out_free; 3897 } 3898 affinity_spec = tmp_spec; 3899 affinity_spec[nr_spec] = strdup(mask); 3900 if (!affinity_spec[nr_spec]) { 3901 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3902 ret = -ENOMEM; 3903 goto out_free; 3904 } 3905 dup_mask = NULL; 3906 nr_spec++; 3907 } 3908 3909 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3910 (const char **)affinity_spec, nr_spec); 3911 3912 out_free: 3913 free(dup_mask); 3914 for (s = 0; s < nr_spec; s++) { 3915 if (maps_spec) 3916 free(maps_spec[s]); 3917 if (affinity_spec) 3918 free(affinity_spec[s]); 3919 } 3920 free(affinity_spec); 3921 free(maps_spec); 3922 3923 return ret; 3924 } 3925 3926 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3927 { 3928 int ret; 3929 3930 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3931 if (ret) 3932 return ret; 3933 3934 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3935 return -ENODEV; 3936 3937 rec->nr_threads = 1; 3938 3939 return 0; 3940 } 3941 3942 static int record__init_thread_masks(struct record *rec) 3943 { 3944 int ret = 0; 3945 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3946 3947 if (!record__threads_enabled(rec)) 3948 return record__init_thread_default_masks(rec, cpus); 3949 3950 if (evlist__per_thread(rec->evlist)) { 3951 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3952 return -EINVAL; 3953 } 3954 3955 switch (rec->opts.threads_spec) { 3956 case THREAD_SPEC__CPU: 3957 ret = record__init_thread_cpu_masks(rec, cpus); 3958 break; 3959 case THREAD_SPEC__CORE: 3960 ret = record__init_thread_core_masks(rec, cpus); 3961 break; 3962 case THREAD_SPEC__PACKAGE: 3963 ret = record__init_thread_package_masks(rec, cpus); 3964 break; 3965 case THREAD_SPEC__NUMA: 3966 ret = record__init_thread_numa_masks(rec, cpus); 3967 break; 3968 case 
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}
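
/*
 * Entry point for 'perf record': parse the command line, validate the
 * target and event list, configure AUX area tracing and the parallel data
 * streaming thread masks, then hand control to __cmd_record().
 */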
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate build ids if they are explicitly
		 * requested using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The code below is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		bool can_profile_kernel = perf_event_paranoid_check(1);

		err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
		if (err)
			goto out;
	}
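
	/*
	 * When attaching to an existing thread (-t) and the user did not set
	 * inherit/no-inherit explicitly, default to not following children.
	 */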
"cycles:P" : "cycles:Pu"); 4154 if (err) 4155 goto out; 4156 } 4157 4158 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4159 rec->opts.no_inherit = true; 4160 4161 err = target__validate(&rec->opts.target); 4162 if (err) { 4163 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4164 ui__warning("%s\n", errbuf); 4165 } 4166 4167 err = target__parse_uid(&rec->opts.target); 4168 if (err) { 4169 int saved_errno = errno; 4170 4171 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4172 ui__error("%s", errbuf); 4173 4174 err = -saved_errno; 4175 goto out; 4176 } 4177 4178 /* Enable ignoring missing threads when -u/-p option is defined. */ 4179 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4180 4181 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4182 4183 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4184 arch__add_leaf_frame_record_opts(&rec->opts); 4185 4186 err = -ENOMEM; 4187 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4188 if (rec->opts.target.pid != NULL) { 4189 pr_err("Couldn't create thread/CPU maps: %s\n", 4190 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4191 goto out; 4192 } 4193 else 4194 usage_with_options(record_usage, record_options); 4195 } 4196 4197 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4198 if (err) 4199 goto out; 4200 4201 /* 4202 * We take all buildids when the file contains 4203 * AUX area tracing data because we do not decode the 4204 * trace because it would take too long. 4205 */ 4206 if (rec->opts.full_auxtrace) 4207 rec->buildid_all = true; 4208 4209 if (rec->opts.text_poke) { 4210 err = record__config_text_poke(rec->evlist); 4211 if (err) { 4212 pr_err("record__config_text_poke failed, error %d\n", err); 4213 goto out; 4214 } 4215 } 4216 4217 if (rec->off_cpu) { 4218 err = record__config_off_cpu(rec); 4219 if (err) { 4220 pr_err("record__config_off_cpu failed, error %d\n", err); 4221 goto out; 4222 } 4223 } 4224 4225 if (record_opts__config(&rec->opts)) { 4226 err = -EINVAL; 4227 goto out; 4228 } 4229 4230 err = record__config_tracking_events(rec); 4231 if (err) { 4232 pr_err("record__config_tracking_events failed, error %d\n", err); 4233 goto out; 4234 } 4235 4236 err = record__init_thread_masks(rec); 4237 if (err) { 4238 pr_err("Failed to initialize parallel data streaming masks\n"); 4239 goto out; 4240 } 4241 4242 if (rec->opts.nr_cblocks > nr_cblocks_max) 4243 rec->opts.nr_cblocks = nr_cblocks_max; 4244 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4245 4246 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4247 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4248 4249 if (rec->opts.comp_level > comp_level_max) 4250 rec->opts.comp_level = comp_level_max; 4251 pr_debug("comp level: %d\n", rec->opts.comp_level); 4252 4253 err = __cmd_record(&record, argc, argv); 4254 out: 4255 evlist__delete(rec->evlist); 4256 symbol__exit(); 4257 auxtrace_record__free(rec->itr); 4258 out_opts: 4259 record__free_thread_masks(rec, rec->nr_threads); 4260 rec->nr_threads = 0; 4261 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4262 return err; 4263 } 4264 4265 static void snapshot_sig_handler(int sig __maybe_unused) 4266 { 4267 struct record *rec = &record; 4268 4269 hit_auxtrace_snapshot_trigger(rec); 4270 4271 if (switch_output_signal(rec)) 4272 trigger_hit(&switch_output_trigger); 4273 } 4274 4275 static void 
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}