// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
	char		**filenames;
	int		num_files;
	int		cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];
		int		ack[2];
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
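
/*
 * Parallel data streaming: each worker started from record__start_threads()
 * runs record__thread() and publishes its own struct record_thread through
 * the __thread pointer 'thread' above, so shared paths like record__write()
 * can account bytes and samples against the current worker. The
 * thread_spec_tags[] strings name the supported worker layouts; e.g.
 * (an illustrative sketch, assuming the usual command-line spelling)
 *
 *	perf record --threads=numa ...
 *
 * selects THREAD_SPEC__NUMA, i.e. one streaming thread per NUMA node.
 */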

struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;
	size_t			index_map_sz;
	size_t			index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);
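
/*
 * Accounting note: bytes written from the main thread land in
 * rec->bytes_written, while bytes written by a streaming worker (map->file
 * set) land in that worker's thread->bytes_written; record__bytes_written()
 * sums both so the output size limit covers the whole session. A usage
 * sketch, assuming the usual option spelling for output_max_size:
 *
 *	perf record --max-size=1G -o perf.data -- ./workload
 *
 * Once the summed total reaches the limit, record__write() prints the
 * "size limit reached" message and sets 'done' to end the session.
 */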

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
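
/*
 * Lifecycle of one AIO block, as implemented by the helpers above and the
 * push path below: record__aio_push() grabs a free control block via
 * record__aio_sync(map, false), stages (and optionally compresses) the data
 * with record__aio_pushfn(), then queues it with record__aio_write().
 * Completions are reaped either lazily on the next record__aio_sync(map,
 * false) or all at once at session end via record__aio_mmap_read_sync() ->
 * record__aio_sync(map, true). The number of in-flight blocks per mmap is
 * bounded by nr_cblocks (--aio=N, assuming the usual option spelling).
 */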

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into the free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first
	 * move the part of the data from map->start till the upper bound and
	 * then the remainder from the beginning of the kernel buffer till the
	 * end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
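
/*
 * Note that perf_mmap__push() may invoke record__aio_pushfn() twice for a
 * single push when the data chunk wraps around the end of the ring buffer:
 * once for the part up to the upper bound and once for the part from the
 * buffer start. aio->size accumulates across both calls, so the single
 * aio_write() issued afterwards covers the whole chunk. Rough shape of one
 * wrapped push (illustrative pointers/lengths, not real identifiers):
 *
 *	aio.size == 0
 *	record__aio_pushfn(map, &aio, tail_ptr, tail_len);	// aio.size = tail_len
 *	record__aio_pushfn(map, &aio, head_ptr, head_len);	// aio.size += head_len
 *	record__aio_write(..., aio.data, aio.size, off);
 */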

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}
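
/*
 * The three knobs parsed above cooperate on the write path: nr_cblocks
 * enables and bounds asynchronous trace writing, mmap_flush sets the minimum
 * number of bytes accumulated in a ring buffer before it is flushed (capped
 * at a quarter of the mmap size), and comp_level enables Zstd compression of
 * the written data. A combined usage sketch, assuming the usual option
 * spellings for these settings:
 *
 *	perf record --aio=2 --mmap-flush=16K -z 3 -o perf.data -- ./workload
 */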

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
	/*
	 * It is possible for this signal handler to run after done is checked
	 * in the main loop, but before the perf counter fds are polled. If this
	 * happens, the poll() will continue to wait even though done is set,
	 * and will only break out if either another signal is received, or the
	 * counters are ready for read. To ensure the poll() doesn't sleep when
	 * done is set, use an eventfd (done_fd) to wake up the poll().
	 */
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret =
auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 757 record__process_auxtrace, 758 rec->opts.auxtrace_snapshot_size); 759 if (ret < 0) 760 return ret; 761 762 if (ret) 763 rec->samples++; 764 765 return 0; 766 } 767 768 static int record__auxtrace_read_snapshot_all(struct record *rec) 769 { 770 int i; 771 int rc = 0; 772 773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 774 struct mmap *map = &rec->evlist->mmap[i]; 775 776 if (!map->auxtrace_mmap.base) 777 continue; 778 779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 780 rc = -1; 781 goto out; 782 } 783 } 784 out: 785 return rc; 786 } 787 788 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 789 { 790 pr_debug("Recording AUX area tracing snapshot\n"); 791 if (record__auxtrace_read_snapshot_all(rec) < 0) { 792 trigger_error(&auxtrace_snapshot_trigger); 793 } else { 794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 795 trigger_error(&auxtrace_snapshot_trigger); 796 else 797 trigger_ready(&auxtrace_snapshot_trigger); 798 } 799 } 800 801 static int record__auxtrace_snapshot_exit(struct record *rec) 802 { 803 if (trigger_is_error(&auxtrace_snapshot_trigger)) 804 return 0; 805 806 if (!auxtrace_record__snapshot_started && 807 auxtrace_record__snapshot_start(rec->itr)) 808 return -1; 809 810 record__read_auxtrace_snapshot(rec, true); 811 if (trigger_is_error(&auxtrace_snapshot_trigger)) 812 return -1; 813 814 return 0; 815 } 816 817 static int record__auxtrace_init(struct record *rec) 818 { 819 int err; 820 821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 822 && record__threads_enabled(rec)) { 823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 824 return -EINVAL; 825 } 826 827 if (!rec->itr) { 828 rec->itr = auxtrace_record__init(rec->evlist, &err); 829 if (err) 830 return err; 831 } 832 833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 834 rec->opts.auxtrace_snapshot_opts); 835 if (err) 836 return err; 837 838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 839 rec->opts.auxtrace_sample_opts); 840 if (err) 841 return err; 842 843 auxtrace_regroup_aux_output(rec->evlist); 844 845 return auxtrace_parse_filters(rec->evlist); 846 } 847 848 #else 849 850 static inline 851 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 852 struct mmap *map __maybe_unused) 853 { 854 return 0; 855 } 856 857 static inline 858 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 859 bool on_exit __maybe_unused) 860 { 861 } 862 863 static inline 864 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 865 { 866 return 0; 867 } 868 869 static inline 870 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 871 { 872 return 0; 873 } 874 875 static int record__auxtrace_init(struct record *rec __maybe_unused) 876 { 877 return 0; 878 } 879 880 #endif 881 882 static int record__config_text_poke(struct evlist *evlist) 883 { 884 struct evsel *evsel; 885 886 /* Nothing to do if text poke is already configured */ 887 evlist__for_each_entry(evlist, evsel) { 888 if (evsel->core.attr.text_poke) 889 return 0; 890 } 891 892 evsel = evlist__add_dummy_on_all_cpus(evlist); 893 if (!evsel) 894 return -ENOMEM; 895 896 evsel->core.attr.text_poke = 1; 897 evsel->core.attr.ksymbol = 1; 898 evsel->immediate = true; 899 evsel__set_sample_bit(evsel, TIME); 900 901 return 0; 902 } 903 904 static int record__config_off_cpu(struct record *rec) 905 { 906 return 
off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 907 } 908 909 static bool record__kcore_readable(struct machine *machine) 910 { 911 char kcore[PATH_MAX]; 912 int fd; 913 914 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 915 916 fd = open(kcore, O_RDONLY); 917 if (fd < 0) 918 return false; 919 920 close(fd); 921 922 return true; 923 } 924 925 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 926 { 927 char from_dir[PATH_MAX]; 928 char kcore_dir[PATH_MAX]; 929 int ret; 930 931 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 932 933 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 934 if (ret) 935 return ret; 936 937 return kcore_copy(from_dir, kcore_dir); 938 } 939 940 static void record__thread_data_init_pipes(struct record_thread *thread_data) 941 { 942 thread_data->pipes.msg[0] = -1; 943 thread_data->pipes.msg[1] = -1; 944 thread_data->pipes.ack[0] = -1; 945 thread_data->pipes.ack[1] = -1; 946 } 947 948 static int record__thread_data_open_pipes(struct record_thread *thread_data) 949 { 950 if (pipe(thread_data->pipes.msg)) 951 return -EINVAL; 952 953 if (pipe(thread_data->pipes.ack)) { 954 close(thread_data->pipes.msg[0]); 955 thread_data->pipes.msg[0] = -1; 956 close(thread_data->pipes.msg[1]); 957 thread_data->pipes.msg[1] = -1; 958 return -EINVAL; 959 } 960 961 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 962 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 963 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 964 965 return 0; 966 } 967 968 static void record__thread_data_close_pipes(struct record_thread *thread_data) 969 { 970 if (thread_data->pipes.msg[0] != -1) { 971 close(thread_data->pipes.msg[0]); 972 thread_data->pipes.msg[0] = -1; 973 } 974 if (thread_data->pipes.msg[1] != -1) { 975 close(thread_data->pipes.msg[1]); 976 thread_data->pipes.msg[1] = -1; 977 } 978 if (thread_data->pipes.ack[0] != -1) { 979 close(thread_data->pipes.ack[0]); 980 thread_data->pipes.ack[0] = -1; 981 } 982 if (thread_data->pipes.ack[1] != -1) { 983 close(thread_data->pipes.ack[1]); 984 thread_data->pipes.ack[1] = -1; 985 } 986 } 987 988 static bool evlist__per_thread(struct evlist *evlist) 989 { 990 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 991 } 992 993 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 994 { 995 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 996 struct mmap *mmap = evlist->mmap; 997 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 998 struct perf_cpu_map *cpus = evlist->core.all_cpus; 999 bool per_thread = evlist__per_thread(evlist); 1000 1001 if (per_thread) 1002 thread_data->nr_mmaps = nr_mmaps; 1003 else 1004 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1005 thread_data->mask->maps.nbits); 1006 if (mmap) { 1007 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1008 if (!thread_data->maps) 1009 return -ENOMEM; 1010 } 1011 if (overwrite_mmap) { 1012 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1013 if (!thread_data->overwrite_maps) { 1014 zfree(&thread_data->maps); 1015 return -ENOMEM; 1016 } 1017 } 1018 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1019 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1020 1021 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1022 if (per_thread || 1023 
test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1024 if (thread_data->maps) { 1025 thread_data->maps[tm] = &mmap[m]; 1026 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1027 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1028 } 1029 if (thread_data->overwrite_maps) { 1030 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1031 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1032 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1033 } 1034 tm++; 1035 } 1036 } 1037 1038 return 0; 1039 } 1040 1041 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1042 { 1043 int f, tm, pos; 1044 struct mmap *map, *overwrite_map; 1045 1046 fdarray__init(&thread_data->pollfd, 64); 1047 1048 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1049 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1050 overwrite_map = thread_data->overwrite_maps ? 1051 thread_data->overwrite_maps[tm] : NULL; 1052 1053 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1054 void *ptr = evlist->core.pollfd.priv[f].ptr; 1055 1056 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1057 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1058 &evlist->core.pollfd); 1059 if (pos < 0) 1060 return pos; 1061 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1062 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1063 } 1064 } 1065 } 1066 1067 return 0; 1068 } 1069 1070 static void record__free_thread_data(struct record *rec) 1071 { 1072 int t; 1073 struct record_thread *thread_data = rec->thread_data; 1074 1075 if (thread_data == NULL) 1076 return; 1077 1078 for (t = 0; t < rec->nr_threads; t++) { 1079 record__thread_data_close_pipes(&thread_data[t]); 1080 zfree(&thread_data[t].maps); 1081 zfree(&thread_data[t].overwrite_maps); 1082 fdarray__exit(&thread_data[t].pollfd); 1083 } 1084 1085 zfree(&rec->thread_data); 1086 } 1087 1088 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1089 int evlist_pollfd_index, 1090 int thread_pollfd_index) 1091 { 1092 size_t x = rec->index_map_cnt; 1093 1094 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1095 return -ENOMEM; 1096 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1097 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1098 rec->index_map_cnt += 1; 1099 return 0; 1100 } 1101 1102 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1103 struct evlist *evlist, 1104 struct record_thread *thread_data) 1105 { 1106 struct pollfd *e_entries = evlist->core.pollfd.entries; 1107 struct pollfd *t_entries = thread_data->pollfd.entries; 1108 int err = 0; 1109 size_t i; 1110 1111 for (i = 0; i < rec->index_map_cnt; i++) { 1112 int e_pos = rec->index_map[i].evlist_pollfd_index; 1113 int t_pos = rec->index_map[i].thread_pollfd_index; 1114 1115 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1116 e_entries[e_pos].events != t_entries[t_pos].events) { 1117 pr_err("Thread and evlist pollfd index mismatch\n"); 1118 err = -EINVAL; 1119 continue; 1120 } 1121 e_entries[e_pos].revents = t_entries[t_pos].revents; 1122 } 1123 return err; 1124 } 1125 1126 static int record__dup_non_perf_events(struct record *rec, 1127 struct evlist *evlist, 1128 struct record_thread *thread_data) 1129 { 1130 struct fdarray *fda = &evlist->core.pollfd; 1131 int i, ret; 1132 1133 for (i = 0; i < fda->nr; i++) { 1134 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1135 continue; 1136 ret = 
fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1137 if (ret < 0) { 1138 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1139 return ret; 1140 } 1141 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1142 thread_data, ret, fda->entries[i].fd); 1143 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1144 if (ret < 0) { 1145 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1146 return ret; 1147 } 1148 } 1149 return 0; 1150 } 1151 1152 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1153 { 1154 int t, ret; 1155 struct record_thread *thread_data; 1156 1157 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1158 if (!rec->thread_data) { 1159 pr_err("Failed to allocate thread data\n"); 1160 return -ENOMEM; 1161 } 1162 thread_data = rec->thread_data; 1163 1164 for (t = 0; t < rec->nr_threads; t++) 1165 record__thread_data_init_pipes(&thread_data[t]); 1166 1167 for (t = 0; t < rec->nr_threads; t++) { 1168 thread_data[t].rec = rec; 1169 thread_data[t].mask = &rec->thread_masks[t]; 1170 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1171 if (ret) { 1172 pr_err("Failed to initialize thread[%d] maps\n", t); 1173 goto out_free; 1174 } 1175 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1176 if (ret) { 1177 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1178 goto out_free; 1179 } 1180 if (t) { 1181 thread_data[t].tid = -1; 1182 ret = record__thread_data_open_pipes(&thread_data[t]); 1183 if (ret) { 1184 pr_err("Failed to open thread[%d] communication pipes\n", t); 1185 goto out_free; 1186 } 1187 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1188 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1189 if (ret < 0) { 1190 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1191 goto out_free; 1192 } 1193 thread_data[t].ctlfd_pos = ret; 1194 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1195 thread_data, thread_data[t].ctlfd_pos, 1196 thread_data[t].pipes.msg[0]); 1197 } else { 1198 thread_data[t].tid = gettid(); 1199 1200 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1201 if (ret < 0) 1202 goto out_free; 1203 1204 thread_data[t].ctlfd_pos = -1; /* Not used */ 1205 } 1206 } 1207 1208 return 0; 1209 1210 out_free: 1211 record__free_thread_data(rec); 1212 1213 return ret; 1214 } 1215 1216 static int record__mmap_evlist(struct record *rec, 1217 struct evlist *evlist) 1218 { 1219 int i, ret; 1220 struct record_opts *opts = &rec->opts; 1221 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1222 opts->auxtrace_sample_mode; 1223 char msg[512]; 1224 1225 if (opts->affinity != PERF_AFFINITY_SYS) 1226 cpu__setup_cpunode_map(); 1227 1228 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1229 opts->auxtrace_mmap_pages, 1230 auxtrace_overwrite, 1231 opts->nr_cblocks, opts->affinity, 1232 opts->mmap_flush, opts->comp_level) < 0) { 1233 if (errno == EPERM) { 1234 pr_err("Permission error mapping pages.\n" 1235 "Consider increasing " 1236 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1237 "or try again with a smaller value of -m/--mmap_pages.\n" 1238 "(current value: %u,%u)\n", 1239 opts->mmap_pages, opts->auxtrace_mmap_pages); 1240 return -errno; 1241 } else { 1242 pr_err("failed to mmap with %d (%s)\n", errno, 1243 str_error_r(errno, msg, sizeof(msg))); 1244 if (errno) 1245 return -errno; 1246 else 1247 return -EINVAL; 1248 } 1249 } 1250 1251 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, 
opts->ctl_fd_ack)) 1252 return -1; 1253 1254 ret = record__alloc_thread_data(rec, evlist); 1255 if (ret) 1256 return ret; 1257 1258 if (record__threads_enabled(rec)) { 1259 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1260 if (ret) { 1261 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1262 return ret; 1263 } 1264 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1265 if (evlist->mmap) 1266 evlist->mmap[i].file = &rec->data.dir.files[i]; 1267 if (evlist->overwrite_mmap) 1268 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1269 } 1270 } 1271 1272 return 0; 1273 } 1274 1275 static int record__mmap(struct record *rec) 1276 { 1277 return record__mmap_evlist(rec, rec->evlist); 1278 } 1279 1280 static int record__open(struct record *rec) 1281 { 1282 char msg[BUFSIZ]; 1283 struct evsel *pos; 1284 struct evlist *evlist = rec->evlist; 1285 struct perf_session *session = rec->session; 1286 struct record_opts *opts = &rec->opts; 1287 int rc = 0; 1288 1289 /* 1290 * For initial_delay, system wide or a hybrid system, we need to add a 1291 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1292 * of waiting or event synthesis. 1293 */ 1294 if (opts->initial_delay || target__has_cpu(&opts->target) || 1295 perf_pmu__has_hybrid()) { 1296 pos = evlist__get_tracking_event(evlist); 1297 if (!evsel__is_dummy_event(pos)) { 1298 /* Set up dummy event. */ 1299 if (evlist__add_dummy(evlist)) 1300 return -ENOMEM; 1301 pos = evlist__last(evlist); 1302 evlist__set_tracking_event(evlist, pos); 1303 } 1304 1305 /* 1306 * Enable the dummy event when the process is forked for 1307 * initial_delay, immediately for system wide. 1308 */ 1309 if (opts->initial_delay && !pos->immediate && 1310 !target__has_cpu(&opts->target)) 1311 pos->core.attr.enable_on_exec = 1; 1312 else 1313 pos->immediate = 1; 1314 } 1315 1316 evlist__config(evlist, opts, &callchain_param); 1317 1318 evlist__for_each_entry(evlist, pos) { 1319 try_again: 1320 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1321 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1322 if (verbose > 0) 1323 ui__warning("%s\n", msg); 1324 goto try_again; 1325 } 1326 if ((errno == EINVAL || errno == EBADF) && 1327 pos->core.leader != &pos->core && 1328 pos->weak_group) { 1329 pos = evlist__reset_weak_group(evlist, pos, true); 1330 goto try_again; 1331 } 1332 rc = -errno; 1333 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1334 ui__error("%s\n", msg); 1335 goto out; 1336 } 1337 1338 pos->supported = true; 1339 } 1340 1341 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1342 pr_warning( 1343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1344 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1346 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1347 "Samples in kernel modules won't be resolved at all.\n\n" 1348 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 1349 "even with a suitable vmlinux or kallsyms file.\n\n"); 1350 } 1351 1352 if (evlist__apply_filters(evlist, &pos)) { 1353 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1354 pos->filter, evsel__name(pos), errno, 1355 str_error_r(errno, msg, sizeof(msg))); 1356 rc = -1; 1357 goto out; 1358 } 1359 1360 rc = record__mmap(rec); 1361 if (rc) 1362 goto out; 1363 1364 session->evlist = evlist; 1365 perf_session__set_id_hdr_size(session); 1366 out: 1367 return rc; 1368 } 1369 1370 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1371 { 1372 if (rec->evlist->first_sample_time == 0) 1373 rec->evlist->first_sample_time = sample_time; 1374 1375 if (sample_time) 1376 rec->evlist->last_sample_time = sample_time; 1377 } 1378 1379 static int process_sample_event(struct perf_tool *tool, 1380 union perf_event *event, 1381 struct perf_sample *sample, 1382 struct evsel *evsel, 1383 struct machine *machine) 1384 { 1385 struct record *rec = container_of(tool, struct record, tool); 1386 1387 set_timestamp_boundary(rec, sample->time); 1388 1389 if (rec->buildid_all) 1390 return 0; 1391 1392 rec->samples++; 1393 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1394 } 1395 1396 static int process_buildids(struct record *rec) 1397 { 1398 struct perf_session *session = rec->session; 1399 1400 if (perf_data__size(&rec->data) == 0) 1401 return 0; 1402 1403 /* 1404 * During this process, it'll load kernel map and replace the 1405 * dso->long_name to a real pathname it found. In this case 1406 * we prefer the vmlinux path like 1407 * /lib/modules/3.16.4/build/vmlinux 1408 * 1409 * rather than build-id path (in debug directory). 1410 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1411 */ 1412 symbol_conf.ignore_vmlinux_buildid = true; 1413 1414 /* 1415 * If --buildid-all is given, it marks all DSO regardless of hits, 1416 * so no need to process samples. But if timestamp_boundary is enabled, 1417 * it still needs to walk on all samples to get the timestamps of 1418 * first/last samples. 1419 */ 1420 if (rec->buildid_all && !rec->timestamp_boundary) 1421 rec->tool.sample = NULL; 1422 1423 return perf_session__process_events(session); 1424 } 1425 1426 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1427 { 1428 int err; 1429 struct perf_tool *tool = data; 1430 /* 1431 *As for guest kernel when processing subcommand record&report, 1432 *we arrange module mmap prior to guest kernel mmap and trigger 1433 *a preload dso because default guest module symbols are loaded 1434 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1435 *method is used to avoid symbol missing when the first addr is 1436 *in module instead of in guest kernel. 1437 */ 1438 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1439 machine); 1440 if (err < 0) 1441 pr_err("Couldn't record guest kernel [%d]'s reference" 1442 " relocation symbol.\n", machine->pid); 1443 1444 /* 1445 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1446 * have no _text sometimes. 
1447 */ 1448 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1449 machine); 1450 if (err < 0) 1451 pr_err("Couldn't record guest kernel [%d]'s reference" 1452 " relocation symbol.\n", machine->pid); 1453 } 1454 1455 static struct perf_event_header finished_round_event = { 1456 .size = sizeof(struct perf_event_header), 1457 .type = PERF_RECORD_FINISHED_ROUND, 1458 }; 1459 1460 static struct perf_event_header finished_init_event = { 1461 .size = sizeof(struct perf_event_header), 1462 .type = PERF_RECORD_FINISHED_INIT, 1463 }; 1464 1465 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1466 { 1467 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1468 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1469 thread->mask->affinity.nbits)) { 1470 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1471 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1472 map->affinity_mask.bits, thread->mask->affinity.nbits); 1473 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1474 (cpu_set_t *)thread->mask->affinity.bits); 1475 if (verbose == 2) { 1476 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1477 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1478 } 1479 } 1480 } 1481 1482 static size_t process_comp_header(void *record, size_t increment) 1483 { 1484 struct perf_record_compressed *event = record; 1485 size_t size = sizeof(*event); 1486 1487 if (increment) { 1488 event->header.size += increment; 1489 return increment; 1490 } 1491 1492 event->header.type = PERF_RECORD_COMPRESSED; 1493 event->header.size = size; 1494 1495 return size; 1496 } 1497 1498 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1499 void *dst, size_t dst_size, void *src, size_t src_size) 1500 { 1501 size_t compressed; 1502 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1503 struct zstd_data *zstd_data = &session->zstd_data; 1504 1505 if (map && map->file) 1506 zstd_data = &map->zstd_data; 1507 1508 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1509 max_record_size, process_comp_header); 1510 1511 if (map && map->file) { 1512 thread->bytes_transferred += src_size; 1513 thread->bytes_compressed += compressed; 1514 } else { 1515 session->bytes_transferred += src_size; 1516 session->bytes_compressed += compressed; 1517 } 1518 1519 return compressed; 1520 } 1521 1522 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1523 bool overwrite, bool synch) 1524 { 1525 u64 bytes_written = rec->bytes_written; 1526 int i; 1527 int rc = 0; 1528 int nr_mmaps; 1529 struct mmap **maps; 1530 int trace_fd = rec->data.file.fd; 1531 off_t off = 0; 1532 1533 if (!evlist) 1534 return 0; 1535 1536 nr_mmaps = thread->nr_mmaps; 1537 maps = overwrite ? 
thread->overwrite_maps : thread->maps; 1538 1539 if (!maps) 1540 return 0; 1541 1542 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1543 return 0; 1544 1545 if (record__aio_enabled(rec)) 1546 off = record__aio_get_pos(trace_fd); 1547 1548 for (i = 0; i < nr_mmaps; i++) { 1549 u64 flush = 0; 1550 struct mmap *map = maps[i]; 1551 1552 if (map->core.base) { 1553 record__adjust_affinity(rec, map); 1554 if (synch) { 1555 flush = map->core.flush; 1556 map->core.flush = 1; 1557 } 1558 if (!record__aio_enabled(rec)) { 1559 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1560 if (synch) 1561 map->core.flush = flush; 1562 rc = -1; 1563 goto out; 1564 } 1565 } else { 1566 if (record__aio_push(rec, map, &off) < 0) { 1567 record__aio_set_pos(trace_fd, off); 1568 if (synch) 1569 map->core.flush = flush; 1570 rc = -1; 1571 goto out; 1572 } 1573 } 1574 if (synch) 1575 map->core.flush = flush; 1576 } 1577 1578 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1579 !rec->opts.auxtrace_sample_mode && 1580 record__auxtrace_mmap_read(rec, map) != 0) { 1581 rc = -1; 1582 goto out; 1583 } 1584 } 1585 1586 if (record__aio_enabled(rec)) 1587 record__aio_set_pos(trace_fd, off); 1588 1589 /* 1590 * Mark the round finished in case we wrote 1591 * at least one event. 1592 * 1593 * No need for round events in directory mode, 1594 * because per-cpu maps and files have data 1595 * sorted by kernel. 1596 */ 1597 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1598 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1599 1600 if (overwrite) 1601 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1602 out: 1603 return rc; 1604 } 1605 1606 static int record__mmap_read_all(struct record *rec, bool synch) 1607 { 1608 int err; 1609 1610 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1611 if (err) 1612 return err; 1613 1614 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1615 } 1616 1617 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1618 void *arg __maybe_unused) 1619 { 1620 struct perf_mmap *map = fda->priv[fd].ptr; 1621 1622 if (map) 1623 perf_mmap__put(map); 1624 } 1625 1626 static void *record__thread(void *arg) 1627 { 1628 enum thread_msg msg = THREAD_MSG__READY; 1629 bool terminate = false; 1630 struct fdarray *pollfd; 1631 int err, ctlfd_pos; 1632 1633 thread = arg; 1634 thread->tid = gettid(); 1635 1636 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1637 if (err == -1) 1638 pr_warning("threads[%d]: failed to notify on start: %s\n", 1639 thread->tid, strerror(errno)); 1640 1641 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1642 1643 pollfd = &thread->pollfd; 1644 ctlfd_pos = thread->ctlfd_pos; 1645 1646 for (;;) { 1647 unsigned long long hits = thread->samples; 1648 1649 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1650 break; 1651 1652 if (hits == thread->samples) { 1653 1654 err = fdarray__poll(pollfd, -1); 1655 /* 1656 * Propagate error, only if there's any. Ignore positive 1657 * number of returned events and interrupt error. 
1658 */ 1659 if (err > 0 || (err < 0 && errno == EINTR)) 1660 err = 0; 1661 thread->waking++; 1662 1663 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1664 record__thread_munmap_filtered, NULL) == 0) 1665 break; 1666 } 1667 1668 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1669 terminate = true; 1670 close(thread->pipes.msg[0]); 1671 thread->pipes.msg[0] = -1; 1672 pollfd->entries[ctlfd_pos].fd = -1; 1673 pollfd->entries[ctlfd_pos].events = 0; 1674 } 1675 1676 pollfd->entries[ctlfd_pos].revents = 0; 1677 } 1678 record__mmap_read_all(thread->rec, true); 1679 1680 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1681 if (err == -1) 1682 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1683 thread->tid, strerror(errno)); 1684 1685 return NULL; 1686 } 1687 1688 static void record__init_features(struct record *rec) 1689 { 1690 struct perf_session *session = rec->session; 1691 int feat; 1692 1693 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1694 perf_header__set_feat(&session->header, feat); 1695 1696 if (rec->no_buildid) 1697 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1698 1699 if (!have_tracepoints(&rec->evlist->core.entries)) 1700 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1701 1702 if (!rec->opts.branch_stack) 1703 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1704 1705 if (!rec->opts.full_auxtrace) 1706 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1707 1708 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1709 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1710 1711 if (!rec->opts.use_clockid) 1712 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1713 1714 if (!record__threads_enabled(rec)) 1715 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1716 1717 if (!record__comp_enabled(rec)) 1718 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1719 1720 perf_header__clear_feat(&session->header, HEADER_STAT); 1721 } 1722 1723 static void 1724 record__finish_output(struct record *rec) 1725 { 1726 int i; 1727 struct perf_data *data = &rec->data; 1728 int fd = perf_data__fd(data); 1729 1730 if (data->is_pipe) 1731 return; 1732 1733 rec->session->header.data_size += rec->bytes_written; 1734 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1735 if (record__threads_enabled(rec)) { 1736 for (i = 0; i < data->dir.nr; i++) 1737 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1738 } 1739 1740 if (!rec->no_buildid) { 1741 process_buildids(rec); 1742 1743 if (rec->buildid_all) 1744 dsos__hit_all(rec->session); 1745 } 1746 perf_session__write_header(rec->session, rec->evlist, fd, true); 1747 1748 return; 1749 } 1750 1751 static int record__synthesize_workload(struct record *rec, bool tail) 1752 { 1753 int err; 1754 struct perf_thread_map *thread_map; 1755 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1756 1757 if (rec->opts.tail_synthesize != tail) 1758 return 0; 1759 1760 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1761 if (thread_map == NULL) 1762 return -1; 1763 1764 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1765 process_synthesized_event, 1766 &rec->session->machines.host, 1767 needs_mmap, 1768 rec->opts.sample_address); 1769 perf_thread_map__put(thread_map); 1770 return err; 1771 } 1772 1773 static int write_finished_init(struct record *rec, bool tail) 1774 { 1775 if (rec->opts.tail_synthesize != tail) 1776 return 0; 1777 1778 return 
record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1779 } 1780 1781 static int record__synthesize(struct record *rec, bool tail); 1782 1783 static int 1784 record__switch_output(struct record *rec, bool at_exit) 1785 { 1786 struct perf_data *data = &rec->data; 1787 int fd, err; 1788 char *new_filename; 1789 1790 /* Same Size: "2015122520103046"*/ 1791 char timestamp[] = "InvalidTimestamp"; 1792 1793 record__aio_mmap_read_sync(rec); 1794 1795 write_finished_init(rec, true); 1796 1797 record__synthesize(rec, true); 1798 if (target__none(&rec->opts.target)) 1799 record__synthesize_workload(rec, true); 1800 1801 rec->samples = 0; 1802 record__finish_output(rec); 1803 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1804 if (err) { 1805 pr_err("Failed to get current timestamp\n"); 1806 return -EINVAL; 1807 } 1808 1809 fd = perf_data__switch(data, timestamp, 1810 rec->session->header.data_offset, 1811 at_exit, &new_filename); 1812 if (fd >= 0 && !at_exit) { 1813 rec->bytes_written = 0; 1814 rec->session->header.data_size = 0; 1815 } 1816 1817 if (!quiet) 1818 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1819 data->path, timestamp); 1820 1821 if (rec->switch_output.num_files) { 1822 int n = rec->switch_output.cur_file + 1; 1823 1824 if (n >= rec->switch_output.num_files) 1825 n = 0; 1826 rec->switch_output.cur_file = n; 1827 if (rec->switch_output.filenames[n]) { 1828 remove(rec->switch_output.filenames[n]); 1829 zfree(&rec->switch_output.filenames[n]); 1830 } 1831 rec->switch_output.filenames[n] = new_filename; 1832 } else { 1833 free(new_filename); 1834 } 1835 1836 /* Output tracking events */ 1837 if (!at_exit) { 1838 record__synthesize(rec, false); 1839 1840 /* 1841 * In 'perf record --switch-output' without -a, 1842 * record__synthesize() in record__switch_output() won't 1843 * generate tracking events because there's no thread_map 1844 * in evlist. Which causes newly created perf.data doesn't 1845 * contain map and comm information. 1846 * Create a fake thread_map and directly call 1847 * perf_event__synthesize_thread_map() for those events. 
1848 */ 1849 if (target__none(&rec->opts.target)) 1850 record__synthesize_workload(rec, false); 1851 write_finished_init(rec, false); 1852 } 1853 return fd; 1854 } 1855 1856 static void __record__read_lost_samples(struct record *rec, struct evsel *evsel, 1857 struct perf_record_lost_samples *lost, 1858 int cpu_idx, int thread_idx) 1859 { 1860 struct perf_counts_values count; 1861 struct perf_sample_id *sid; 1862 struct perf_sample sample = {}; 1863 int id_hdr_size; 1864 1865 if (perf_evsel__read(&evsel->core, cpu_idx, thread_idx, &count) < 0) { 1866 pr_err("read LOST count failed\n"); 1867 return; 1868 } 1869 1870 if (count.lost == 0) 1871 return; 1872 1873 lost->lost = count.lost; 1874 if (evsel->core.ids) { 1875 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1876 sample.id = sid->id; 1877 } 1878 1879 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1880 evsel->core.attr.sample_type, &sample); 1881 lost->header.size = sizeof(*lost) + id_hdr_size; 1882 record__write(rec, NULL, lost, lost->header.size); 1883 } 1884 1885 static void record__read_lost_samples(struct record *rec) 1886 { 1887 struct perf_session *session = rec->session; 1888 struct perf_record_lost_samples *lost; 1889 struct evsel *evsel; 1890 1891 /* there was an error during record__open */ 1892 if (session->evlist == NULL) 1893 return; 1894 1895 lost = zalloc(PERF_SAMPLE_MAX_SIZE); 1896 if (lost == NULL) { 1897 pr_debug("Memory allocation failed\n"); 1898 return; 1899 } 1900 1901 lost->header.type = PERF_RECORD_LOST_SAMPLES; 1902 1903 evlist__for_each_entry(session->evlist, evsel) { 1904 struct xyarray *xy = evsel->core.sample_id; 1905 1906 if (xy == NULL || evsel->core.fd == NULL) 1907 continue; 1908 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1909 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1910 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1911 continue; 1912 } 1913 1914 for (int x = 0; x < xyarray__max_x(xy); x++) { 1915 for (int y = 0; y < xyarray__max_y(xy); y++) { 1916 __record__read_lost_samples(rec, evsel, lost, x, y); 1917 } 1918 } 1919 } 1920 free(lost); 1921 1922 } 1923 1924 static volatile int workload_exec_errno; 1925 1926 /* 1927 * evlist__prepare_workload will send a SIGUSR1 1928 * if the fork fails, since we asked by setting its 1929 * want_signal to true. 
1930 */ 1931 static void workload_exec_failed_signal(int signo __maybe_unused, 1932 siginfo_t *info, 1933 void *ucontext __maybe_unused) 1934 { 1935 workload_exec_errno = info->si_value.sival_int; 1936 done = 1; 1937 child_finished = 1; 1938 } 1939 1940 static void snapshot_sig_handler(int sig); 1941 static void alarm_sig_handler(int sig); 1942 1943 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1944 { 1945 if (evlist) { 1946 if (evlist->mmap && evlist->mmap[0].core.base) 1947 return evlist->mmap[0].core.base; 1948 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1949 return evlist->overwrite_mmap[0].core.base; 1950 } 1951 return NULL; 1952 } 1953 1954 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1955 { 1956 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1957 if (pc) 1958 return pc; 1959 return NULL; 1960 } 1961 1962 static int record__synthesize(struct record *rec, bool tail) 1963 { 1964 struct perf_session *session = rec->session; 1965 struct machine *machine = &session->machines.host; 1966 struct perf_data *data = &rec->data; 1967 struct record_opts *opts = &rec->opts; 1968 struct perf_tool *tool = &rec->tool; 1969 int err = 0; 1970 event_op f = process_synthesized_event; 1971 1972 if (rec->opts.tail_synthesize != tail) 1973 return 0; 1974 1975 if (data->is_pipe) { 1976 err = perf_event__synthesize_for_pipe(tool, session, data, 1977 process_synthesized_event); 1978 if (err < 0) 1979 goto out; 1980 1981 rec->bytes_written += err; 1982 } 1983 1984 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1985 process_synthesized_event, machine); 1986 if (err) 1987 goto out; 1988 1989 /* Synthesize id_index before auxtrace_info */ 1990 err = perf_event__synthesize_id_index(tool, 1991 process_synthesized_event, 1992 session->evlist, machine); 1993 if (err) 1994 goto out; 1995 1996 if (rec->opts.full_auxtrace) { 1997 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1998 session, process_synthesized_event); 1999 if (err) 2000 goto out; 2001 } 2002 2003 if (!evlist__exclude_kernel(rec->evlist)) { 2004 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2005 machine); 2006 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2007 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2008 "Check /proc/kallsyms permission or run as root.\n"); 2009 2010 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2011 machine); 2012 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2013 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2014 "Check /proc/modules permission or run as root.\n"); 2015 } 2016 2017 if (perf_guest) { 2018 machines__process_guests(&session->machines, 2019 perf_event__synthesize_guest_os, tool); 2020 } 2021 2022 err = perf_event__synthesize_extra_attr(&rec->tool, 2023 rec->evlist, 2024 process_synthesized_event, 2025 data->is_pipe); 2026 if (err) 2027 goto out; 2028 2029 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2030 process_synthesized_event, 2031 NULL); 2032 if (err < 0) { 2033 pr_err("Couldn't synthesize thread map.\n"); 2034 return err; 2035 } 2036 2037 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2038 process_synthesized_event, NULL); 2039 if (err < 0) { 2040 pr_err("Couldn't synthesize cpu map.\n"); 2041 return err; 2042 } 2043 2044 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2045 machine, opts); 2046 if (err < 0) { 2047 pr_warning("Couldn't synthesize bpf events.\n"); 2048 err = 0; 2049 } 2050 2051 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2052 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2053 machine); 2054 if (err < 0) { 2055 pr_warning("Couldn't synthesize cgroup events.\n"); 2056 err = 0; 2057 } 2058 } 2059 2060 if (rec->opts.nr_threads_synthesize > 1) { 2061 mutex_init(&synth_lock); 2062 perf_set_multithreaded(); 2063 f = process_locked_synthesized_event; 2064 } 2065 2066 if (rec->opts.synth & PERF_SYNTH_TASK) { 2067 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2068 2069 err = __machine__synthesize_threads(machine, tool, &opts->target, 2070 rec->evlist->core.threads, 2071 f, needs_mmap, opts->sample_address, 2072 rec->opts.nr_threads_synthesize); 2073 } 2074 2075 if (rec->opts.nr_threads_synthesize > 1) { 2076 perf_set_singlethreaded(); 2077 mutex_destroy(&synth_lock); 2078 } 2079 2080 out: 2081 return err; 2082 } 2083 2084 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2085 { 2086 struct record *rec = data; 2087 pthread_kill(rec->thread_id, SIGUSR2); 2088 return 0; 2089 } 2090 2091 static int record__setup_sb_evlist(struct record *rec) 2092 { 2093 struct record_opts *opts = &rec->opts; 2094 2095 if (rec->sb_evlist != NULL) { 2096 /* 2097 * We get here if --switch-output-event populated the 2098 * sb_evlist, so associate a callback that will send a SIGUSR2 2099 * to the main thread. 
2100 */ 2101 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2102 rec->thread_id = pthread_self(); 2103 } 2104 #ifdef HAVE_LIBBPF_SUPPORT 2105 if (!opts->no_bpf_event) { 2106 if (rec->sb_evlist == NULL) { 2107 rec->sb_evlist = evlist__new(); 2108 2109 if (rec->sb_evlist == NULL) { 2110 pr_err("Couldn't create side band evlist.\n"); 2111 return -1; 2112 } 2113 } 2114 2115 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2116 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n"); 2117 return -1; 2118 } 2119 } 2120 #endif 2121 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2122 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2123 opts->no_bpf_event = true; 2124 } 2125 2126 return 0; 2127 } 2128 2129 static int record__init_clock(struct record *rec) 2130 { 2131 struct perf_session *session = rec->session; 2132 struct timespec ref_clockid; 2133 struct timeval ref_tod; 2134 u64 ref; 2135 2136 if (!rec->opts.use_clockid) 2137 return 0; 2138 2139 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2140 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2141 2142 session->header.env.clock.clockid = rec->opts.clockid; 2143 2144 if (gettimeofday(&ref_tod, NULL) != 0) { 2145 pr_err("gettimeofday failed, cannot set reference time.\n"); 2146 return -1; 2147 } 2148 2149 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2150 pr_err("clock_gettime failed, cannot set reference time.\n"); 2151 return -1; 2152 } 2153 2154 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2155 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2156 2157 session->header.env.clock.tod_ns = ref; 2158 2159 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2160 (u64) ref_clockid.tv_nsec; 2161 2162 session->header.env.clock.clockid_ns = ref; 2163 return 0; 2164 } 2165 2166 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2167 { 2168 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2169 trigger_hit(&auxtrace_snapshot_trigger); 2170 auxtrace_record__snapshot_started = 1; 2171 if (auxtrace_record__snapshot_start(rec->itr)) 2172 trigger_error(&auxtrace_snapshot_trigger); 2173 } 2174 } 2175 2176 static void record__uniquify_name(struct record *rec) 2177 { 2178 struct evsel *pos; 2179 struct evlist *evlist = rec->evlist; 2180 char *new_name; 2181 int ret; 2182 2183 if (!perf_pmu__has_hybrid()) 2184 return; 2185 2186 evlist__for_each_entry(evlist, pos) { 2187 if (!evsel__is_hybrid(pos)) 2188 continue; 2189 2190 if (strchr(pos->name, '/')) 2191 continue; 2192 2193 ret = asprintf(&new_name, "%s/%s/", 2194 pos->pmu_name, pos->name); 2195 if (ret > 0) { 2196 free(pos->name); 2197 pos->name = new_name; 2198 } 2199 } 2200 } 2201 2202 static int record__terminate_thread(struct record_thread *thread_data) 2203 { 2204 int err; 2205 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2206 pid_t tid = thread_data->tid; 2207 2208 close(thread_data->pipes.msg[1]); 2209 thread_data->pipes.msg[1] = -1; 2210 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2211 if (err > 0) 2212 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2213 else 2214 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2215 thread->tid, tid); 2216 2217 return 0; 2218 } 2219 2220 static int record__start_threads(struct record *rec) 2221 { 2222 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2223 struct record_thread *thread_data = rec->thread_data; 2224 sigset_t full,
mask; 2225 pthread_t handle; 2226 pthread_attr_t attrs; 2227 2228 thread = &thread_data[0]; 2229 2230 if (!record__threads_enabled(rec)) 2231 return 0; 2232 2233 sigfillset(&full); 2234 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2235 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2236 return -1; 2237 } 2238 2239 pthread_attr_init(&attrs); 2240 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2241 2242 for (t = 1; t < nr_threads; t++) { 2243 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2244 2245 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2246 pthread_attr_setaffinity_np(&attrs, 2247 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2248 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2249 #endif 2250 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2251 for (tt = 1; tt < t; tt++) 2252 record__terminate_thread(&thread_data[tt]); 2253 pr_err("Failed to start threads: %s\n", strerror(errno)); 2254 ret = -1; 2255 goto out_err; 2256 } 2257 2258 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2259 if (err > 0) 2260 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2261 thread_msg_tags[msg]); 2262 else 2263 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2264 thread->tid, rec->thread_data[t].tid); 2265 } 2266 2267 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2268 (cpu_set_t *)thread->mask->affinity.bits); 2269 2270 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2271 2272 out_err: 2273 pthread_attr_destroy(&attrs); 2274 2275 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2276 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2277 ret = -1; 2278 } 2279 2280 return ret; 2281 } 2282 2283 static int record__stop_threads(struct record *rec) 2284 { 2285 int t; 2286 struct record_thread *thread_data = rec->thread_data; 2287 2288 for (t = 1; t < rec->nr_threads; t++) 2289 record__terminate_thread(&thread_data[t]); 2290 2291 for (t = 0; t < rec->nr_threads; t++) { 2292 rec->samples += thread_data[t].samples; 2293 if (!record__threads_enabled(rec)) 2294 continue; 2295 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2296 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2297 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2298 thread_data[t].samples, thread_data[t].waking); 2299 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2300 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2301 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2302 else 2303 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2304 } 2305 2306 return 0; 2307 } 2308 2309 static unsigned long record__waking(struct record *rec) 2310 { 2311 int t; 2312 unsigned long waking = 0; 2313 struct record_thread *thread_data = rec->thread_data; 2314 2315 for (t = 0; t < rec->nr_threads; t++) 2316 waking += thread_data[t].waking; 2317 2318 return waking; 2319 } 2320 2321 static int __cmd_record(struct record *rec, int argc, const char **argv) 2322 { 2323 int err; 2324 int status = 0; 2325 const bool forks = argc > 0; 2326 struct perf_tool *tool = &rec->tool; 2327 struct record_opts *opts = &rec->opts; 2328 struct perf_data *data = &rec->data; 2329 struct perf_session *session; 2330 bool disabled = false, draining = false; 2331 int fd; 2332 float ratio = 0; 2333 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2334 2335
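	/*
	 * Install the cleanup and signal handlers before anything that can
	 * fail, so an early SIGINT/SIGTERM still goes through the normal
	 * teardown path.
	 */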
atexit(record__sig_exit); 2336 signal(SIGCHLD, sig_handler); 2337 signal(SIGINT, sig_handler); 2338 signal(SIGTERM, sig_handler); 2339 signal(SIGSEGV, sigsegv_handler); 2340 2341 if (rec->opts.record_namespaces) 2342 tool->namespace_events = true; 2343 2344 if (rec->opts.record_cgroup) { 2345 #ifdef HAVE_FILE_HANDLE 2346 tool->cgroup_events = true; 2347 #else 2348 pr_err("cgroup tracking is not supported\n"); 2349 return -1; 2350 #endif 2351 } 2352 2353 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2354 signal(SIGUSR2, snapshot_sig_handler); 2355 if (rec->opts.auxtrace_snapshot_mode) 2356 trigger_on(&auxtrace_snapshot_trigger); 2357 if (rec->switch_output.enabled) 2358 trigger_on(&switch_output_trigger); 2359 } else { 2360 signal(SIGUSR2, SIG_IGN); 2361 } 2362 2363 session = perf_session__new(data, tool); 2364 if (IS_ERR(session)) { 2365 pr_err("Perf session creation failed.\n"); 2366 return PTR_ERR(session); 2367 } 2368 2369 if (record__threads_enabled(rec)) { 2370 if (perf_data__is_pipe(&rec->data)) { 2371 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2372 return -1; 2373 } 2374 if (rec->opts.full_auxtrace) { 2375 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2376 return -1; 2377 } 2378 } 2379 2380 fd = perf_data__fd(data); 2381 rec->session = session; 2382 2383 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2384 pr_err("Compression initialization failed.\n"); 2385 return -1; 2386 } 2387 #ifdef HAVE_EVENTFD_SUPPORT 2388 done_fd = eventfd(0, EFD_NONBLOCK); 2389 if (done_fd < 0) { 2390 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2391 status = -1; 2392 goto out_delete_session; 2393 } 2394 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2395 if (err < 0) { 2396 pr_err("Failed to add wakeup eventfd to poll list\n"); 2397 status = err; 2398 goto out_delete_session; 2399 } 2400 #endif // HAVE_EVENTFD_SUPPORT 2401 2402 session->header.env.comp_type = PERF_COMP_ZSTD; 2403 session->header.env.comp_level = rec->opts.comp_level; 2404 2405 if (rec->opts.kcore && 2406 !record__kcore_readable(&session->machines.host)) { 2407 pr_err("ERROR: kcore is not readable.\n"); 2408 return -1; 2409 } 2410 2411 if (record__init_clock(rec)) 2412 return -1; 2413 2414 record__init_features(rec); 2415 2416 if (forks) { 2417 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2418 workload_exec_failed_signal); 2419 if (err < 0) { 2420 pr_err("Couldn't run the workload!\n"); 2421 status = err; 2422 goto out_delete_session; 2423 } 2424 } 2425 2426 /* 2427 * If we have just single event and are sending data 2428 * through pipe, we need to force the ids allocation, 2429 * because we synthesize event name through the pipe 2430 * and need the id for that. 
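 * (the id is what lets the reader associate the samples with the
 * synthesized event attribute).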
2431 */ 2432 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2433 rec->opts.sample_id = true; 2434 2435 record__uniquify_name(rec); 2436 2437 /* Debug message used by test scripts */ 2438 pr_debug3("perf record opening and mmapping events\n"); 2439 if (record__open(rec) != 0) { 2440 err = -1; 2441 goto out_free_threads; 2442 } 2443 /* Debug message used by test scripts */ 2444 pr_debug3("perf record done opening and mmapping events\n"); 2445 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2446 2447 if (rec->opts.kcore) { 2448 err = record__kcore_copy(&session->machines.host, data); 2449 if (err) { 2450 pr_err("ERROR: Failed to copy kcore\n"); 2451 goto out_free_threads; 2452 } 2453 } 2454 2455 err = bpf__apply_obj_config(); 2456 if (err) { 2457 char errbuf[BUFSIZ]; 2458 2459 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2460 pr_err("ERROR: Apply config to BPF failed: %s\n", 2461 errbuf); 2462 goto out_free_threads; 2463 } 2464 2465 /* 2466 * Normally perf_session__new would do this, but it doesn't have the 2467 * evlist. 2468 */ 2469 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2470 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2471 rec->tool.ordered_events = false; 2472 } 2473 2474 if (!rec->evlist->core.nr_groups) 2475 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2476 2477 if (data->is_pipe) { 2478 err = perf_header__write_pipe(fd); 2479 if (err < 0) 2480 goto out_free_threads; 2481 } else { 2482 err = perf_session__write_header(session, rec->evlist, fd, false); 2483 if (err < 0) 2484 goto out_free_threads; 2485 } 2486 2487 err = -1; 2488 if (!rec->no_buildid 2489 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2490 pr_err("Couldn't generate buildids. " 2491 "Use --no-buildid to profile anyway.\n"); 2492 goto out_free_threads; 2493 } 2494 2495 err = record__setup_sb_evlist(rec); 2496 if (err) 2497 goto out_free_threads; 2498 2499 err = record__synthesize(rec, false); 2500 if (err < 0) 2501 goto out_free_threads; 2502 2503 if (rec->realtime_prio) { 2504 struct sched_param param; 2505 2506 param.sched_priority = rec->realtime_prio; 2507 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2508 pr_err("Could not set realtime priority.\n"); 2509 err = -1; 2510 goto out_free_threads; 2511 } 2512 } 2513 2514 if (record__start_threads(rec)) 2515 goto out_free_threads; 2516 2517 /* 2518 * When perf is starting the traced process, all the events 2519 * (apart from group members) have enable_on_exec=1 set, 2520 * so don't spoil it by prematurely enabling them. 2521 */ 2522 if (!target__none(&opts->target) && !opts->initial_delay) 2523 evlist__enable(rec->evlist); 2524 2525 /* 2526 * Let the child rip 2527 */ 2528 if (forks) { 2529 struct machine *machine = &session->machines.host; 2530 union perf_event *event; 2531 pid_t tgid; 2532 2533 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2534 if (event == NULL) { 2535 err = -ENOMEM; 2536 goto out_child; 2537 } 2538 2539 /* 2540 * Some H/W events are generated before COMM event 2541 * which is emitted during exec(), so perf script 2542 * cannot see a correct process name for those events. 2543 * Synthesize COMM event to prevent it.
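 * perf_event__synthesize_comm() also returns the workload's tgid,
 * which is reused for the NAMESPACES event synthesized just below.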
2544 */ 2545 tgid = perf_event__synthesize_comm(tool, event, 2546 rec->evlist->workload.pid, 2547 process_synthesized_event, 2548 machine); 2549 free(event); 2550 2551 if (tgid == -1) 2552 goto out_child; 2553 2554 event = malloc(sizeof(event->namespaces) + 2555 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2556 machine->id_hdr_size); 2557 if (event == NULL) { 2558 err = -ENOMEM; 2559 goto out_child; 2560 } 2561 2562 /* 2563 * Synthesize NAMESPACES event for the command specified. 2564 */ 2565 perf_event__synthesize_namespaces(tool, event, 2566 rec->evlist->workload.pid, 2567 tgid, process_synthesized_event, 2568 machine); 2569 free(event); 2570 2571 evlist__start_workload(rec->evlist); 2572 } 2573 2574 if (opts->initial_delay) { 2575 pr_info(EVLIST_DISABLED_MSG); 2576 if (opts->initial_delay > 0) { 2577 usleep(opts->initial_delay * USEC_PER_MSEC); 2578 evlist__enable(rec->evlist); 2579 pr_info(EVLIST_ENABLED_MSG); 2580 } 2581 } 2582 2583 err = event_enable_timer__start(rec->evlist->eet); 2584 if (err) 2585 goto out_child; 2586 2587 /* Debug message used by test scripts */ 2588 pr_debug3("perf record has started\n"); 2589 fflush(stderr); 2590 2591 trigger_ready(&auxtrace_snapshot_trigger); 2592 trigger_ready(&switch_output_trigger); 2593 perf_hooks__invoke_record_start(); 2594 2595 /* 2596 * Must write FINISHED_INIT so it will be seen after all other 2597 * synthesized user events, but before any regular events. 2598 */ 2599 err = write_finished_init(rec, false); 2600 if (err < 0) 2601 goto out_child; 2602 2603 for (;;) { 2604 unsigned long long hits = thread->samples; 2605 2606 /* 2607 * rec->evlist->bkw_mmap_state is possible to be 2608 * BKW_MMAP_EMPTY here: when done == true and 2609 * hits != rec->samples in previous round. 2610 * 2611 * evlist__toggle_bkw_mmap ensure we never 2612 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2613 */ 2614 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2615 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2616 2617 if (record__mmap_read_all(rec, false) < 0) { 2618 trigger_error(&auxtrace_snapshot_trigger); 2619 trigger_error(&switch_output_trigger); 2620 err = -1; 2621 goto out_child; 2622 } 2623 2624 if (auxtrace_record__snapshot_started) { 2625 auxtrace_record__snapshot_started = 0; 2626 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2627 record__read_auxtrace_snapshot(rec, false); 2628 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2629 pr_err("AUX area tracing snapshot failed\n"); 2630 err = -1; 2631 goto out_child; 2632 } 2633 } 2634 2635 if (trigger_is_hit(&switch_output_trigger)) { 2636 /* 2637 * If switch_output_trigger is hit, the data in 2638 * overwritable ring buffer should have been collected, 2639 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2640 * 2641 * If SIGUSR2 raise after or during record__mmap_read_all(), 2642 * record__mmap_read_all() didn't collect data from 2643 * overwritable ring buffer. Read again. 2644 */ 2645 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2646 continue; 2647 trigger_ready(&switch_output_trigger); 2648 2649 /* 2650 * Reenable events in overwrite ring buffer after 2651 * record__mmap_read_all(): we should have collected 2652 * data from it. 
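 * Switching back to BKW_MMAP_RUNNING resumes the overwritable
 * events that were paused while the buffer was being dumped.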
2653 */ 2654 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2655 2656 if (!quiet) 2657 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2658 record__waking(rec)); 2659 thread->waking = 0; 2660 fd = record__switch_output(rec, false); 2661 if (fd < 0) { 2662 pr_err("Failed to switch to new file\n"); 2663 trigger_error(&switch_output_trigger); 2664 err = fd; 2665 goto out_child; 2666 } 2667 2668 /* re-arm the alarm */ 2669 if (rec->switch_output.time) 2670 alarm(rec->switch_output.time); 2671 } 2672 2673 if (hits == thread->samples) { 2674 if (done || draining) 2675 break; 2676 err = fdarray__poll(&thread->pollfd, -1); 2677 /* 2678 * Propagate error, only if there's any. Ignore positive 2679 * number of returned events and interrupt error. 2680 */ 2681 if (err > 0 || (err < 0 && errno == EINTR)) 2682 err = 0; 2683 thread->waking++; 2684 2685 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2686 record__thread_munmap_filtered, NULL) == 0) 2687 draining = true; 2688 2689 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2690 if (err) 2691 goto out_child; 2692 } 2693 2694 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2695 switch (cmd) { 2696 case EVLIST_CTL_CMD_SNAPSHOT: 2697 hit_auxtrace_snapshot_trigger(rec); 2698 evlist__ctlfd_ack(rec->evlist); 2699 break; 2700 case EVLIST_CTL_CMD_STOP: 2701 done = 1; 2702 break; 2703 case EVLIST_CTL_CMD_ACK: 2704 case EVLIST_CTL_CMD_UNSUPPORTED: 2705 case EVLIST_CTL_CMD_ENABLE: 2706 case EVLIST_CTL_CMD_DISABLE: 2707 case EVLIST_CTL_CMD_EVLIST: 2708 case EVLIST_CTL_CMD_PING: 2709 default: 2710 break; 2711 } 2712 } 2713 2714 err = event_enable_timer__process(rec->evlist->eet); 2715 if (err < 0) 2716 goto out_child; 2717 if (err) { 2718 err = 0; 2719 done = 1; 2720 } 2721 2722 /* 2723 * When perf is starting the traced process, at the end events 2724 * die with the process and we wait for that. Thus no need to 2725 * disable events in this case. 
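 * For system wide or attached targets, disable explicitly so no new
 * samples are queued while the remaining data is drained below.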
2726 */ 2727 if (done && !disabled && !target__none(&opts->target)) { 2728 trigger_off(&auxtrace_snapshot_trigger); 2729 evlist__disable(rec->evlist); 2730 disabled = true; 2731 } 2732 } 2733 2734 trigger_off(&auxtrace_snapshot_trigger); 2735 trigger_off(&switch_output_trigger); 2736 2737 if (opts->auxtrace_snapshot_on_exit) 2738 record__auxtrace_snapshot_exit(rec); 2739 2740 if (forks && workload_exec_errno) { 2741 char msg[STRERR_BUFSIZE], strevsels[2048]; 2742 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2743 2744 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2745 2746 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2747 strevsels, argv[0], emsg); 2748 err = -1; 2749 goto out_child; 2750 } 2751 2752 if (!quiet) 2753 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2754 record__waking(rec)); 2755 2756 write_finished_init(rec, true); 2757 2758 if (target__none(&rec->opts.target)) 2759 record__synthesize_workload(rec, true); 2760 2761 out_child: 2762 record__stop_threads(rec); 2763 record__mmap_read_all(rec, true); 2764 out_free_threads: 2765 record__free_thread_data(rec); 2766 evlist__finalize_ctlfd(rec->evlist); 2767 record__aio_mmap_read_sync(rec); 2768 2769 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2770 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2771 session->header.env.comp_ratio = ratio + 0.5; 2772 } 2773 2774 if (forks) { 2775 int exit_status; 2776 2777 if (!child_finished) 2778 kill(rec->evlist->workload.pid, SIGTERM); 2779 2780 wait(&exit_status); 2781 2782 if (err < 0) 2783 status = err; 2784 else if (WIFEXITED(exit_status)) 2785 status = WEXITSTATUS(exit_status); 2786 else if (WIFSIGNALED(exit_status)) 2787 signr = WTERMSIG(exit_status); 2788 } else 2789 status = err; 2790 2791 if (rec->off_cpu) 2792 rec->bytes_written += off_cpu_write(rec->session); 2793 2794 record__read_lost_samples(rec); 2795 record__synthesize(rec, true); 2796 /* this will be recalculated during process_buildids() */ 2797 rec->samples = 0; 2798 2799 if (!err) { 2800 if (!rec->timestamp_filename) { 2801 record__finish_output(rec); 2802 } else { 2803 fd = record__switch_output(rec, true); 2804 if (fd < 0) { 2805 status = fd; 2806 goto out_delete_session; 2807 } 2808 } 2809 } 2810 2811 perf_hooks__invoke_record_end(); 2812 2813 if (!err && !quiet) { 2814 char samples[128]; 2815 const char *postfix = rec->timestamp_filename ? 
2816 ".<timestamp>" : ""; 2817 2818 if (rec->samples && !rec->opts.full_auxtrace) 2819 scnprintf(samples, sizeof(samples), 2820 " (%" PRIu64 " samples)", rec->samples); 2821 else 2822 samples[0] = '\0'; 2823 2824 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2825 perf_data__size(data) / 1024.0 / 1024.0, 2826 data->path, postfix, samples); 2827 if (ratio) { 2828 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2829 rec->session->bytes_transferred / 1024.0 / 1024.0, 2830 ratio); 2831 } 2832 fprintf(stderr, " ]\n"); 2833 } 2834 2835 out_delete_session: 2836 #ifdef HAVE_EVENTFD_SUPPORT 2837 if (done_fd >= 0) 2838 close(done_fd); 2839 #endif 2840 zstd_fini(&session->zstd_data); 2841 perf_session__delete(session); 2842 2843 if (!opts->no_bpf_event) 2844 evlist__stop_sb_thread(rec->sb_evlist); 2845 return status; 2846 } 2847 2848 static void callchain_debug(struct callchain_param *callchain) 2849 { 2850 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2851 2852 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2853 2854 if (callchain->record_mode == CALLCHAIN_DWARF) 2855 pr_debug("callchain: stack dump size %d\n", 2856 callchain->dump_size); 2857 } 2858 2859 int record_opts__parse_callchain(struct record_opts *record, 2860 struct callchain_param *callchain, 2861 const char *arg, bool unset) 2862 { 2863 int ret; 2864 callchain->enabled = !unset; 2865 2866 /* --no-call-graph */ 2867 if (unset) { 2868 callchain->record_mode = CALLCHAIN_NONE; 2869 pr_debug("callchain: disabled\n"); 2870 return 0; 2871 } 2872 2873 ret = parse_callchain_record_opt(arg, callchain); 2874 if (!ret) { 2875 /* Enable data address sampling for DWARF unwind. */ 2876 if (callchain->record_mode == CALLCHAIN_DWARF) 2877 record->sample_address = true; 2878 callchain_debug(callchain); 2879 } 2880 2881 return ret; 2882 } 2883 2884 int record_parse_callchain_opt(const struct option *opt, 2885 const char *arg, 2886 int unset) 2887 { 2888 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2889 } 2890 2891 int record_callchain_opt(const struct option *opt, 2892 const char *arg __maybe_unused, 2893 int unset __maybe_unused) 2894 { 2895 struct callchain_param *callchain = opt->value; 2896 2897 callchain->enabled = true; 2898 2899 if (callchain->record_mode == CALLCHAIN_NONE) 2900 callchain->record_mode = CALLCHAIN_FP; 2901 2902 callchain_debug(callchain); 2903 return 0; 2904 } 2905 2906 static int perf_record_config(const char *var, const char *value, void *cb) 2907 { 2908 struct record *rec = cb; 2909 2910 if (!strcmp(var, "record.build-id")) { 2911 if (!strcmp(value, "cache")) 2912 rec->no_buildid_cache = false; 2913 else if (!strcmp(value, "no-cache")) 2914 rec->no_buildid_cache = true; 2915 else if (!strcmp(value, "skip")) 2916 rec->no_buildid = true; 2917 else if (!strcmp(value, "mmap")) 2918 rec->buildid_mmap = true; 2919 else 2920 return -1; 2921 return 0; 2922 } 2923 if (!strcmp(var, "record.call-graph")) { 2924 var = "call-graph.record-mode"; 2925 return perf_default_config(var, value, cb); 2926 } 2927 #ifdef HAVE_AIO_SUPPORT 2928 if (!strcmp(var, "record.aio")) { 2929 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2930 if (!rec->opts.nr_cblocks) 2931 rec->opts.nr_cblocks = nr_cblocks_default; 2932 } 2933 #endif 2934 if (!strcmp(var, "record.debuginfod")) { 2935 rec->debuginfod.urls = strdup(value); 2936 if (!rec->debuginfod.urls) 2937 return -ENOMEM; 2938 rec->debuginfod.set = true; 2939 } 2940 2941 return 0; 2942 } 2943 
2944 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2945 { 2946 struct record *rec = (struct record *)opt->value; 2947 2948 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2949 } 2950 2951 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2952 { 2953 struct record_opts *opts = (struct record_opts *)opt->value; 2954 2955 if (unset || !str) 2956 return 0; 2957 2958 if (!strcasecmp(str, "node")) 2959 opts->affinity = PERF_AFFINITY_NODE; 2960 else if (!strcasecmp(str, "cpu")) 2961 opts->affinity = PERF_AFFINITY_CPU; 2962 2963 return 0; 2964 } 2965 2966 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2967 { 2968 mask->nbits = nr_bits; 2969 mask->bits = bitmap_zalloc(mask->nbits); 2970 if (!mask->bits) 2971 return -ENOMEM; 2972 2973 return 0; 2974 } 2975 2976 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2977 { 2978 bitmap_free(mask->bits); 2979 mask->nbits = 0; 2980 } 2981 2982 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2983 { 2984 int ret; 2985 2986 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2987 if (ret) { 2988 mask->affinity.bits = NULL; 2989 return ret; 2990 } 2991 2992 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2993 if (ret) { 2994 record__mmap_cpu_mask_free(&mask->maps); 2995 mask->maps.bits = NULL; 2996 } 2997 2998 return ret; 2999 } 3000 3001 static void record__thread_mask_free(struct thread_mask *mask) 3002 { 3003 record__mmap_cpu_mask_free(&mask->maps); 3004 record__mmap_cpu_mask_free(&mask->affinity); 3005 } 3006 3007 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3008 { 3009 int s; 3010 struct record_opts *opts = opt->value; 3011 3012 if (unset || !str || !strlen(str)) { 3013 opts->threads_spec = THREAD_SPEC__CPU; 3014 } else { 3015 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3016 if (s == THREAD_SPEC__USER) { 3017 opts->threads_user_spec = strdup(str); 3018 if (!opts->threads_user_spec) 3019 return -ENOMEM; 3020 opts->threads_spec = THREAD_SPEC__USER; 3021 break; 3022 } 3023 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3024 opts->threads_spec = s; 3025 break; 3026 } 3027 } 3028 } 3029 3030 if (opts->threads_spec == THREAD_SPEC__USER) 3031 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3032 else 3033 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3034 3035 return 0; 3036 } 3037 3038 static int parse_output_max_size(const struct option *opt, 3039 const char *str, int unset) 3040 { 3041 unsigned long *s = (unsigned long *)opt->value; 3042 static struct parse_tag tags_size[] = { 3043 { .tag = 'B', .mult = 1 }, 3044 { .tag = 'K', .mult = 1 << 10 }, 3045 { .tag = 'M', .mult = 1 << 20 }, 3046 { .tag = 'G', .mult = 1 << 30 }, 3047 { .tag = 0 }, 3048 }; 3049 unsigned long val; 3050 3051 if (unset) { 3052 *s = 0; 3053 return 0; 3054 } 3055 3056 val = parse_tag_value(str, tags_size); 3057 if (val != (unsigned long) -1) { 3058 *s = val; 3059 return 0; 3060 } 3061 3062 return -1; 3063 } 3064 3065 static int record__parse_mmap_pages(const struct option *opt, 3066 const char *str, 3067 int unset __maybe_unused) 3068 { 3069 struct record_opts *opts = opt->value; 3070 char *s, *p; 3071 unsigned int mmap_pages; 3072 int ret; 3073 3074 if (!str) 3075 return -EINVAL; 3076 3077 s = strdup(str); 3078 if (!s) 3079 return -ENOMEM; 3080 3081 p = strchr(s, ','); 3082 if (p) 
3083 *p = '\0'; 3084 3085 if (*s) { 3086 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3087 if (ret) 3088 goto out_free; 3089 opts->mmap_pages = mmap_pages; 3090 } 3091 3092 if (!p) { 3093 ret = 0; 3094 goto out_free; 3095 } 3096 3097 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3098 if (ret) 3099 goto out_free; 3100 3101 opts->auxtrace_mmap_pages = mmap_pages; 3102 3103 out_free: 3104 free(s); 3105 return ret; 3106 } 3107 3108 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3109 { 3110 } 3111 3112 static int parse_control_option(const struct option *opt, 3113 const char *str, 3114 int unset __maybe_unused) 3115 { 3116 struct record_opts *opts = opt->value; 3117 3118 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3119 } 3120 3121 static void switch_output_size_warn(struct record *rec) 3122 { 3123 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3124 struct switch_output *s = &rec->switch_output; 3125 3126 wakeup_size /= 2; 3127 3128 if (s->size < wakeup_size) { 3129 char buf[100]; 3130 3131 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3132 pr_warning("WARNING: switch-output data size lower than " 3133 "wakeup kernel buffer size (%s) " 3134 "expect bigger perf.data sizes\n", buf); 3135 } 3136 } 3137 3138 static int switch_output_setup(struct record *rec) 3139 { 3140 struct switch_output *s = &rec->switch_output; 3141 static struct parse_tag tags_size[] = { 3142 { .tag = 'B', .mult = 1 }, 3143 { .tag = 'K', .mult = 1 << 10 }, 3144 { .tag = 'M', .mult = 1 << 20 }, 3145 { .tag = 'G', .mult = 1 << 30 }, 3146 { .tag = 0 }, 3147 }; 3148 static struct parse_tag tags_time[] = { 3149 { .tag = 's', .mult = 1 }, 3150 { .tag = 'm', .mult = 60 }, 3151 { .tag = 'h', .mult = 60*60 }, 3152 { .tag = 'd', .mult = 60*60*24 }, 3153 { .tag = 0 }, 3154 }; 3155 unsigned long val; 3156 3157 /* 3158 * If we're using --switch-output-events, then we imply its 3159 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3160 * thread to its parent. 
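 * Otherwise the --switch-output argument parsed below is one of:
 *   "signal"       - rotate the output file on SIGUSR2
 *   <size>[BKMG]   - e.g. --switch-output=100M
 *   <time>[smhd]   - e.g. --switch-output=30s, re-armed via alarm()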
3161 */ 3162 if (rec->switch_output_event_set) { 3163 if (record__threads_enabled(rec)) { 3164 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3165 return 0; 3166 } 3167 goto do_signal; 3168 } 3169 3170 if (!s->set) 3171 return 0; 3172 3173 if (record__threads_enabled(rec)) { 3174 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3175 return 0; 3176 } 3177 3178 if (!strcmp(s->str, "signal")) { 3179 do_signal: 3180 s->signal = true; 3181 pr_debug("switch-output with SIGUSR2 signal\n"); 3182 goto enabled; 3183 } 3184 3185 val = parse_tag_value(s->str, tags_size); 3186 if (val != (unsigned long) -1) { 3187 s->size = val; 3188 pr_debug("switch-output with %s size threshold\n", s->str); 3189 goto enabled; 3190 } 3191 3192 val = parse_tag_value(s->str, tags_time); 3193 if (val != (unsigned long) -1) { 3194 s->time = val; 3195 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3196 s->str, s->time); 3197 goto enabled; 3198 } 3199 3200 return -1; 3201 3202 enabled: 3203 rec->timestamp_filename = true; 3204 s->enabled = true; 3205 3206 if (s->size && !rec->opts.no_buffering) 3207 switch_output_size_warn(rec); 3208 3209 return 0; 3210 } 3211 3212 static const char * const __record_usage[] = { 3213 "perf record [<options>] [<command>]", 3214 "perf record [<options>] -- <command> [<options>]", 3215 NULL 3216 }; 3217 const char * const *record_usage = __record_usage; 3218 3219 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3220 struct perf_sample *sample, struct machine *machine) 3221 { 3222 /* 3223 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3224 * no need to add them twice. 3225 */ 3226 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3227 return 0; 3228 return perf_event__process_mmap(tool, event, sample, machine); 3229 } 3230 3231 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3232 struct perf_sample *sample, struct machine *machine) 3233 { 3234 /* 3235 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3236 * no need to add them twice. 3237 */ 3238 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3239 return 0; 3240 3241 return perf_event__process_mmap2(tool, event, sample, machine); 3242 } 3243 3244 static int process_timestamp_boundary(struct perf_tool *tool, 3245 union perf_event *event __maybe_unused, 3246 struct perf_sample *sample, 3247 struct machine *machine __maybe_unused) 3248 { 3249 struct record *rec = container_of(tool, struct record, tool); 3250 3251 set_timestamp_boundary(rec, sample->time); 3252 return 0; 3253 } 3254 3255 static int parse_record_synth_option(const struct option *opt, 3256 const char *str, 3257 int unset __maybe_unused) 3258 { 3259 struct record_opts *opts = opt->value; 3260 char *p = strdup(str); 3261 3262 if (p == NULL) 3263 return -1; 3264 3265 opts->synth = parse_synth_opt(p); 3266 free(p); 3267 3268 if (opts->synth < 0) { 3269 pr_err("Invalid synth option: %s\n", str); 3270 return -1; 3271 } 3272 return 0; 3273 } 3274 3275 /* 3276 * XXX Ideally would be local to cmd_record() and passed to a record__new 3277 * because we need to have access to it in record__exit, that is called 3278 * after cmd_record() exits, but since record_options need to be accessible to 3279 * builtin-script, leave it here. 3280 * 3281 * At least we don't ouch it in all the other functions here directly. 
3282 * 3283 * Just say no to tons of global variables, sigh. 3284 */ 3285 static struct record record = { 3286 .opts = { 3287 .sample_time = true, 3288 .mmap_pages = UINT_MAX, 3289 .user_freq = UINT_MAX, 3290 .user_interval = ULLONG_MAX, 3291 .freq = 4000, 3292 .target = { 3293 .uses_mmap = true, 3294 .default_per_cpu = true, 3295 }, 3296 .mmap_flush = MMAP_FLUSH_DEFAULT, 3297 .nr_threads_synthesize = 1, 3298 .ctl_fd = -1, 3299 .ctl_fd_ack = -1, 3300 .synth = PERF_SYNTH_ALL, 3301 }, 3302 .tool = { 3303 .sample = process_sample_event, 3304 .fork = perf_event__process_fork, 3305 .exit = perf_event__process_exit, 3306 .comm = perf_event__process_comm, 3307 .namespaces = perf_event__process_namespaces, 3308 .mmap = build_id__process_mmap, 3309 .mmap2 = build_id__process_mmap2, 3310 .itrace_start = process_timestamp_boundary, 3311 .aux = process_timestamp_boundary, 3312 .ordered_events = true, 3313 }, 3314 }; 3315 3316 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3317 "\n\t\t\t\tDefault: fp"; 3318 3319 static bool dry_run; 3320 3321 /* 3322 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3323 * with it and switch to use the library functions in perf_evlist that came 3324 * from builtin-record.c, i.e. use record_opts, 3325 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3326 * using pipes, etc. 3327 */ 3328 static struct option __record_options[] = { 3329 OPT_CALLBACK('e', "event", &record.evlist, "event", 3330 "event selector. use 'perf list' to list available events", 3331 parse_events_option), 3332 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3333 "event filter", parse_filter), 3334 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3335 NULL, "don't record events from perf itself", 3336 exclude_perf), 3337 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3338 "record events on existing process id"), 3339 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3340 "record events on existing thread id"), 3341 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3342 "collect data with this RT SCHED_FIFO priority"), 3343 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3344 "collect data without buffering"), 3345 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3346 "collect raw sample records from all opened counters"), 3347 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3348 "system-wide collection from all CPUs"), 3349 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3350 "list of cpus to monitor"), 3351 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3352 OPT_STRING('o', "output", &record.data.path, "file", 3353 "output file name"), 3354 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3355 &record.opts.no_inherit_set, 3356 "child tasks do not inherit counters"), 3357 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3358 "synthesize non-sample events at the end of output"), 3359 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3360 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3361 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3362 "Fail if the specified frequency can't be used"), 3363 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3364 "profile at this frequency", 3365 record__parse_freq), 3366 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3367 "number of mmap data pages and AUX area tracing mmap pages", 3368 
record__parse_mmap_pages), 3369 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3370 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3371 record__mmap_flush_parse), 3372 OPT_BOOLEAN(0, "group", &record.opts.group, 3373 "put the counters into a counter group"), 3374 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3375 NULL, "enables call-graph recording" , 3376 &record_callchain_opt), 3377 OPT_CALLBACK(0, "call-graph", &record.opts, 3378 "record_mode[,record_size]", record_callchain_help, 3379 &record_parse_callchain_opt), 3380 OPT_INCR('v', "verbose", &verbose, 3381 "be more verbose (show counter open errors, etc)"), 3382 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3383 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3384 "per thread counts"), 3385 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3386 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3387 "Record the sample physical addresses"), 3388 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3389 "Record the sampled data address data page size"), 3390 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3391 "Record the sampled code address (ip) page size"), 3392 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3393 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3394 "Record the sample identifier"), 3395 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3396 &record.opts.sample_time_set, 3397 "Record the sample timestamps"), 3398 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3399 "Record the sample period"), 3400 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3401 "don't sample"), 3402 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3403 &record.no_buildid_cache_set, 3404 "do not update the buildid cache"), 3405 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3406 &record.no_buildid_set, 3407 "do not collect buildids in perf.data"), 3408 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3409 "monitor event in cgroup name only", 3410 parse_cgroups), 3411 OPT_CALLBACK('D', "delay", &record, "ms", 3412 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3413 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3414 record__parse_event_enable_time), 3415 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3416 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3417 "user to profile"), 3418 3419 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3420 "branch any", "sample any taken branches", 3421 parse_branch_stack), 3422 3423 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3424 "branch filter mask", "branch stack filter modes", 3425 parse_branch_stack), 3426 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3427 "sample by weight (on special events only)"), 3428 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3429 "sample transaction flags (special events only)"), 3430 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3431 "use per-thread mmaps"), 3432 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3433 "sample selected machine registers on interrupt," 3434 " use '-I?' 
to list register names", parse_intr_regs), 3435 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3436 "sample selected machine registers on interrupt," 3437 " use '--user-regs=?' to list register names", parse_user_regs), 3438 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3439 "Record running/enabled time of read (:S) events"), 3440 OPT_CALLBACK('k', "clockid", &record.opts, 3441 "clockid", "clockid to use for events, see clock_gettime()", 3442 parse_clockid), 3443 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3444 "opts", "AUX area tracing Snapshot Mode", ""), 3445 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3446 "opts", "sample AUX area", ""), 3447 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3448 "per thread proc mmap processing timeout in ms"), 3449 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3450 "Record namespaces events"), 3451 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3452 "Record cgroup events"), 3453 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3454 &record.opts.record_switch_events_set, 3455 "Record context switch events"), 3456 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3457 "Configure all used events to run in kernel space.", 3458 PARSE_OPT_EXCLUSIVE), 3459 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3460 "Configure all used events to run in user space.", 3461 PARSE_OPT_EXCLUSIVE), 3462 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3463 "collect kernel callchains"), 3464 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3465 "collect user callchains"), 3466 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3467 "clang binary to use for compiling BPF scriptlets"), 3468 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3469 "options passed to clang when compiling BPF scriptlets"), 3470 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3471 "file", "vmlinux pathname"), 3472 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3473 "Record build-id of all DSOs regardless of hits"), 3474 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3475 "Record build-id in map events"), 3476 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3477 "append timestamp to output filename"), 3478 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3479 "Record timestamp boundary (time of first/last samples)"), 3480 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3481 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3482 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3483 "signal"), 3484 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3485 "switch output event selector. 
use 'perf list' to list available events", 3486 parse_events_option_new_evlist), 3487 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3488 "Limit number of switch output generated files"), 3489 OPT_BOOLEAN(0, "dry-run", &dry_run, 3490 "Parse options then exit"), 3491 #ifdef HAVE_AIO_SUPPORT 3492 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3493 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3494 record__aio_parse), 3495 #endif 3496 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3497 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3498 record__parse_affinity), 3499 #ifdef HAVE_ZSTD_SUPPORT 3500 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3501 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3502 record__parse_comp_level), 3503 #endif 3504 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3505 "size", "Limit the maximum size of the output file", parse_output_max_size), 3506 OPT_UINTEGER(0, "num-thread-synthesize", 3507 &record.opts.nr_threads_synthesize, 3508 "number of threads to run for event synthesis"), 3509 #ifdef HAVE_LIBPFM 3510 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3511 "libpfm4 event selector. use 'perf list' to list available events", 3512 parse_libpfm_events_option), 3513 #endif 3514 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3515 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3516 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3517 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3518 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3519 parse_control_option), 3520 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3521 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3522 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3523 &record.debuginfod.set, "debuginfod urls", 3524 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3525 "system"), 3526 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3527 "write collected trace data into several data files using parallel threads", 3528 record__parse_threads), 3529 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3530 OPT_END() 3531 }; 3532 3533 struct option *record_options = __record_options; 3534 3535 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3536 { 3537 struct perf_cpu cpu; 3538 int idx; 3539 3540 if (cpu_map__is_dummy(cpus)) 3541 return 0; 3542 3543 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3544 if (cpu.cpu == -1) 3545 continue; 3546 /* Return ENODEV if input cpu is greater than max cpu */ 3547 if ((unsigned long)cpu.cpu > mask->nbits) 3548 return -ENODEV; 3549 set_bit(cpu.cpu, mask->bits); 3550 } 3551 3552 return 0; 3553 } 3554 3555 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3556 { 3557 struct perf_cpu_map *cpus; 3558 3559 cpus = perf_cpu_map__new(mask_spec); 3560 if (!cpus) 3561 return -ENOMEM; 3562 3563 bitmap_zero(mask->bits, mask->nbits); 3564 if (record__mmap_cpu_mask_init(mask, cpus)) 3565 return -ENODEV; 3566 3567 perf_cpu_map__put(cpus); 3568 3569 return 0; 3570 }
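/* Release the maps/affinity bitmaps of all nr_threads masks, then the array itself. */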
3571 3572 static void record__free_thread_masks(struct record *rec, int nr_threads) 3573 { 3574 int t; 3575 3576 if (rec->thread_masks) 3577 for (t = 0; t < nr_threads; t++) 3578 record__thread_mask_free(&rec->thread_masks[t]); 3579 3580 zfree(&rec->thread_masks); 3581 } 3582 3583 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3584 { 3585 int t, ret; 3586 3587 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3588 if (!rec->thread_masks) { 3589 pr_err("Failed to allocate thread masks\n"); 3590 return -ENOMEM; 3591 } 3592 3593 for (t = 0; t < nr_threads; t++) { 3594 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3595 if (ret) { 3596 pr_err("Failed to allocate thread masks[%d]\n", t); 3597 goto out_free; 3598 } 3599 } 3600 3601 return 0; 3602 3603 out_free: 3604 record__free_thread_masks(rec, nr_threads); 3605 3606 return ret; 3607 } 3608 3609 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3610 { 3611 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3612 3613 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3614 if (ret) 3615 return ret; 3616 3617 rec->nr_threads = nr_cpus; 3618 pr_debug("nr_threads: %d\n", rec->nr_threads); 3619 3620 for (t = 0; t < rec->nr_threads; t++) { 3621 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3622 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3623 if (verbose) { 3624 pr_debug("thread_masks[%d]: ", t); 3625 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3626 pr_debug("thread_masks[%d]: ", t); 3627 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3628 } 3629 } 3630 3631 return 0; 3632 } 3633 3634 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3635 const char **maps_spec, const char **affinity_spec, 3636 u32 nr_spec) 3637 { 3638 u32 s; 3639 int ret = 0, t = 0; 3640 struct mmap_cpu_mask cpus_mask; 3641 struct thread_mask thread_mask, full_mask, *thread_masks; 3642 3643 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3644 if (ret) { 3645 pr_err("Failed to allocate CPUs mask\n"); 3646 return ret; 3647 } 3648 3649 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3650 if (ret) { 3651 pr_err("Failed to init cpu mask\n"); 3652 goto out_free_cpu_mask; 3653 } 3654 3655 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3656 if (ret) { 3657 pr_err("Failed to allocate full mask\n"); 3658 goto out_free_cpu_mask; 3659 } 3660 3661 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3662 if (ret) { 3663 pr_err("Failed to allocate thread mask\n"); 3664 goto out_free_full_and_cpu_masks; 3665 } 3666 3667 for (s = 0; s < nr_spec; s++) { 3668 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3669 if (ret) { 3670 pr_err("Failed to initialize maps thread mask\n"); 3671 goto out_free; 3672 } 3673 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3674 if (ret) { 3675 pr_err("Failed to initialize affinity thread mask\n"); 3676 goto out_free; 3677 } 3678 3679 /* ignore invalid CPUs but do not allow empty masks */ 3680 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3681 cpus_mask.bits, thread_mask.maps.nbits)) { 3682 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3683 ret = -EINVAL; 3684 goto out_free; 3685 } 3686 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3687 cpus_mask.bits, 
thread_mask.affinity.nbits)) { 3688 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3689 ret = -EINVAL; 3690 goto out_free; 3691 } 3692 3693 /* do not allow intersection with other masks (full_mask) */ 3694 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3695 thread_mask.maps.nbits)) { 3696 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3697 ret = -EINVAL; 3698 goto out_free; 3699 } 3700 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3701 thread_mask.affinity.nbits)) { 3702 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3703 ret = -EINVAL; 3704 goto out_free; 3705 } 3706 3707 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3708 thread_mask.maps.bits, full_mask.maps.nbits); 3709 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3710 thread_mask.affinity.bits, full_mask.maps.nbits); 3711 3712 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3713 if (!thread_masks) { 3714 pr_err("Failed to reallocate thread masks\n"); 3715 ret = -ENOMEM; 3716 goto out_free; 3717 } 3718 rec->thread_masks = thread_masks; 3719 rec->thread_masks[t] = thread_mask; 3720 if (verbose) { 3721 pr_debug("thread_masks[%d]: ", t); 3722 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3723 pr_debug("thread_masks[%d]: ", t); 3724 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3725 } 3726 t++; 3727 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3728 if (ret) { 3729 pr_err("Failed to allocate thread mask\n"); 3730 goto out_free_full_and_cpu_masks; 3731 } 3732 } 3733 rec->nr_threads = t; 3734 pr_debug("nr_threads: %d\n", rec->nr_threads); 3735 if (!rec->nr_threads) 3736 ret = -EINVAL; 3737 3738 out_free: 3739 record__thread_mask_free(&thread_mask); 3740 out_free_full_and_cpu_masks: 3741 record__thread_mask_free(&full_mask); 3742 out_free_cpu_mask: 3743 record__mmap_cpu_mask_free(&cpus_mask); 3744 3745 return ret; 3746 } 3747 3748 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3749 { 3750 int ret; 3751 struct cpu_topology *topo; 3752 3753 topo = cpu_topology__new(); 3754 if (!topo) { 3755 pr_err("Failed to allocate CPU topology\n"); 3756 return -ENOMEM; 3757 } 3758 3759 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3760 topo->core_cpus_list, topo->core_cpus_lists); 3761 cpu_topology__delete(topo); 3762 3763 return ret; 3764 } 3765 3766 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3767 { 3768 int ret; 3769 struct cpu_topology *topo; 3770 3771 topo = cpu_topology__new(); 3772 if (!topo) { 3773 pr_err("Failed to allocate CPU topology\n"); 3774 return -ENOMEM; 3775 } 3776 3777 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3778 topo->package_cpus_list, topo->package_cpus_lists); 3779 cpu_topology__delete(topo); 3780 3781 return ret; 3782 } 3783 3784 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3785 { 3786 u32 s; 3787 int ret; 3788 const char **spec; 3789 struct numa_topology *topo; 3790 3791 topo = numa_topology__new(); 3792 if (!topo) { 3793 pr_err("Failed to allocate NUMA topology\n"); 3794 return -ENOMEM; 3795 } 3796 3797 spec = zalloc(topo->nr * sizeof(char *)); 3798 if (!spec) { 3799 pr_err("Failed to allocate NUMA spec\n"); 3800 ret = -ENOMEM; 3801 goto out_delete_topo; 3802 } 3803 for (s = 0; s < topo->nr; s++) 3804 spec[s] = topo->nodes[s].cpus; 3805 3806 ret = 
record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3807 3808 zfree(&spec); 3809 3810 out_delete_topo: 3811 numa_topology__delete(topo); 3812 3813 return ret; 3814 } 3815 3816 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3817 { 3818 int t, ret; 3819 u32 s, nr_spec = 0; 3820 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3821 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3822 3823 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3824 spec = strtok_r(user_spec, ":", &spec_ptr); 3825 if (spec == NULL) 3826 break; 3827 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3828 mask = strtok_r(spec, "/", &mask_ptr); 3829 if (mask == NULL) 3830 break; 3831 pr_debug2(" maps mask: %s\n", mask); 3832 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3833 if (!tmp_spec) { 3834 pr_err("Failed to reallocate maps spec\n"); 3835 ret = -ENOMEM; 3836 goto out_free; 3837 } 3838 maps_spec = tmp_spec; 3839 maps_spec[nr_spec] = dup_mask = strdup(mask); 3840 if (!maps_spec[nr_spec]) { 3841 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3842 ret = -ENOMEM; 3843 goto out_free; 3844 } 3845 mask = strtok_r(NULL, "/", &mask_ptr); 3846 if (mask == NULL) { 3847 pr_err("Invalid thread maps or affinity specs\n"); 3848 ret = -EINVAL; 3849 goto out_free; 3850 } 3851 pr_debug2(" affinity mask: %s\n", mask); 3852 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3853 if (!tmp_spec) { 3854 pr_err("Failed to reallocate affinity spec\n"); 3855 ret = -ENOMEM; 3856 goto out_free; 3857 } 3858 affinity_spec = tmp_spec; 3859 affinity_spec[nr_spec] = strdup(mask); 3860 if (!affinity_spec[nr_spec]) { 3861 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3862 ret = -ENOMEM; 3863 goto out_free; 3864 } 3865 dup_mask = NULL; 3866 nr_spec++; 3867 } 3868 3869 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3870 (const char **)affinity_spec, nr_spec); 3871 3872 out_free: 3873 free(dup_mask); 3874 for (s = 0; s < nr_spec; s++) { 3875 if (maps_spec) 3876 free(maps_spec[s]); 3877 if (affinity_spec) 3878 free(affinity_spec[s]); 3879 } 3880 free(affinity_spec); 3881 free(maps_spec); 3882 3883 return ret; 3884 } 3885 3886 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3887 { 3888 int ret; 3889 3890 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3891 if (ret) 3892 return ret; 3893 3894 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3895 return -ENODEV; 3896 3897 rec->nr_threads = 1; 3898 3899 return 0; 3900 } 3901 3902 static int record__init_thread_masks(struct record *rec) 3903 { 3904 int ret = 0; 3905 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3906 3907 if (!record__threads_enabled(rec)) 3908 return record__init_thread_default_masks(rec, cpus); 3909 3910 if (evlist__per_thread(rec->evlist)) { 3911 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3912 return -EINVAL; 3913 } 3914 3915 switch (rec->opts.threads_spec) { 3916 case THREAD_SPEC__CPU: 3917 ret = record__init_thread_cpu_masks(rec, cpus); 3918 break; 3919 case THREAD_SPEC__CORE: 3920 ret = record__init_thread_core_masks(rec, cpus); 3921 break; 3922 case THREAD_SPEC__PACKAGE: 3923 ret = record__init_thread_package_masks(rec, cpus); 3924 break; 3925 case THREAD_SPEC__NUMA: 3926 ret = record__init_thread_numa_masks(rec, cpus); 3927 break; 3928 case THREAD_SPEC__USER: 
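		/* e.g. --threads=0-3/0-3:4-7/4-7, i.e. <maps cpus>/<affinity cpus> pairs separated by ':' */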
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}
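
/*
 * Choose the maps/affinity layout for data streaming threads: a single
 * default mask covering all mapped CPUs when parallel streaming is off,
 * otherwise masks grouped per CPU, core, package or NUMA node, or taken
 * from a user supplied specification.
 */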
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}
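
	/*
	 * A time based --switch-output period arms SIGALRM here;
	 * alarm_sig_handler() at the end of this file fires the
	 * switch_output trigger when the timer expires.
	 */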
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * requested, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;
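
	/*
	 * No events were given on the command line, so fall back to the
	 * default event (typically cycles); on hybrid systems,
	 * evlist__add_default_hybrid() adds one default event per hybrid PMU.
	 */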
	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
			       errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * Take all buildids when the file contains AUX area tracing data,
	 * because we do not decode the trace (decoding would take too long).
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}