// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

/* Parameters controlling perf.data output file switching (--switch-output). */
struct switch_output {
	bool		 enabled;
	bool		 signal;	/* switch on SIGUSR2 */
	unsigned long	 size;		/* switch after this many bytes, 0 = off */
	unsigned long	 time;		/* switch after this many seconds, 0 = off */
	const char	*str;		/* raw option string */
	bool		 set;
	char		 **filenames;	/* ring of previously generated file names */
	int		 num_files;
	int		 cur_file;
};

/* Per-thread CPU masks: which mmaps the thread services and its affinity. */
struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

/* State of one recording thread in parallel (--threads) streaming mode. */
struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];	/* main -> worker message pipe */
		int		ack[2];	/* worker -> main ack pipe */
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

/* Per-thread pointer to the record_thread servicing the current thread. */
static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

/* Maps an evlist pollfd slot to the corresponding slot in a thread's pollfd. */
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

/* Top-level state for one 'perf record' session. */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;
	size_t			index_map_sz;
	size_t			index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

/* Non-zero when parallel (--threads) trace streaming was requested. */
static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

/* Total bytes written so far: the main counter plus all per-thread counters. */
static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

/*
 * Write @size bytes of @bf to the output: the per-mmap file in threaded
 * mode (map->file set), otherwise the session's single perf.data file.
 * Also enforces --max-size and arms --switch-output-by-size.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	/* Account to the owning thread in threaded mode, else globally. */
	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
/* Queue an asynchronous write, retrying while the AIO queue is full (EAGAIN). */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			/* Fatal error: mark the block free and give up. */
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

/*
 * Check one in-flight AIO write; returns 1 when the block is fully written
 * (and releases the mmap reference), 0 when still in progress or restarted
 * for the unwritten remainder.
 */
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if
 (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * reminder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

/*
 * Reap completed AIO blocks for @md. With !sync_all, return the index of the
 * first free block (waiting via aio_suspend() if none is free yet); with
 * sync_all, wait until every outstanding write has completed (returns -1).
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

/* Accumulator handed to record__aio_pushfn() while draining one mmap. */
struct record_aio {
	struct record	*rec;
	void		*data;	/* destination aio buffer */
	size_t		size;	/* bytes accumulated so far */
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel to proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Coping can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the reminder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after
		 * started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

/* Drain @map into a free aio buffer and queue an asynchronous write at *off. */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

/* Wait for every in-flight AIO write on all mmaps to complete. */
static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

/* --aio option callback: number of AIO control blocks per mmap. */
static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
/* --mmap-flush option callback: accepts B/K/M/G suffixed or raw values. */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	/* Cap the flush threshold at a quarter of the mmap buffer size. */
	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

/* -z/--compression-level option callback. */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

/* perf_tool callback: write a synthesized event straight to the output. */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

/* As above, but serialized for use by concurrent synthesis threads. */
static int process_locked_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return
ret; 633 } 634 635 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 636 { 637 struct record *rec = to; 638 639 if (record__comp_enabled(rec)) { 640 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 641 bf = map->data; 642 } 643 644 thread->samples++; 645 return record__write(rec, map, bf, size); 646 } 647 648 static volatile int signr = -1; 649 static volatile int child_finished; 650 #ifdef HAVE_EVENTFD_SUPPORT 651 static int done_fd = -1; 652 #endif 653 654 static void sig_handler(int sig) 655 { 656 if (sig == SIGCHLD) 657 child_finished = 1; 658 else 659 signr = sig; 660 661 done = 1; 662 #ifdef HAVE_EVENTFD_SUPPORT 663 { 664 u64 tmp = 1; 665 /* 666 * It is possible for this signal handler to run after done is checked 667 * in the main loop, but before the perf counter fds are polled. If this 668 * happens, the poll() will continue to wait even though done is set, 669 * and will only break out if either another signal is received, or the 670 * counters are ready for read. To ensure the poll() doesn't sleep when 671 * done is set, use an eventfd (done_fd) to wake up the poll(). 
672 */ 673 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 674 pr_err("failed to signal wakeup fd, error: %m\n"); 675 } 676 #endif // HAVE_EVENTFD_SUPPORT 677 } 678 679 static void sigsegv_handler(int sig) 680 { 681 perf_hooks__recover(); 682 sighandler_dump_stack(sig); 683 } 684 685 static void record__sig_exit(void) 686 { 687 if (signr == -1) 688 return; 689 690 signal(signr, SIG_DFL); 691 raise(signr); 692 } 693 694 #ifdef HAVE_AUXTRACE_SUPPORT 695 696 static int record__process_auxtrace(struct perf_tool *tool, 697 struct mmap *map, 698 union perf_event *event, void *data1, 699 size_t len1, void *data2, size_t len2) 700 { 701 struct record *rec = container_of(tool, struct record, tool); 702 struct perf_data *data = &rec->data; 703 size_t padding; 704 u8 pad[8] = {0}; 705 706 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 707 off_t file_offset; 708 int fd = perf_data__fd(data); 709 int err; 710 711 file_offset = lseek(fd, 0, SEEK_CUR); 712 if (file_offset == -1) 713 return -1; 714 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 715 event, file_offset); 716 if (err) 717 return err; 718 } 719 720 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 721 padding = (len1 + len2) & 7; 722 if (padding) 723 padding = 8 - padding; 724 725 record__write(rec, map, event, event->header.size); 726 record__write(rec, map, data1, len1); 727 if (len2) 728 record__write(rec, map, data2, len2); 729 record__write(rec, map, &pad, padding); 730 731 return 0; 732 } 733 734 static int record__auxtrace_mmap_read(struct record *rec, 735 struct mmap *map) 736 { 737 int ret; 738 739 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 740 record__process_auxtrace); 741 if (ret < 0) 742 return ret; 743 744 if (ret) 745 rec->samples++; 746 747 return 0; 748 } 749 750 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 751 struct mmap *map) 752 { 753 int ret; 754 755 ret = auxtrace_mmap__read_snapshot(map, rec->itr, 
 &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

/* Snapshot the AUX area of every mmap that has one; -1 on first failure. */
static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

/* Take one AUX snapshot and update the snapshot trigger accordingly. */
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

/* Take a final AUX snapshot on exit, starting one if none was in flight. */
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

/* Initialize AUX area tracing: itr setup, snapshot/sample options, filters. */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

/* Add a system-wide dummy event configured to capture text poke events. */
static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

/* Can we open <root_dir>/proc/kcore for reading? */
static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

/* Copy kcore into the perf.data directory for later symbol resolution. */
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

/* Create the msg/ack pipe pair for a worker thread; -EINVAL on failure. */
static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

/*
 * Assign to @thread_data the subset of evlist mmaps it services: all of them
 * in per-thread mode, otherwise those whose CPU is set in the thread's map mask.
 */
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

/* Duplicate into the thread's pollfd the evlist fds backing its mmaps. */
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

/* Append one evlist-index/thread-index pair to rec->index_map, growing it. */
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

/* Copy revents from the thread's pollfd back into the matching evlist slots. */
static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos =
rec->index_map[i].thread_pollfd_index; 1113 1114 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1115 e_entries[e_pos].events != t_entries[t_pos].events) { 1116 pr_err("Thread and evlist pollfd index mismatch\n"); 1117 err = -EINVAL; 1118 continue; 1119 } 1120 e_entries[e_pos].revents = t_entries[t_pos].revents; 1121 } 1122 return err; 1123 } 1124 1125 static int record__dup_non_perf_events(struct record *rec, 1126 struct evlist *evlist, 1127 struct record_thread *thread_data) 1128 { 1129 struct fdarray *fda = &evlist->core.pollfd; 1130 int i, ret; 1131 1132 for (i = 0; i < fda->nr; i++) { 1133 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1134 continue; 1135 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1136 if (ret < 0) { 1137 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1138 return ret; 1139 } 1140 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1141 thread_data, ret, fda->entries[i].fd); 1142 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1143 if (ret < 0) { 1144 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1145 return ret; 1146 } 1147 } 1148 return 0; 1149 } 1150 1151 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1152 { 1153 int t, ret; 1154 struct record_thread *thread_data; 1155 1156 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1157 if (!rec->thread_data) { 1158 pr_err("Failed to allocate thread data\n"); 1159 return -ENOMEM; 1160 } 1161 thread_data = rec->thread_data; 1162 1163 for (t = 0; t < rec->nr_threads; t++) 1164 record__thread_data_init_pipes(&thread_data[t]); 1165 1166 for (t = 0; t < rec->nr_threads; t++) { 1167 thread_data[t].rec = rec; 1168 thread_data[t].mask = &rec->thread_masks[t]; 1169 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1170 if (ret) { 1171 pr_err("Failed to initialize thread[%d] maps\n", t); 1172 goto out_free; 1173 } 1174 ret = 
record__thread_data_init_pollfd(&thread_data[t], evlist); 1175 if (ret) { 1176 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1177 goto out_free; 1178 } 1179 if (t) { 1180 thread_data[t].tid = -1; 1181 ret = record__thread_data_open_pipes(&thread_data[t]); 1182 if (ret) { 1183 pr_err("Failed to open thread[%d] communication pipes\n", t); 1184 goto out_free; 1185 } 1186 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1187 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1188 if (ret < 0) { 1189 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1190 goto out_free; 1191 } 1192 thread_data[t].ctlfd_pos = ret; 1193 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1194 thread_data, thread_data[t].ctlfd_pos, 1195 thread_data[t].pipes.msg[0]); 1196 } else { 1197 thread_data[t].tid = gettid(); 1198 1199 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1200 if (ret < 0) 1201 goto out_free; 1202 1203 thread_data[t].ctlfd_pos = -1; /* Not used */ 1204 } 1205 } 1206 1207 return 0; 1208 1209 out_free: 1210 record__free_thread_data(rec); 1211 1212 return ret; 1213 } 1214 1215 static int record__mmap_evlist(struct record *rec, 1216 struct evlist *evlist) 1217 { 1218 int i, ret; 1219 struct record_opts *opts = &rec->opts; 1220 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1221 opts->auxtrace_sample_mode; 1222 char msg[512]; 1223 1224 if (opts->affinity != PERF_AFFINITY_SYS) 1225 cpu__setup_cpunode_map(); 1226 1227 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1228 opts->auxtrace_mmap_pages, 1229 auxtrace_overwrite, 1230 opts->nr_cblocks, opts->affinity, 1231 opts->mmap_flush, opts->comp_level) < 0) { 1232 if (errno == EPERM) { 1233 pr_err("Permission error mapping pages.\n" 1234 "Consider increasing " 1235 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1236 "or try again with a smaller value of -m/--mmap_pages.\n" 1237 "(current value: %u,%u)\n", 1238 opts->mmap_pages, opts->auxtrace_mmap_pages); 
/*
 * mmap the evlist's ring buffers (and AUX area buffers when configured),
 * set up the control fd, allocate per-thread recording data and, in
 * threaded (parallel streaming) mode, create the perf.data directory and
 * attach one output file per mmap.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	/* AUX area buffers are overwritable in snapshot/sample modes. */
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			/* Typically the locked-memory limit was exceeded. */
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		/* Directory output: one data file per mmap, written in parallel. */
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

/* Convenience wrapper: mmap the record's own evlist. */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
1307 */ 1308 if (opts->initial_delay && !pos->immediate && 1309 !target__has_cpu(&opts->target)) 1310 pos->core.attr.enable_on_exec = 1; 1311 else 1312 pos->immediate = 1; 1313 } 1314 1315 evlist__config(evlist, opts, &callchain_param); 1316 1317 evlist__for_each_entry(evlist, pos) { 1318 try_again: 1319 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1320 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1321 if (verbose > 0) 1322 ui__warning("%s\n", msg); 1323 goto try_again; 1324 } 1325 if ((errno == EINVAL || errno == EBADF) && 1326 pos->core.leader != &pos->core && 1327 pos->weak_group) { 1328 pos = evlist__reset_weak_group(evlist, pos, true); 1329 goto try_again; 1330 } 1331 rc = -errno; 1332 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1333 ui__error("%s\n", msg); 1334 goto out; 1335 } 1336 1337 pos->supported = true; 1338 } 1339 1340 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1341 pr_warning( 1342 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1343 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1344 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1345 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1346 "Samples in kernel modules won't be resolved at all.\n\n" 1347 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 1348 "even with a suitable vmlinux or kallsyms file.\n\n"); 1349 } 1350 1351 if (evlist__apply_filters(evlist, &pos)) { 1352 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1353 pos->filter, evsel__name(pos), errno, 1354 str_error_r(errno, msg, sizeof(msg))); 1355 rc = -1; 1356 goto out; 1357 } 1358 1359 rc = record__mmap(rec); 1360 if (rc) 1361 goto out; 1362 1363 session->evlist = evlist; 1364 perf_session__set_id_hdr_size(session); 1365 out: 1366 return rc; 1367 } 1368 1369 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1370 { 1371 if (rec->evlist->first_sample_time == 0) 1372 rec->evlist->first_sample_time = sample_time; 1373 1374 if (sample_time) 1375 rec->evlist->last_sample_time = sample_time; 1376 } 1377 1378 static int process_sample_event(struct perf_tool *tool, 1379 union perf_event *event, 1380 struct perf_sample *sample, 1381 struct evsel *evsel, 1382 struct machine *machine) 1383 { 1384 struct record *rec = container_of(tool, struct record, tool); 1385 1386 set_timestamp_boundary(rec, sample->time); 1387 1388 if (rec->buildid_all) 1389 return 0; 1390 1391 rec->samples++; 1392 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1393 } 1394 1395 static int process_buildids(struct record *rec) 1396 { 1397 struct perf_session *session = rec->session; 1398 1399 if (perf_data__size(&rec->data) == 0) 1400 return 0; 1401 1402 /* 1403 * During this process, it'll load kernel map and replace the 1404 * dso->long_name to a real pathname it found. In this case 1405 * we prefer the vmlinux path like 1406 * /lib/modules/3.16.4/build/vmlinux 1407 * 1408 * rather than build-id path (in debug directory). 1409 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1410 */ 1411 symbol_conf.ignore_vmlinux_buildid = true; 1412 1413 /* 1414 * If --buildid-all is given, it marks all DSO regardless of hits, 1415 * so no need to process samples. 
/*
 * Post-process the just-recorded data to collect build-ids (and sample
 * time boundaries) for the perf.data header. Returns 0 when there is no
 * data, otherwise whatever perf_session__process_events() returns.
 */
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

/*
 * machines__process_guests() callback: synthesize module and kernel mmap
 * events for one guest machine. Errors are only warned about.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

/* Header-only event marking the end of a flush round in the data stream. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

/* Header-only event marking the end of the initial synthesis phase. */
static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

/*
 * If per-mmap affinity is requested and the current thread's affinity
 * mask differs from the map's, migrate this thread onto the CPUs backing
 * the mmap before reading it (better NUMA locality).
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
					(cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

/*
 * Callback for zstd_compress_stream_to_records(): called once with
 * increment == 0 to initialize a PERF_RECORD_COMPRESSED header (returning
 * the header size to reserve), then with each compressed chunk size to
 * grow header.size accordingly.
 */
static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}
/*
 * Compress src into dst as PERF_RECORD_COMPRESSED records. Uses the
 * per-mmap zstd stream when writing to a per-mmap file (threaded mode),
 * otherwise the session-wide stream, and accounts transferred/compressed
 * byte counters on the matching side. Returns the compressed size.
 */
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	/* Keep each compressed record within the maximum event size. */
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed  += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed  += compressed;
	}

	return compressed;
}

/*
 * Drain this thread's mmaps (regular or overwrite, per @overwrite) into
 * the output, via AIO when enabled. @synch forces flush==1 so everything
 * pending is pushed (used at termination). Emits a FINISHED_ROUND event
 * when data was written and we are not in directory (threaded) mode.
 *
 * Returns 0 on success, -1 on push/read failure.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	/* Only this thread's share of the maps, not the whole evlist. */
	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	/* Overwrite maps are read only once data collection is paused. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Temporarily drop the flush threshold to drain fully. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		/* AUX data is read here only in full-trace mode. */
		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

/* Drain both the regular and the overwrite mmaps of this thread. */
static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

/*
 * fdarray__filter() callback: drop our reference on the mmap backing a
 * pollfd entry that got filtered out (POLLERR/POLLHUP).
 */
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}
/*
 * Body of a worker (reader) thread in parallel streaming mode: drain the
 * thread's mmaps, poll when idle, and exit when the main thread closes
 * the write end of the msg pipe (POLLHUP on ctlfd_pos). Notifies the main
 * thread via the ack pipe on start and on termination.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	/* Publish this thread's data in the TLS pointer used by helpers. */
	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {
			/* Nothing new was read: block until there is activity. */

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		/* Main thread closed the msg pipe: do one final drain and stop. */
		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	/* Synchronous final flush of anything still pending. */
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

/*
 * Enable every header feature, then clear the ones that do not apply to
 * this record session (no tracepoints, no branch stack, ...).
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

/*
 * Finalize the output file(s): record data sizes, collect build-ids
 * (unless disabled) and rewrite the header. No-op for pipe output.
 */
static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

/*
 * Synthesize thread map events for the forked workload process. Runs
 * either before or after recording depending on --tail-synthesize;
 * @tail selects which phase the caller is in.
 */
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 needs_mmap,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

/* Emit PERF_RECORD_FINISHED_INIT in the phase selected by --tail-synthesize. */
static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}
static int record__synthesize(struct record *rec, bool tail);

/*
 * Rotate the output file (--switch-output): finalize the current
 * perf.data under a timestamped name, and unless we are exiting, reset
 * counters and re-synthesize tracking events into the new file. With
 * --switch-output n, keeps only the last n files.
 *
 * Returns the new output fd (>= 0) or a negative error code.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh file: restart the size accounting. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		/* Ring of at most num_files outputs: drop the oldest. */
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

/* errno delivered by the failing workload via SIGUSR1 (see below). */
static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

/*
 * Return the first mapped perf_event_mmap_page of the evlist (regular
 * mmaps preferred over overwrite ones), or NULL if nothing is mapped.
 * Used as the reference page for time conversion parameters.
 */
static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

/* Pick the reference mmap page from the record's evlist, if any. */
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
/*
 * Synthesize all the non-sample metadata events the report side needs:
 * time conversion, id index, auxtrace info, kernel/module mmaps, guest
 * machines, extra attrs, thread/cpu maps, BPF and cgroup events, and
 * finally the existing task/mmap state of the target threads (optionally
 * in parallel via --num-thread-synthesize).
 *
 * Runs either before or after recording depending on --tail-synthesize;
 * @tail selects which phase the caller is in. Returns 0 or a negative
 * error code; BPF/cgroup synthesis failures are only warned about.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0) {
		/* Best effort: BPF info is useful but not essential. */
		pr_warning("Couldn't synthesize bpf events.\n");
		err = 0;
	}

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0) {
			/* Best effort as well. */
			pr_warning("Couldn't synthesize cgroup events.\n");
			err = 0;
		}
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		/* Serialize writes from the parallel synthesis workers. */
		mutex_init(&synth_lock);
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->sample_address,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_singlethreaded();
		mutex_destroy(&synth_lock);
	}

out:
	return err;
}
/*
 * Side-band evlist callback: a --switch-output-event fired, so signal
 * the main record thread (SIGUSR2) to rotate the output.
 */
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

/*
 * Set up the side-band evlist: wire the --switch-output-event callback if
 * one was requested, add the BPF side-band event when built with libbpf,
 * and start the side-band thread. Failure to start the thread only
 * disables BPF event annotation, it is not fatal.
 */
static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n.");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

/*
 * Record reference wall-clock (TOD) and --clockid timestamps in the
 * session header so reports can map perf timestamps to wall-clock time.
 * No-op unless --clockid was given. Returns 0 on success, -1 on failure.
 */
static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

	session->header.env.clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	session->header.env.clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	session->header.env.clock.clockid_ns = ref;
	return 0;
}
2083 } 2084 2085 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2086 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2087 2088 session->header.env.clock.tod_ns = ref; 2089 2090 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2091 (u64) ref_clockid.tv_nsec; 2092 2093 session->header.env.clock.clockid_ns = ref; 2094 return 0; 2095 } 2096 2097 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2098 { 2099 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2100 trigger_hit(&auxtrace_snapshot_trigger); 2101 auxtrace_record__snapshot_started = 1; 2102 if (auxtrace_record__snapshot_start(rec->itr)) 2103 trigger_error(&auxtrace_snapshot_trigger); 2104 } 2105 } 2106 2107 static void record__uniquify_name(struct record *rec) 2108 { 2109 struct evsel *pos; 2110 struct evlist *evlist = rec->evlist; 2111 char *new_name; 2112 int ret; 2113 2114 if (!perf_pmu__has_hybrid()) 2115 return; 2116 2117 evlist__for_each_entry(evlist, pos) { 2118 if (!evsel__is_hybrid(pos)) 2119 continue; 2120 2121 if (strchr(pos->name, '/')) 2122 continue; 2123 2124 ret = asprintf(&new_name, "%s/%s/", 2125 pos->pmu_name, pos->name); 2126 if (ret) { 2127 free(pos->name); 2128 pos->name = new_name; 2129 } 2130 } 2131 } 2132 2133 static int record__terminate_thread(struct record_thread *thread_data) 2134 { 2135 int err; 2136 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2137 pid_t tid = thread_data->tid; 2138 2139 close(thread_data->pipes.msg[1]); 2140 thread_data->pipes.msg[1] = -1; 2141 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2142 if (err > 0) 2143 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2144 else 2145 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2146 thread->tid, tid); 2147 2148 return 0; 2149 } 2150 2151 static int record__start_threads(struct record *rec) 2152 { 2153 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2154 struct record_thread *thread_data = rec->thread_data; 2155 sigset_t full, mask; 2156 pthread_t handle; 
2157 pthread_attr_t attrs; 2158 2159 thread = &thread_data[0]; 2160 2161 if (!record__threads_enabled(rec)) 2162 return 0; 2163 2164 sigfillset(&full); 2165 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2166 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2167 return -1; 2168 } 2169 2170 pthread_attr_init(&attrs); 2171 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2172 2173 for (t = 1; t < nr_threads; t++) { 2174 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2175 2176 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2177 pthread_attr_setaffinity_np(&attrs, 2178 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2179 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2180 #endif 2181 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2182 for (tt = 1; tt < t; tt++) 2183 record__terminate_thread(&thread_data[t]); 2184 pr_err("Failed to start threads: %s\n", strerror(errno)); 2185 ret = -1; 2186 goto out_err; 2187 } 2188 2189 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2190 if (err > 0) 2191 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2192 thread_msg_tags[msg]); 2193 else 2194 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2195 thread->tid, rec->thread_data[t].tid); 2196 } 2197 2198 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2199 (cpu_set_t *)thread->mask->affinity.bits); 2200 2201 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2202 2203 out_err: 2204 pthread_attr_destroy(&attrs); 2205 2206 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2207 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2208 ret = -1; 2209 } 2210 2211 return ret; 2212 } 2213 2214 static int record__stop_threads(struct record *rec) 2215 { 2216 int t; 2217 struct record_thread *thread_data = rec->thread_data; 2218 2219 for (t = 1; t < rec->nr_threads; t++) 2220 record__terminate_thread(&thread_data[t]); 2221 
2222 for (t = 0; t < rec->nr_threads; t++) { 2223 rec->samples += thread_data[t].samples; 2224 if (!record__threads_enabled(rec)) 2225 continue; 2226 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2227 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2228 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2229 thread_data[t].samples, thread_data[t].waking); 2230 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2231 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2232 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2233 else 2234 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2235 } 2236 2237 return 0; 2238 } 2239 2240 static unsigned long record__waking(struct record *rec) 2241 { 2242 int t; 2243 unsigned long waking = 0; 2244 struct record_thread *thread_data = rec->thread_data; 2245 2246 for (t = 0; t < rec->nr_threads; t++) 2247 waking += thread_data[t].waking; 2248 2249 return waking; 2250 } 2251 2252 static int __cmd_record(struct record *rec, int argc, const char **argv) 2253 { 2254 int err; 2255 int status = 0; 2256 const bool forks = argc > 0; 2257 struct perf_tool *tool = &rec->tool; 2258 struct record_opts *opts = &rec->opts; 2259 struct perf_data *data = &rec->data; 2260 struct perf_session *session; 2261 bool disabled = false, draining = false; 2262 int fd; 2263 float ratio = 0; 2264 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2265 2266 atexit(record__sig_exit); 2267 signal(SIGCHLD, sig_handler); 2268 signal(SIGINT, sig_handler); 2269 signal(SIGTERM, sig_handler); 2270 signal(SIGSEGV, sigsegv_handler); 2271 2272 if (rec->opts.record_namespaces) 2273 tool->namespace_events = true; 2274 2275 if (rec->opts.record_cgroup) { 2276 #ifdef HAVE_FILE_HANDLE 2277 tool->cgroup_events = true; 2278 #else 2279 pr_err("cgroup tracking is not supported\n"); 2280 return -1; 2281 #endif 2282 } 2283 2284 if 
(rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		/* SIGUSR2 drives AUX snapshots and/or output switching. */
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	/* Parallel streaming needs per-thread files; pipe/AUX modes can't do that. */
	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	/* Eventfd used to wake the trace-reading poll loop on 'done'. */
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	session->header.env.comp_type = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	if (record__init_clock(rec))
		return -1;

	record__init_features(rec);

	if (forks) {
		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
					       workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	record__uniquify_name(rec);

	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_free_threads;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->core.nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	/* Assume failure until the checks below pass. */
	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		evlist__start_workload(rec->evlist);
	}

	/* -D/--delay: start disabled, optionally enable after the delay. */
	if (opts->initial_delay) {
		pr_info(EVLIST_DISABLED_MSG);
		if (opts->initial_delay > 0) {
			usleep(opts->initial_delay * USEC_PER_MSEC);
			evlist__enable(rec->evlist);
			pr_info(EVLIST_ENABLED_MSG);
		}
	}

	err = event_enable_timer__start(rec->evlist->eet);
	if (err)
		goto out_child;

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	/*
	 * Must write FINISHED_INIT so it will be seen after all other
	 * synthesized user events, but before any regular events.
	 */
	err = write_finished_init(rec, false);
	if (err < 0)
		goto out_child;

	/* Main trace loop: drain mmaps, service signals/triggers/control fds. */
	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					record__waking(rec));
			thread->waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples since the last pass: block in poll(). */
		if (hits == thread->samples) {
			if (done || draining)
				break;
			err = fdarray__poll(&thread->pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				draining = true;

			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
			if (err)
				goto out_child;
		}

		/* Service commands arriving on the --control fd, if any. */
		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
			switch (cmd) {
			case EVLIST_CTL_CMD_SNAPSHOT:
				hit_auxtrace_snapshot_trigger(rec);
				evlist__ctlfd_ack(rec->evlist);
				break;
			case EVLIST_CTL_CMD_STOP:
				done = 1;
				break;
			case EVLIST_CTL_CMD_ACK:
			case EVLIST_CTL_CMD_UNSUPPORTED:
			case EVLIST_CTL_CMD_ENABLE:
			case EVLIST_CTL_CMD_DISABLE:
			case EVLIST_CTL_CMD_EVLIST:
			case EVLIST_CTL_CMD_PING:
			default:
				break;
			}
		}

		err = event_enable_timer__process(rec->evlist->eet);
		if (err < 0)
			goto out_child;
		if (err) {
			/* Positive return means the timer asked us to stop. */
			err = 0;
			done = 1;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE], strevsels[2048];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));

		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);

		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
			strevsels, argv[0], emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
			record__waking(rec));

	write_finished_init(rec, true);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Stop reader threads and drain whatever is still in the mmaps. */
	record__stop_threads(rec);
	record__mmap_read_all(rec, true);
out_free_threads:
	record__free_thread_data(rec);
	evlist__finalize_ctlfd(rec->evlist);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		/* + 0.5 rounds to the nearest integer for the header field. */
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (rec->off_cpu)
		rec->bytes_written += off_cpu_write(rec->session);

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0)
		close(done_fd);
#endif
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(rec->sb_evlist);
	return status;
}

/* Log the configured callchain collection mode (and DWARF dump size). */
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

/*
 * Parse a --call-graph argument into 'callchain' and adjust record
 * options accordingly.  Returns 0 on success, non-zero on parse error.
 */
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode =
CALLCHAIN_NONE; 2791 pr_debug("callchain: disabled\n"); 2792 return 0; 2793 } 2794 2795 ret = parse_callchain_record_opt(arg, callchain); 2796 if (!ret) { 2797 /* Enable data address sampling for DWARF unwind. */ 2798 if (callchain->record_mode == CALLCHAIN_DWARF) 2799 record->sample_address = true; 2800 callchain_debug(callchain); 2801 } 2802 2803 return ret; 2804 } 2805 2806 int record_parse_callchain_opt(const struct option *opt, 2807 const char *arg, 2808 int unset) 2809 { 2810 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2811 } 2812 2813 int record_callchain_opt(const struct option *opt, 2814 const char *arg __maybe_unused, 2815 int unset __maybe_unused) 2816 { 2817 struct callchain_param *callchain = opt->value; 2818 2819 callchain->enabled = true; 2820 2821 if (callchain->record_mode == CALLCHAIN_NONE) 2822 callchain->record_mode = CALLCHAIN_FP; 2823 2824 callchain_debug(callchain); 2825 return 0; 2826 } 2827 2828 static int perf_record_config(const char *var, const char *value, void *cb) 2829 { 2830 struct record *rec = cb; 2831 2832 if (!strcmp(var, "record.build-id")) { 2833 if (!strcmp(value, "cache")) 2834 rec->no_buildid_cache = false; 2835 else if (!strcmp(value, "no-cache")) 2836 rec->no_buildid_cache = true; 2837 else if (!strcmp(value, "skip")) 2838 rec->no_buildid = true; 2839 else if (!strcmp(value, "mmap")) 2840 rec->buildid_mmap = true; 2841 else 2842 return -1; 2843 return 0; 2844 } 2845 if (!strcmp(var, "record.call-graph")) { 2846 var = "call-graph.record-mode"; 2847 return perf_default_config(var, value, cb); 2848 } 2849 #ifdef HAVE_AIO_SUPPORT 2850 if (!strcmp(var, "record.aio")) { 2851 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2852 if (!rec->opts.nr_cblocks) 2853 rec->opts.nr_cblocks = nr_cblocks_default; 2854 } 2855 #endif 2856 if (!strcmp(var, "record.debuginfod")) { 2857 rec->debuginfod.urls = strdup(value); 2858 if (!rec->debuginfod.urls) 2859 return -ENOMEM; 2860 rec->debuginfod.set = true; 2861 
} 2862 2863 return 0; 2864 } 2865 2866 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2867 { 2868 struct record *rec = (struct record *)opt->value; 2869 2870 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2871 } 2872 2873 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2874 { 2875 struct record_opts *opts = (struct record_opts *)opt->value; 2876 2877 if (unset || !str) 2878 return 0; 2879 2880 if (!strcasecmp(str, "node")) 2881 opts->affinity = PERF_AFFINITY_NODE; 2882 else if (!strcasecmp(str, "cpu")) 2883 opts->affinity = PERF_AFFINITY_CPU; 2884 2885 return 0; 2886 } 2887 2888 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2889 { 2890 mask->nbits = nr_bits; 2891 mask->bits = bitmap_zalloc(mask->nbits); 2892 if (!mask->bits) 2893 return -ENOMEM; 2894 2895 return 0; 2896 } 2897 2898 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2899 { 2900 bitmap_free(mask->bits); 2901 mask->nbits = 0; 2902 } 2903 2904 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2905 { 2906 int ret; 2907 2908 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2909 if (ret) { 2910 mask->affinity.bits = NULL; 2911 return ret; 2912 } 2913 2914 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2915 if (ret) { 2916 record__mmap_cpu_mask_free(&mask->maps); 2917 mask->maps.bits = NULL; 2918 } 2919 2920 return ret; 2921 } 2922 2923 static void record__thread_mask_free(struct thread_mask *mask) 2924 { 2925 record__mmap_cpu_mask_free(&mask->maps); 2926 record__mmap_cpu_mask_free(&mask->affinity); 2927 } 2928 2929 static int record__parse_threads(const struct option *opt, const char *str, int unset) 2930 { 2931 int s; 2932 struct record_opts *opts = opt->value; 2933 2934 if (unset || !str || !strlen(str)) { 2935 opts->threads_spec = THREAD_SPEC__CPU; 2936 } else { 2937 for (s = 1; s < 
THREAD_SPEC__MAX; s++) { 2938 if (s == THREAD_SPEC__USER) { 2939 opts->threads_user_spec = strdup(str); 2940 if (!opts->threads_user_spec) 2941 return -ENOMEM; 2942 opts->threads_spec = THREAD_SPEC__USER; 2943 break; 2944 } 2945 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 2946 opts->threads_spec = s; 2947 break; 2948 } 2949 } 2950 } 2951 2952 if (opts->threads_spec == THREAD_SPEC__USER) 2953 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 2954 else 2955 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 2956 2957 return 0; 2958 } 2959 2960 static int parse_output_max_size(const struct option *opt, 2961 const char *str, int unset) 2962 { 2963 unsigned long *s = (unsigned long *)opt->value; 2964 static struct parse_tag tags_size[] = { 2965 { .tag = 'B', .mult = 1 }, 2966 { .tag = 'K', .mult = 1 << 10 }, 2967 { .tag = 'M', .mult = 1 << 20 }, 2968 { .tag = 'G', .mult = 1 << 30 }, 2969 { .tag = 0 }, 2970 }; 2971 unsigned long val; 2972 2973 if (unset) { 2974 *s = 0; 2975 return 0; 2976 } 2977 2978 val = parse_tag_value(str, tags_size); 2979 if (val != (unsigned long) -1) { 2980 *s = val; 2981 return 0; 2982 } 2983 2984 return -1; 2985 } 2986 2987 static int record__parse_mmap_pages(const struct option *opt, 2988 const char *str, 2989 int unset __maybe_unused) 2990 { 2991 struct record_opts *opts = opt->value; 2992 char *s, *p; 2993 unsigned int mmap_pages; 2994 int ret; 2995 2996 if (!str) 2997 return -EINVAL; 2998 2999 s = strdup(str); 3000 if (!s) 3001 return -ENOMEM; 3002 3003 p = strchr(s, ','); 3004 if (p) 3005 *p = '\0'; 3006 3007 if (*s) { 3008 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3009 if (ret) 3010 goto out_free; 3011 opts->mmap_pages = mmap_pages; 3012 } 3013 3014 if (!p) { 3015 ret = 0; 3016 goto out_free; 3017 } 3018 3019 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3020 if (ret) 3021 goto out_free; 3022 3023 opts->auxtrace_mmap_pages = mmap_pages; 3024 3025 out_free: 3026 
free(s); 3027 return ret; 3028 } 3029 3030 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3031 { 3032 } 3033 3034 static int parse_control_option(const struct option *opt, 3035 const char *str, 3036 int unset __maybe_unused) 3037 { 3038 struct record_opts *opts = opt->value; 3039 3040 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3041 } 3042 3043 static void switch_output_size_warn(struct record *rec) 3044 { 3045 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3046 struct switch_output *s = &rec->switch_output; 3047 3048 wakeup_size /= 2; 3049 3050 if (s->size < wakeup_size) { 3051 char buf[100]; 3052 3053 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3054 pr_warning("WARNING: switch-output data size lower than " 3055 "wakeup kernel buffer size (%s) " 3056 "expect bigger perf.data sizes\n", buf); 3057 } 3058 } 3059 3060 static int switch_output_setup(struct record *rec) 3061 { 3062 struct switch_output *s = &rec->switch_output; 3063 static struct parse_tag tags_size[] = { 3064 { .tag = 'B', .mult = 1 }, 3065 { .tag = 'K', .mult = 1 << 10 }, 3066 { .tag = 'M', .mult = 1 << 20 }, 3067 { .tag = 'G', .mult = 1 << 30 }, 3068 { .tag = 0 }, 3069 }; 3070 static struct parse_tag tags_time[] = { 3071 { .tag = 's', .mult = 1 }, 3072 { .tag = 'm', .mult = 60 }, 3073 { .tag = 'h', .mult = 60*60 }, 3074 { .tag = 'd', .mult = 60*60*24 }, 3075 { .tag = 0 }, 3076 }; 3077 unsigned long val; 3078 3079 /* 3080 * If we're using --switch-output-events, then we imply its 3081 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3082 * thread to its parent. 
3083 */ 3084 if (rec->switch_output_event_set) { 3085 if (record__threads_enabled(rec)) { 3086 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3087 return 0; 3088 } 3089 goto do_signal; 3090 } 3091 3092 if (!s->set) 3093 return 0; 3094 3095 if (record__threads_enabled(rec)) { 3096 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3097 return 0; 3098 } 3099 3100 if (!strcmp(s->str, "signal")) { 3101 do_signal: 3102 s->signal = true; 3103 pr_debug("switch-output with SIGUSR2 signal\n"); 3104 goto enabled; 3105 } 3106 3107 val = parse_tag_value(s->str, tags_size); 3108 if (val != (unsigned long) -1) { 3109 s->size = val; 3110 pr_debug("switch-output with %s size threshold\n", s->str); 3111 goto enabled; 3112 } 3113 3114 val = parse_tag_value(s->str, tags_time); 3115 if (val != (unsigned long) -1) { 3116 s->time = val; 3117 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3118 s->str, s->time); 3119 goto enabled; 3120 } 3121 3122 return -1; 3123 3124 enabled: 3125 rec->timestamp_filename = true; 3126 s->enabled = true; 3127 3128 if (s->size && !rec->opts.no_buffering) 3129 switch_output_size_warn(rec); 3130 3131 return 0; 3132 } 3133 3134 static const char * const __record_usage[] = { 3135 "perf record [<options>] [<command>]", 3136 "perf record [<options>] -- <command> [<options>]", 3137 NULL 3138 }; 3139 const char * const *record_usage = __record_usage; 3140 3141 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3142 struct perf_sample *sample, struct machine *machine) 3143 { 3144 /* 3145 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3146 * no need to add them twice. 
3147 */ 3148 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3149 return 0; 3150 return perf_event__process_mmap(tool, event, sample, machine); 3151 } 3152 3153 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3154 struct perf_sample *sample, struct machine *machine) 3155 { 3156 /* 3157 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3158 * no need to add them twice. 3159 */ 3160 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3161 return 0; 3162 3163 return perf_event__process_mmap2(tool, event, sample, machine); 3164 } 3165 3166 static int process_timestamp_boundary(struct perf_tool *tool, 3167 union perf_event *event __maybe_unused, 3168 struct perf_sample *sample, 3169 struct machine *machine __maybe_unused) 3170 { 3171 struct record *rec = container_of(tool, struct record, tool); 3172 3173 set_timestamp_boundary(rec, sample->time); 3174 return 0; 3175 } 3176 3177 static int parse_record_synth_option(const struct option *opt, 3178 const char *str, 3179 int unset __maybe_unused) 3180 { 3181 struct record_opts *opts = opt->value; 3182 char *p = strdup(str); 3183 3184 if (p == NULL) 3185 return -1; 3186 3187 opts->synth = parse_synth_opt(p); 3188 free(p); 3189 3190 if (opts->synth < 0) { 3191 pr_err("Invalid synth option: %s\n", str); 3192 return -1; 3193 } 3194 return 0; 3195 } 3196 3197 /* 3198 * XXX Ideally would be local to cmd_record() and passed to a record__new 3199 * because we need to have access to it in record__exit, that is called 3200 * after cmd_record() exits, but since record_options need to be accessible to 3201 * builtin-script, leave it here. 3202 * 3203 * At least we don't ouch it in all the other functions here directly. 3204 * 3205 * Just say no to tons of global variables, sigh. 
3206 */ 3207 static struct record record = { 3208 .opts = { 3209 .sample_time = true, 3210 .mmap_pages = UINT_MAX, 3211 .user_freq = UINT_MAX, 3212 .user_interval = ULLONG_MAX, 3213 .freq = 4000, 3214 .target = { 3215 .uses_mmap = true, 3216 .default_per_cpu = true, 3217 }, 3218 .mmap_flush = MMAP_FLUSH_DEFAULT, 3219 .nr_threads_synthesize = 1, 3220 .ctl_fd = -1, 3221 .ctl_fd_ack = -1, 3222 .synth = PERF_SYNTH_ALL, 3223 }, 3224 .tool = { 3225 .sample = process_sample_event, 3226 .fork = perf_event__process_fork, 3227 .exit = perf_event__process_exit, 3228 .comm = perf_event__process_comm, 3229 .namespaces = perf_event__process_namespaces, 3230 .mmap = build_id__process_mmap, 3231 .mmap2 = build_id__process_mmap2, 3232 .itrace_start = process_timestamp_boundary, 3233 .aux = process_timestamp_boundary, 3234 .ordered_events = true, 3235 }, 3236 }; 3237 3238 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3239 "\n\t\t\t\tDefault: fp"; 3240 3241 static bool dry_run; 3242 3243 /* 3244 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3245 * with it and switch to use the library functions in perf_evlist that came 3246 * from builtin-record.c, i.e. use record_opts, 3247 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3248 * using pipes, etc. 3249 */ 3250 static struct option __record_options[] = { 3251 OPT_CALLBACK('e', "event", &record.evlist, "event", 3252 "event selector. 
use 'perf list' to list available events", 3253 parse_events_option), 3254 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3255 "event filter", parse_filter), 3256 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3257 NULL, "don't record events from perf itself", 3258 exclude_perf), 3259 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3260 "record events on existing process id"), 3261 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3262 "record events on existing thread id"), 3263 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3264 "collect data with this RT SCHED_FIFO priority"), 3265 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3266 "collect data without buffering"), 3267 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3268 "collect raw sample records from all opened counters"), 3269 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3270 "system-wide collection from all CPUs"), 3271 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3272 "list of cpus to monitor"), 3273 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3274 OPT_STRING('o', "output", &record.data.path, "file", 3275 "output file name"), 3276 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3277 &record.opts.no_inherit_set, 3278 "child tasks do not inherit counters"), 3279 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3280 "synthesize non-sample events at the end of output"), 3281 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3282 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3283 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3284 "Fail if the specified frequency can't be used"), 3285 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3286 "profile at this frequency", 3287 record__parse_freq), 3288 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3289 "number of mmap data 
pages and AUX area tracing mmap pages", 3290 record__parse_mmap_pages), 3291 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3292 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3293 record__mmap_flush_parse), 3294 OPT_BOOLEAN(0, "group", &record.opts.group, 3295 "put the counters into a counter group"), 3296 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3297 NULL, "enables call-graph recording" , 3298 &record_callchain_opt), 3299 OPT_CALLBACK(0, "call-graph", &record.opts, 3300 "record_mode[,record_size]", record_callchain_help, 3301 &record_parse_callchain_opt), 3302 OPT_INCR('v', "verbose", &verbose, 3303 "be more verbose (show counter open errors, etc)"), 3304 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3305 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3306 "per thread counts"), 3307 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3308 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3309 "Record the sample physical addresses"), 3310 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3311 "Record the sampled data address data page size"), 3312 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3313 "Record the sampled code address (ip) page size"), 3314 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3315 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3316 "Record the sample identifier"), 3317 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3318 &record.opts.sample_time_set, 3319 "Record the sample timestamps"), 3320 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3321 "Record the sample period"), 3322 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3323 "don't sample"), 3324 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3325 &record.no_buildid_cache_set, 3326 "do not update the buildid cache"), 
3327 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3328 &record.no_buildid_set, 3329 "do not collect buildids in perf.data"), 3330 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3331 "monitor event in cgroup name only", 3332 parse_cgroups), 3333 OPT_CALLBACK('D', "delay", &record, "ms", 3334 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3335 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3336 record__parse_event_enable_time), 3337 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3338 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3339 "user to profile"), 3340 3341 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3342 "branch any", "sample any taken branches", 3343 parse_branch_stack), 3344 3345 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3346 "branch filter mask", "branch stack filter modes", 3347 parse_branch_stack), 3348 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3349 "sample by weight (on special events only)"), 3350 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3351 "sample transaction flags (special events only)"), 3352 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3353 "use per-thread mmaps"), 3354 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3355 "sample selected machine registers on interrupt," 3356 " use '-I?' to list register names", parse_intr_regs), 3357 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3358 "sample selected machine registers on interrupt," 3359 " use '--user-regs=?' 
to list register names", parse_user_regs), 3360 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3361 "Record running/enabled time of read (:S) events"), 3362 OPT_CALLBACK('k', "clockid", &record.opts, 3363 "clockid", "clockid to use for events, see clock_gettime()", 3364 parse_clockid), 3365 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3366 "opts", "AUX area tracing Snapshot Mode", ""), 3367 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3368 "opts", "sample AUX area", ""), 3369 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3370 "per thread proc mmap processing timeout in ms"), 3371 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3372 "Record namespaces events"), 3373 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3374 "Record cgroup events"), 3375 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3376 &record.opts.record_switch_events_set, 3377 "Record context switch events"), 3378 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3379 "Configure all used events to run in kernel space.", 3380 PARSE_OPT_EXCLUSIVE), 3381 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3382 "Configure all used events to run in user space.", 3383 PARSE_OPT_EXCLUSIVE), 3384 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3385 "collect kernel callchains"), 3386 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3387 "collect user callchains"), 3388 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3389 "clang binary to use for compiling BPF scriptlets"), 3390 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3391 "options passed to clang when compiling BPF scriptlets"), 3392 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3393 "file", "vmlinux pathname"), 3394 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3395 "Record build-id of all DSOs regardless of hits"), 3396 OPT_BOOLEAN(0, 
"buildid-mmap", &record.buildid_mmap, 3397 "Record build-id in map events"), 3398 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3399 "append timestamp to output filename"), 3400 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3401 "Record timestamp boundary (time of first/last samples)"), 3402 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3403 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3404 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3405 "signal"), 3406 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3407 "switch output event selector. use 'perf list' to list available events", 3408 parse_events_option_new_evlist), 3409 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3410 "Limit number of switch output generated files"), 3411 OPT_BOOLEAN(0, "dry-run", &dry_run, 3412 "Parse options then exit"), 3413 #ifdef HAVE_AIO_SUPPORT 3414 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3415 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3416 record__aio_parse), 3417 #endif 3418 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3419 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3420 record__parse_affinity), 3421 #ifdef HAVE_ZSTD_SUPPORT 3422 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3423 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3424 record__parse_comp_level), 3425 #endif 3426 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3427 "size", "Limit the maximum size of the output file", parse_output_max_size), 3428 OPT_UINTEGER(0, "num-thread-synthesize", 3429 &record.opts.nr_threads_synthesize, 3430 "number of threads to run for event synthesis"), 3431 
#ifdef HAVE_LIBPFM 3432 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3433 "libpfm4 event selector. use 'perf list' to list available events", 3434 parse_libpfm_events_option), 3435 #endif 3436 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3437 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3438 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3439 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3440 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3441 parse_control_option), 3442 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3443 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3444 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3445 &record.debuginfod.set, "debuginfod urls", 3446 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3447 "system"), 3448 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3449 "write collected trace data into several data files using parallel threads", 3450 record__parse_threads), 3451 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3452 OPT_END() 3453 }; 3454 3455 struct option *record_options = __record_options; 3456 3457 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3458 { 3459 struct perf_cpu cpu; 3460 int idx; 3461 3462 if (cpu_map__is_dummy(cpus)) 3463 return 0; 3464 3465 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3466 if (cpu.cpu == -1) 3467 continue; 3468 /* Return ENODEV is input cpu is greater than max cpu */ 3469 if ((unsigned long)cpu.cpu > mask->nbits) 3470 return -ENODEV; 3471 set_bit(cpu.cpu, mask->bits); 3472 } 3473 3474 return 0; 3475 } 3476 3477 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3478 { 3479 
struct perf_cpu_map *cpus; 3480 3481 cpus = perf_cpu_map__new(mask_spec); 3482 if (!cpus) 3483 return -ENOMEM; 3484 3485 bitmap_zero(mask->bits, mask->nbits); 3486 if (record__mmap_cpu_mask_init(mask, cpus)) 3487 return -ENODEV; 3488 3489 perf_cpu_map__put(cpus); 3490 3491 return 0; 3492 } 3493 3494 static void record__free_thread_masks(struct record *rec, int nr_threads) 3495 { 3496 int t; 3497 3498 if (rec->thread_masks) 3499 for (t = 0; t < nr_threads; t++) 3500 record__thread_mask_free(&rec->thread_masks[t]); 3501 3502 zfree(&rec->thread_masks); 3503 } 3504 3505 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3506 { 3507 int t, ret; 3508 3509 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3510 if (!rec->thread_masks) { 3511 pr_err("Failed to allocate thread masks\n"); 3512 return -ENOMEM; 3513 } 3514 3515 for (t = 0; t < nr_threads; t++) { 3516 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3517 if (ret) { 3518 pr_err("Failed to allocate thread masks[%d]\n", t); 3519 goto out_free; 3520 } 3521 } 3522 3523 return 0; 3524 3525 out_free: 3526 record__free_thread_masks(rec, nr_threads); 3527 3528 return ret; 3529 } 3530 3531 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3532 { 3533 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3534 3535 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3536 if (ret) 3537 return ret; 3538 3539 rec->nr_threads = nr_cpus; 3540 pr_debug("nr_threads: %d\n", rec->nr_threads); 3541 3542 for (t = 0; t < rec->nr_threads; t++) { 3543 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3544 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3545 if (verbose) { 3546 pr_debug("thread_masks[%d]: ", t); 3547 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3548 pr_debug("thread_masks[%d]: ", t); 3549 
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3550 } 3551 } 3552 3553 return 0; 3554 } 3555 3556 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3557 const char **maps_spec, const char **affinity_spec, 3558 u32 nr_spec) 3559 { 3560 u32 s; 3561 int ret = 0, t = 0; 3562 struct mmap_cpu_mask cpus_mask; 3563 struct thread_mask thread_mask, full_mask, *thread_masks; 3564 3565 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3566 if (ret) { 3567 pr_err("Failed to allocate CPUs mask\n"); 3568 return ret; 3569 } 3570 3571 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3572 if (ret) { 3573 pr_err("Failed to init cpu mask\n"); 3574 goto out_free_cpu_mask; 3575 } 3576 3577 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3578 if (ret) { 3579 pr_err("Failed to allocate full mask\n"); 3580 goto out_free_cpu_mask; 3581 } 3582 3583 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3584 if (ret) { 3585 pr_err("Failed to allocate thread mask\n"); 3586 goto out_free_full_and_cpu_masks; 3587 } 3588 3589 for (s = 0; s < nr_spec; s++) { 3590 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3591 if (ret) { 3592 pr_err("Failed to initialize maps thread mask\n"); 3593 goto out_free; 3594 } 3595 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3596 if (ret) { 3597 pr_err("Failed to initialize affinity thread mask\n"); 3598 goto out_free; 3599 } 3600 3601 /* ignore invalid CPUs but do not allow empty masks */ 3602 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3603 cpus_mask.bits, thread_mask.maps.nbits)) { 3604 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3605 ret = -EINVAL; 3606 goto out_free; 3607 } 3608 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3609 cpus_mask.bits, thread_mask.affinity.nbits)) { 3610 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3611 ret = 
-EINVAL; 3612 goto out_free; 3613 } 3614 3615 /* do not allow intersection with other masks (full_mask) */ 3616 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3617 thread_mask.maps.nbits)) { 3618 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3619 ret = -EINVAL; 3620 goto out_free; 3621 } 3622 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3623 thread_mask.affinity.nbits)) { 3624 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3625 ret = -EINVAL; 3626 goto out_free; 3627 } 3628 3629 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3630 thread_mask.maps.bits, full_mask.maps.nbits); 3631 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3632 thread_mask.affinity.bits, full_mask.maps.nbits); 3633 3634 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3635 if (!thread_masks) { 3636 pr_err("Failed to reallocate thread masks\n"); 3637 ret = -ENOMEM; 3638 goto out_free; 3639 } 3640 rec->thread_masks = thread_masks; 3641 rec->thread_masks[t] = thread_mask; 3642 if (verbose) { 3643 pr_debug("thread_masks[%d]: ", t); 3644 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3645 pr_debug("thread_masks[%d]: ", t); 3646 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3647 } 3648 t++; 3649 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3650 if (ret) { 3651 pr_err("Failed to allocate thread mask\n"); 3652 goto out_free_full_and_cpu_masks; 3653 } 3654 } 3655 rec->nr_threads = t; 3656 pr_debug("nr_threads: %d\n", rec->nr_threads); 3657 if (!rec->nr_threads) 3658 ret = -EINVAL; 3659 3660 out_free: 3661 record__thread_mask_free(&thread_mask); 3662 out_free_full_and_cpu_masks: 3663 record__thread_mask_free(&full_mask); 3664 out_free_cpu_mask: 3665 record__mmap_cpu_mask_free(&cpus_mask); 3666 3667 return ret; 3668 } 3669 3670 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3671 { 3672 
int ret; 3673 struct cpu_topology *topo; 3674 3675 topo = cpu_topology__new(); 3676 if (!topo) { 3677 pr_err("Failed to allocate CPU topology\n"); 3678 return -ENOMEM; 3679 } 3680 3681 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3682 topo->core_cpus_list, topo->core_cpus_lists); 3683 cpu_topology__delete(topo); 3684 3685 return ret; 3686 } 3687 3688 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3689 { 3690 int ret; 3691 struct cpu_topology *topo; 3692 3693 topo = cpu_topology__new(); 3694 if (!topo) { 3695 pr_err("Failed to allocate CPU topology\n"); 3696 return -ENOMEM; 3697 } 3698 3699 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3700 topo->package_cpus_list, topo->package_cpus_lists); 3701 cpu_topology__delete(topo); 3702 3703 return ret; 3704 } 3705 3706 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3707 { 3708 u32 s; 3709 int ret; 3710 const char **spec; 3711 struct numa_topology *topo; 3712 3713 topo = numa_topology__new(); 3714 if (!topo) { 3715 pr_err("Failed to allocate NUMA topology\n"); 3716 return -ENOMEM; 3717 } 3718 3719 spec = zalloc(topo->nr * sizeof(char *)); 3720 if (!spec) { 3721 pr_err("Failed to allocate NUMA spec\n"); 3722 ret = -ENOMEM; 3723 goto out_delete_topo; 3724 } 3725 for (s = 0; s < topo->nr; s++) 3726 spec[s] = topo->nodes[s].cpus; 3727 3728 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3729 3730 zfree(&spec); 3731 3732 out_delete_topo: 3733 numa_topology__delete(topo); 3734 3735 return ret; 3736 } 3737 3738 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3739 { 3740 int t, ret; 3741 u32 s, nr_spec = 0; 3742 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3743 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3744 3745 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec 
= NULL) { 3746 spec = strtok_r(user_spec, ":", &spec_ptr); 3747 if (spec == NULL) 3748 break; 3749 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3750 mask = strtok_r(spec, "/", &mask_ptr); 3751 if (mask == NULL) 3752 break; 3753 pr_debug2(" maps mask: %s\n", mask); 3754 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3755 if (!tmp_spec) { 3756 pr_err("Failed to reallocate maps spec\n"); 3757 ret = -ENOMEM; 3758 goto out_free; 3759 } 3760 maps_spec = tmp_spec; 3761 maps_spec[nr_spec] = dup_mask = strdup(mask); 3762 if (!maps_spec[nr_spec]) { 3763 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3764 ret = -ENOMEM; 3765 goto out_free; 3766 } 3767 mask = strtok_r(NULL, "/", &mask_ptr); 3768 if (mask == NULL) { 3769 pr_err("Invalid thread maps or affinity specs\n"); 3770 ret = -EINVAL; 3771 goto out_free; 3772 } 3773 pr_debug2(" affinity mask: %s\n", mask); 3774 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3775 if (!tmp_spec) { 3776 pr_err("Failed to reallocate affinity spec\n"); 3777 ret = -ENOMEM; 3778 goto out_free; 3779 } 3780 affinity_spec = tmp_spec; 3781 affinity_spec[nr_spec] = strdup(mask); 3782 if (!affinity_spec[nr_spec]) { 3783 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3784 ret = -ENOMEM; 3785 goto out_free; 3786 } 3787 dup_mask = NULL; 3788 nr_spec++; 3789 } 3790 3791 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3792 (const char **)affinity_spec, nr_spec); 3793 3794 out_free: 3795 free(dup_mask); 3796 for (s = 0; s < nr_spec; s++) { 3797 if (maps_spec) 3798 free(maps_spec[s]); 3799 if (affinity_spec) 3800 free(affinity_spec[s]); 3801 } 3802 free(affinity_spec); 3803 free(maps_spec); 3804 3805 return ret; 3806 } 3807 3808 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3809 { 3810 int ret; 3811 3812 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3813 if (ret) 3814 return ret; 3815 3816 if 
(record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3817 return -ENODEV; 3818 3819 rec->nr_threads = 1; 3820 3821 return 0; 3822 } 3823 3824 static int record__init_thread_masks(struct record *rec) 3825 { 3826 int ret = 0; 3827 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3828 3829 if (!record__threads_enabled(rec)) 3830 return record__init_thread_default_masks(rec, cpus); 3831 3832 if (evlist__per_thread(rec->evlist)) { 3833 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3834 return -EINVAL; 3835 } 3836 3837 switch (rec->opts.threads_spec) { 3838 case THREAD_SPEC__CPU: 3839 ret = record__init_thread_cpu_masks(rec, cpus); 3840 break; 3841 case THREAD_SPEC__CORE: 3842 ret = record__init_thread_core_masks(rec, cpus); 3843 break; 3844 case THREAD_SPEC__PACKAGE: 3845 ret = record__init_thread_package_masks(rec, cpus); 3846 break; 3847 case THREAD_SPEC__NUMA: 3848 ret = record__init_thread_numa_masks(rec, cpus); 3849 break; 3850 case THREAD_SPEC__USER: 3851 ret = record__init_thread_user_masks(rec, cpus); 3852 break; 3853 default: 3854 break; 3855 } 3856 3857 return ret; 3858 } 3859 3860 int cmd_record(int argc, const char **argv) 3861 { 3862 int err; 3863 struct record *rec = &record; 3864 char errbuf[BUFSIZ]; 3865 3866 setlocale(LC_ALL, ""); 3867 3868 #ifndef HAVE_LIBBPF_SUPPORT 3869 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 3870 set_nobuild('\0', "clang-path", true); 3871 set_nobuild('\0', "clang-opt", true); 3872 # undef set_nobuild 3873 #endif 3874 3875 #ifndef HAVE_BPF_PROLOGUE 3876 # if !defined (HAVE_DWARF_SUPPORT) 3877 # define REASON "NO_DWARF=1" 3878 # elif !defined (HAVE_LIBBPF_SUPPORT) 3879 # define REASON "NO_LIBBPF=1" 3880 # else 3881 # define REASON "this architecture doesn't support BPF prologue" 3882 # endif 3883 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 3884 set_nobuild('\0', "vmlinux", true); 3885 # undef 
set_nobuild 3886 # undef REASON 3887 #endif 3888 3889 #ifndef HAVE_BPF_SKEL 3890 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 3891 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 3892 # undef set_nobuild 3893 #endif 3894 3895 rec->opts.affinity = PERF_AFFINITY_SYS; 3896 3897 rec->evlist = evlist__new(); 3898 if (rec->evlist == NULL) 3899 return -ENOMEM; 3900 3901 err = perf_config(perf_record_config, rec); 3902 if (err) 3903 return err; 3904 3905 argc = parse_options(argc, argv, record_options, record_usage, 3906 PARSE_OPT_STOP_AT_NON_OPTION); 3907 if (quiet) 3908 perf_quiet_option(); 3909 3910 err = symbol__validate_sym_arguments(); 3911 if (err) 3912 return err; 3913 3914 perf_debuginfod_setup(&record.debuginfod); 3915 3916 /* Make system wide (-a) the default target. */ 3917 if (!argc && target__none(&rec->opts.target)) 3918 rec->opts.target.system_wide = true; 3919 3920 if (nr_cgroups && !rec->opts.target.system_wide) { 3921 usage_with_options_msg(record_usage, record_options, 3922 "cgroup monitoring only available in system-wide mode"); 3923 3924 } 3925 3926 if (rec->buildid_mmap) { 3927 if (!perf_can_record_build_id()) { 3928 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 3929 err = -EINVAL; 3930 goto out_opts; 3931 } 3932 pr_debug("Enabling build id in mmap2 events.\n"); 3933 /* Enable mmap build id synthesizing. */ 3934 symbol_conf.buildid_mmap2 = true; 3935 /* Enable perf_event_attr::build_id bit. */ 3936 rec->opts.build_id = true; 3937 /* Disable build id cache. 
*/ 3938 rec->no_buildid = true; 3939 } 3940 3941 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 3942 pr_err("Kernel has no cgroup sampling support.\n"); 3943 err = -EINVAL; 3944 goto out_opts; 3945 } 3946 3947 if (rec->opts.kcore) 3948 rec->opts.text_poke = true; 3949 3950 if (rec->opts.kcore || record__threads_enabled(rec)) 3951 rec->data.is_dir = true; 3952 3953 if (record__threads_enabled(rec)) { 3954 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 3955 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 3956 goto out_opts; 3957 } 3958 if (record__aio_enabled(rec)) { 3959 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 3960 goto out_opts; 3961 } 3962 } 3963 3964 if (rec->opts.comp_level != 0) { 3965 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 3966 rec->no_buildid = true; 3967 } 3968 3969 if (rec->opts.record_switch_events && 3970 !perf_can_record_switch_events()) { 3971 ui__error("kernel does not support recording context switch events\n"); 3972 parse_options_usage(record_usage, record_options, "switch-events", 0); 3973 err = -EINVAL; 3974 goto out_opts; 3975 } 3976 3977 if (switch_output_setup(rec)) { 3978 parse_options_usage(record_usage, record_options, "switch-output", 0); 3979 err = -EINVAL; 3980 goto out_opts; 3981 } 3982 3983 if (rec->switch_output.time) { 3984 signal(SIGALRM, alarm_sig_handler); 3985 alarm(rec->switch_output.time); 3986 } 3987 3988 if (rec->switch_output.num_files) { 3989 rec->switch_output.filenames = calloc(sizeof(char *), 3990 rec->switch_output.num_files); 3991 if (!rec->switch_output.filenames) { 3992 err = -EINVAL; 3993 goto out_opts; 3994 } 3995 } 3996 3997 if (rec->timestamp_filename && record__threads_enabled(rec)) { 3998 rec->timestamp_filename = false; 3999 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 4000 } 4001 4002 /* 4003 * Allow 
aliases to facilitate the lookup of symbols for address 4004 * filters. Refer to auxtrace_parse_filters(). 4005 */ 4006 symbol_conf.allow_aliases = true; 4007 4008 symbol__init(NULL); 4009 4010 err = record__auxtrace_init(rec); 4011 if (err) 4012 goto out; 4013 4014 if (dry_run) 4015 goto out; 4016 4017 err = bpf__setup_stdout(rec->evlist); 4018 if (err) { 4019 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 4020 pr_err("ERROR: Setup BPF stdout failed: %s\n", 4021 errbuf); 4022 goto out; 4023 } 4024 4025 err = -ENOMEM; 4026 4027 if (rec->no_buildid_cache || rec->no_buildid) { 4028 disable_buildid_cache(); 4029 } else if (rec->switch_output.enabled) { 4030 /* 4031 * In 'perf record --switch-output', disable buildid 4032 * generation by default to reduce data file switching 4033 * overhead. Still generate buildid if they are required 4034 * explicitly using 4035 * 4036 * perf record --switch-output --no-no-buildid \ 4037 * --no-no-buildid-cache 4038 * 4039 * Following code equals to: 4040 * 4041 * if ((rec->no_buildid || !rec->no_buildid_set) && 4042 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 4043 * disable_buildid_cache(); 4044 */ 4045 bool disable = true; 4046 4047 if (rec->no_buildid_set && !rec->no_buildid) 4048 disable = false; 4049 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 4050 disable = false; 4051 if (disable) { 4052 rec->no_buildid = true; 4053 rec->no_buildid_cache = true; 4054 disable_buildid_cache(); 4055 } 4056 } 4057 4058 if (record.opts.overwrite) 4059 record.opts.tail_synthesize = true; 4060 4061 if (rec->evlist->core.nr_entries == 0) { 4062 if (perf_pmu__has_hybrid()) { 4063 err = evlist__add_default_hybrid(rec->evlist, 4064 !record.opts.no_samples); 4065 } else { 4066 err = __evlist__add_default(rec->evlist, 4067 !record.opts.no_samples); 4068 } 4069 4070 if (err < 0) { 4071 pr_err("Not enough memory for event selector list\n"); 4072 goto out; 4073 } 4074 } 4075 4076 if (rec->opts.target.tid && 
!rec->opts.no_inherit_set) 4077 rec->opts.no_inherit = true; 4078 4079 err = target__validate(&rec->opts.target); 4080 if (err) { 4081 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4082 ui__warning("%s\n", errbuf); 4083 } 4084 4085 err = target__parse_uid(&rec->opts.target); 4086 if (err) { 4087 int saved_errno = errno; 4088 4089 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4090 ui__error("%s", errbuf); 4091 4092 err = -saved_errno; 4093 goto out; 4094 } 4095 4096 /* Enable ignoring missing threads when -u/-p option is defined. */ 4097 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4098 4099 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) { 4100 pr_err("failed to use cpu list %s\n", 4101 rec->opts.target.cpu_list); 4102 goto out; 4103 } 4104 4105 rec->opts.target.hybrid = perf_pmu__has_hybrid(); 4106 4107 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4108 arch__add_leaf_frame_record_opts(&rec->opts); 4109 4110 err = -ENOMEM; 4111 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4112 if (rec->opts.target.pid != NULL) { 4113 pr_err("Couldn't create thread/CPU maps: %s\n", 4114 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4115 goto out; 4116 } 4117 else 4118 usage_with_options(record_usage, record_options); 4119 } 4120 4121 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4122 if (err) 4123 goto out; 4124 4125 /* 4126 * We take all buildids when the file contains 4127 * AUX area tracing data because we do not decode the 4128 * trace because it would take too long. 
4129 */ 4130 if (rec->opts.full_auxtrace) 4131 rec->buildid_all = true; 4132 4133 if (rec->opts.text_poke) { 4134 err = record__config_text_poke(rec->evlist); 4135 if (err) { 4136 pr_err("record__config_text_poke failed, error %d\n", err); 4137 goto out; 4138 } 4139 } 4140 4141 if (rec->off_cpu) { 4142 err = record__config_off_cpu(rec); 4143 if (err) { 4144 pr_err("record__config_off_cpu failed, error %d\n", err); 4145 goto out; 4146 } 4147 } 4148 4149 if (record_opts__config(&rec->opts)) { 4150 err = -EINVAL; 4151 goto out; 4152 } 4153 4154 err = record__init_thread_masks(rec); 4155 if (err) { 4156 pr_err("Failed to initialize parallel data streaming masks\n"); 4157 goto out; 4158 } 4159 4160 if (rec->opts.nr_cblocks > nr_cblocks_max) 4161 rec->opts.nr_cblocks = nr_cblocks_max; 4162 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4163 4164 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4165 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4166 4167 if (rec->opts.comp_level > comp_level_max) 4168 rec->opts.comp_level = comp_level_max; 4169 pr_debug("comp level: %d\n", rec->opts.comp_level); 4170 4171 err = __cmd_record(&record, argc, argv); 4172 out: 4173 evlist__delete(rec->evlist); 4174 symbol__exit(); 4175 auxtrace_record__free(rec->itr); 4176 out_opts: 4177 record__free_thread_masks(rec, rec->nr_threads); 4178 rec->nr_threads = 0; 4179 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4180 return err; 4181 } 4182 4183 static void snapshot_sig_handler(int sig __maybe_unused) 4184 { 4185 struct record *rec = &record; 4186 4187 hit_auxtrace_snapshot_trigger(rec); 4188 4189 if (switch_output_signal(rec)) 4190 trigger_hit(&switch_output_trigger); 4191 } 4192 4193 static void alarm_sig_handler(int sig __maybe_unused) 4194 { 4195 struct record *rec = &record; 4196 4197 if (switch_output_time(rec)) 4198 trigger_hit(&switch_output_trigger); 4199 } 4200