1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/callchain.h" 18 #include "util/cgroup.h" 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/mmap.h" 25 #include "util/mutex.h" 26 #include "util/target.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/stat.h" 30 #include "util/symbol.h" 31 #include "util/record.h" 32 #include "util/cpumap.h" 33 #include "util/thread_map.h" 34 #include "util/data.h" 35 #include "util/perf_regs.h" 36 #include "util/auxtrace.h" 37 #include "util/tsc.h" 38 #include "util/parse-branch-options.h" 39 #include "util/parse-regs-options.h" 40 #include "util/perf_api_probe.h" 41 #include "util/trigger.h" 42 #include "util/perf-hooks.h" 43 #include "util/cpu-set-sched.h" 44 #include "util/synthetic-events.h" 45 #include "util/time-utils.h" 46 #include "util/units.h" 47 #include "util/bpf-event.h" 48 #include "util/util.h" 49 #include "util/pfm.h" 50 #include "util/pmu.h" 51 #include "util/pmus.h" 52 #include "util/clockid.h" 53 #include "util/off_cpu.h" 54 #include "util/bpf-filter.h" 55 #include "util/strbuf.h" 56 #include "asm/bug.h" 57 #include "perf.h" 58 #include "cputopo.h" 59 60 #include <errno.h> 61 #include <inttypes.h> 62 #include <locale.h> 63 #include <poll.h> 64 #include <pthread.h> 65 #include <unistd.h> 66 #ifndef HAVE_GETTID 67 #include <syscall.h> 68 #endif 69 #include <sched.h> 70 #include <signal.h> 71 #ifdef HAVE_EVENTFD_SUPPORT 72 #include <sys/eventfd.h> 73 #endif 74 #include <sys/mman.h> 75 #include 
<sys/wait.h> 76 #include <sys/types.h> 77 #include <sys/stat.h> 78 #include <fcntl.h> 79 #include <linux/err.h> 80 #include <linux/string.h> 81 #include <linux/time64.h> 82 #include <linux/zalloc.h> 83 #include <linux/bitmap.h> 84 #include <sys/time.h> 85 86 struct switch_output { 87 bool enabled; 88 bool signal; 89 unsigned long size; 90 unsigned long time; 91 const char *str; 92 bool set; 93 char **filenames; 94 int num_files; 95 int cur_file; 96 }; 97 98 struct thread_mask { 99 struct mmap_cpu_mask maps; 100 struct mmap_cpu_mask affinity; 101 }; 102 103 struct record_thread { 104 pid_t tid; 105 struct thread_mask *mask; 106 struct { 107 int msg[2]; 108 int ack[2]; 109 } pipes; 110 struct fdarray pollfd; 111 int ctlfd_pos; 112 int nr_mmaps; 113 struct mmap **maps; 114 struct mmap **overwrite_maps; 115 struct record *rec; 116 unsigned long long samples; 117 unsigned long waking; 118 u64 bytes_written; 119 u64 bytes_transferred; 120 u64 bytes_compressed; 121 }; 122 123 static __thread struct record_thread *thread; 124 125 enum thread_msg { 126 THREAD_MSG__UNDEFINED = 0, 127 THREAD_MSG__READY, 128 THREAD_MSG__MAX, 129 }; 130 131 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 132 "UNDEFINED", "READY" 133 }; 134 135 enum thread_spec { 136 THREAD_SPEC__UNDEFINED = 0, 137 THREAD_SPEC__CPU, 138 THREAD_SPEC__CORE, 139 THREAD_SPEC__PACKAGE, 140 THREAD_SPEC__NUMA, 141 THREAD_SPEC__USER, 142 THREAD_SPEC__MAX, 143 }; 144 145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 146 "undefined", "cpu", "core", "package", "numa", "user" 147 }; 148 149 struct pollfd_index_map { 150 int evlist_pollfd_index; 151 int thread_pollfd_index; 152 }; 153 154 struct record { 155 struct perf_tool tool; 156 struct record_opts opts; 157 u64 bytes_written; 158 u64 thread_bytes_written; 159 struct perf_data data; 160 struct auxtrace_record *itr; 161 struct evlist *evlist; 162 struct perf_session *session; 163 struct evlist *sb_evlist; 164 pthread_t thread_id; 165 int 
realtime_prio; 166 bool latency; 167 bool switch_output_event_set; 168 bool no_buildid; 169 bool no_buildid_set; 170 bool no_buildid_cache; 171 bool no_buildid_cache_set; 172 bool buildid_all; 173 bool buildid_mmap; 174 bool buildid_mmap_set; 175 bool timestamp_filename; 176 bool timestamp_boundary; 177 bool off_cpu; 178 const char *filter_action; 179 const char *uid_str; 180 struct switch_output switch_output; 181 unsigned long long samples; 182 unsigned long output_max_size; /* = 0: unlimited */ 183 struct perf_debuginfod debuginfod; 184 int nr_threads; 185 struct thread_mask *thread_masks; 186 struct record_thread *thread_data; 187 struct pollfd_index_map *index_map; 188 size_t index_map_sz; 189 size_t index_map_cnt; 190 }; 191 192 static volatile int done; 193 194 static volatile int auxtrace_record__snapshot_started; 195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 196 static DEFINE_TRIGGER(switch_output_trigger); 197 198 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 199 "SYS", "NODE", "CPU" 200 }; 201 202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 203 struct perf_sample *sample, struct machine *machine); 204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 205 struct perf_sample *sample, struct machine *machine); 206 static int process_timestamp_boundary(const struct perf_tool *tool, 207 union perf_event *event, 208 struct perf_sample *sample, 209 struct machine *machine); 210 211 #ifndef HAVE_GETTID 212 static inline pid_t gettid(void) 213 { 214 return (pid_t)syscall(__NR_gettid); 215 } 216 #endif 217 218 static int record__threads_enabled(struct record *rec) 219 { 220 return rec->opts.threads_spec; 221 } 222 223 static bool switch_output_signal(struct record *rec) 224 { 225 return rec->switch_output.signal && 226 trigger_is_ready(&switch_output_trigger); 227 } 228 229 static bool switch_output_size(struct record *rec) 230 { 231 return 
rec->switch_output.size && 232 trigger_is_ready(&switch_output_trigger) && 233 (rec->bytes_written >= rec->switch_output.size); 234 } 235 236 static bool switch_output_time(struct record *rec) 237 { 238 return rec->switch_output.time && 239 trigger_is_ready(&switch_output_trigger); 240 } 241 242 static u64 record__bytes_written(struct record *rec) 243 { 244 return rec->bytes_written + rec->thread_bytes_written; 245 } 246 247 static bool record__output_max_size_exceeded(struct record *rec) 248 { 249 return rec->output_max_size && 250 (record__bytes_written(rec) >= rec->output_max_size); 251 } 252 253 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 254 void *bf, size_t size) 255 { 256 struct perf_data_file *file = &rec->session->data->file; 257 258 if (map && map->file) 259 file = map->file; 260 261 if (perf_data_file__write(file, bf, size) < 0) { 262 pr_err("failed to write perf data, error: %m\n"); 263 return -1; 264 } 265 266 if (map && map->file) { 267 thread->bytes_written += size; 268 rec->thread_bytes_written += size; 269 } else { 270 rec->bytes_written += size; 271 } 272 273 if (record__output_max_size_exceeded(rec) && !done) { 274 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 275 " stopping session ]\n", 276 record__bytes_written(rec) >> 10); 277 done = 1; 278 } 279 280 if (switch_output_size(rec)) 281 trigger_hit(&switch_output_trigger); 282 283 return 0; 284 } 285 286 static int record__aio_enabled(struct record *rec); 287 static int record__comp_enabled(struct record *rec); 288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 289 void *dst, size_t dst_size, void *src, size_t src_size); 290 291 #ifdef HAVE_AIO_SUPPORT 292 static int record__aio_write(struct aiocb *cblock, int trace_fd, 293 void *buf, size_t size, off_t off) 294 { 295 int rc; 296 297 cblock->aio_fildes = trace_fd; 298 cblock->aio_buf = buf; 299 cblock->aio_nbytes = size; 300 cblock->aio_offset = off; 301 
cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 302 303 do { 304 rc = aio_write(cblock); 305 if (rc == 0) { 306 break; 307 } else if (errno != EAGAIN) { 308 cblock->aio_fildes = -1; 309 pr_err("failed to queue perf data, error: %m\n"); 310 break; 311 } 312 } while (1); 313 314 return rc; 315 } 316 317 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 318 { 319 void *rem_buf; 320 off_t rem_off; 321 size_t rem_size; 322 int rc, aio_errno; 323 ssize_t aio_ret, written; 324 325 aio_errno = aio_error(cblock); 326 if (aio_errno == EINPROGRESS) 327 return 0; 328 329 written = aio_ret = aio_return(cblock); 330 if (aio_ret < 0) { 331 if (aio_errno != EINTR) 332 pr_err("failed to write perf data, error: %m\n"); 333 written = 0; 334 } 335 336 rem_size = cblock->aio_nbytes - written; 337 338 if (rem_size == 0) { 339 cblock->aio_fildes = -1; 340 /* 341 * md->refcount is incremented in record__aio_pushfn() for 342 * every aio write request started in record__aio_push() so 343 * decrement it because the request is now complete. 344 */ 345 perf_mmap__put(&md->core); 346 rc = 1; 347 } else { 348 /* 349 * aio write request may require restart with the 350 * remainder if the kernel didn't write whole 351 * chunk at once. 
352 */ 353 rem_off = cblock->aio_offset + written; 354 rem_buf = (void *)(cblock->aio_buf + written); 355 record__aio_write(cblock, cblock->aio_fildes, 356 rem_buf, rem_size, rem_off); 357 rc = 0; 358 } 359 360 return rc; 361 } 362 363 static int record__aio_sync(struct mmap *md, bool sync_all) 364 { 365 struct aiocb **aiocb = md->aio.aiocb; 366 struct aiocb *cblocks = md->aio.cblocks; 367 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 368 int i, do_suspend; 369 370 do { 371 do_suspend = 0; 372 for (i = 0; i < md->aio.nr_cblocks; ++i) { 373 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 374 if (sync_all) 375 aiocb[i] = NULL; 376 else 377 return i; 378 } else { 379 /* 380 * Started aio write is not complete yet 381 * so it has to be waited before the 382 * next allocation. 383 */ 384 aiocb[i] = &cblocks[i]; 385 do_suspend = 1; 386 } 387 } 388 if (!do_suspend) 389 return -1; 390 391 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 392 if (!(errno == EAGAIN || errno == EINTR)) 393 pr_err("failed to sync perf data, error: %m\n"); 394 } 395 } while (1); 396 } 397 398 struct record_aio { 399 struct record *rec; 400 void *data; 401 size_t size; 402 }; 403 404 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 405 { 406 struct record_aio *aio = to; 407 408 /* 409 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 410 * to release space in the kernel buffer as fast as possible, calling 411 * perf_mmap__consume() from perf_mmap__push() function. 412 * 413 * That lets the kernel to proceed with storing more profiling data into 414 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 415 * 416 * Coping can be done in two steps in case the chunk of profiling data 417 * crosses the upper bound of the kernel buffer. 
In this case we first move 418 * part of data from map->start till the upper bound and then the remainder 419 * from the beginning of the kernel buffer till the end of the data chunk. 420 */ 421 422 if (record__comp_enabled(aio->rec)) { 423 ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 424 mmap__mmap_len(map) - aio->size, 425 buf, size); 426 if (compressed < 0) 427 return (int)compressed; 428 429 size = compressed; 430 } else { 431 memcpy(aio->data + aio->size, buf, size); 432 } 433 434 if (!aio->size) { 435 /* 436 * Increment map->refcount to guard map->aio.data[] buffer 437 * from premature deallocation because map object can be 438 * released earlier than aio write request started on 439 * map->aio.data[] buffer is complete. 440 * 441 * perf_mmap__put() is done at record__aio_complete() 442 * after started aio request completion or at record__aio_push() 443 * if the request failed to start. 444 */ 445 perf_mmap__get(&map->core); 446 } 447 448 aio->size += size; 449 450 return size; 451 } 452 453 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 454 { 455 int ret, idx; 456 int trace_fd = rec->session->data->file.fd; 457 struct record_aio aio = { .rec = rec, .size = 0 }; 458 459 /* 460 * Call record__aio_sync() to wait till map->aio.data[] buffer 461 * becomes available after previous aio write operation. 
462 */ 463 464 idx = record__aio_sync(map, false); 465 aio.data = map->aio.data[idx]; 466 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 467 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 468 return ret; 469 470 rec->samples++; 471 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 472 if (!ret) { 473 *off += aio.size; 474 rec->bytes_written += aio.size; 475 if (switch_output_size(rec)) 476 trigger_hit(&switch_output_trigger); 477 } else { 478 /* 479 * Decrement map->refcount incremented in record__aio_pushfn() 480 * back if record__aio_write() operation failed to start, otherwise 481 * map->refcount is decremented in record__aio_complete() after 482 * aio write operation finishes successfully. 483 */ 484 perf_mmap__put(&map->core); 485 } 486 487 return ret; 488 } 489 490 static off_t record__aio_get_pos(int trace_fd) 491 { 492 return lseek(trace_fd, 0, SEEK_CUR); 493 } 494 495 static void record__aio_set_pos(int trace_fd, off_t pos) 496 { 497 lseek(trace_fd, pos, SEEK_SET); 498 } 499 500 static void record__aio_mmap_read_sync(struct record *rec) 501 { 502 int i; 503 struct evlist *evlist = rec->evlist; 504 struct mmap *maps = evlist->mmap; 505 506 if (!record__aio_enabled(rec)) 507 return; 508 509 for (i = 0; i < evlist->core.nr_mmaps; i++) { 510 struct mmap *map = &maps[i]; 511 512 if (map->core.base) 513 record__aio_sync(map, true); 514 } 515 } 516 517 static int nr_cblocks_default = 1; 518 static int nr_cblocks_max = 4; 519 520 static int record__aio_parse(const struct option *opt, 521 const char *str, 522 int unset) 523 { 524 struct record_opts *opts = (struct record_opts *)opt->value; 525 526 if (unset) { 527 opts->nr_cblocks = 0; 528 } else { 529 if (str) 530 opts->nr_cblocks = strtol(str, NULL, 0); 531 if (!opts->nr_cblocks) 532 opts->nr_cblocks = nr_cblocks_default; 533 } 534 535 return 0; 536 } 537 #else /* HAVE_AIO_SUPPORT */ 538 static int nr_cblocks_max = 0; 539 540 static int record__aio_push(struct 
record *rec __maybe_unused, struct mmap *map __maybe_unused, 541 off_t *off __maybe_unused) 542 { 543 return -1; 544 } 545 546 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 547 { 548 return -1; 549 } 550 551 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 552 { 553 } 554 555 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 556 { 557 } 558 #endif 559 560 static int record__aio_enabled(struct record *rec) 561 { 562 return rec->opts.nr_cblocks > 0; 563 } 564 565 #define MMAP_FLUSH_DEFAULT 1 566 static int record__mmap_flush_parse(const struct option *opt, 567 const char *str, 568 int unset) 569 { 570 int flush_max; 571 struct record_opts *opts = (struct record_opts *)opt->value; 572 static struct parse_tag tags[] = { 573 { .tag = 'B', .mult = 1 }, 574 { .tag = 'K', .mult = 1 << 10 }, 575 { .tag = 'M', .mult = 1 << 20 }, 576 { .tag = 'G', .mult = 1 << 30 }, 577 { .tag = 0 }, 578 }; 579 580 if (unset) 581 return 0; 582 583 if (str) { 584 opts->mmap_flush = parse_tag_value(str, tags); 585 if (opts->mmap_flush == (int)-1) 586 opts->mmap_flush = strtol(str, NULL, 0); 587 } 588 589 if (!opts->mmap_flush) 590 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 591 592 flush_max = evlist__mmap_size(opts->mmap_pages); 593 flush_max /= 4; 594 if (opts->mmap_flush > flush_max) 595 opts->mmap_flush = flush_max; 596 597 return 0; 598 } 599 600 #ifdef HAVE_ZSTD_SUPPORT 601 static unsigned int comp_level_default = 1; 602 603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 604 { 605 struct record_opts *opts = opt->value; 606 607 if (unset) { 608 opts->comp_level = 0; 609 } else { 610 if (str) 611 opts->comp_level = strtol(str, NULL, 0); 612 if (!opts->comp_level) 613 opts->comp_level = comp_level_default; 614 } 615 616 return 0; 617 } 618 #endif 619 static unsigned int comp_level_max = 22; 620 621 static int record__comp_enabled(struct record *rec) 622 { 623 return 
rec->opts.comp_level > 0; 624 } 625 626 static int process_synthesized_event(const struct perf_tool *tool, 627 union perf_event *event, 628 struct perf_sample *sample __maybe_unused, 629 struct machine *machine __maybe_unused) 630 { 631 struct record *rec = container_of(tool, struct record, tool); 632 return record__write(rec, NULL, event, event->header.size); 633 } 634 635 static struct mutex synth_lock; 636 637 static int process_locked_synthesized_event(const struct perf_tool *tool, 638 union perf_event *event, 639 struct perf_sample *sample __maybe_unused, 640 struct machine *machine __maybe_unused) 641 { 642 int ret; 643 644 mutex_lock(&synth_lock); 645 ret = process_synthesized_event(tool, event, sample, machine); 646 mutex_unlock(&synth_lock); 647 return ret; 648 } 649 650 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 651 { 652 struct record *rec = to; 653 654 if (record__comp_enabled(rec)) { 655 struct perf_record_compressed2 *event = map->data; 656 size_t padding = 0; 657 u8 pad[8] = {0}; 658 ssize_t compressed = zstd_compress(rec->session, map, map->data, 659 mmap__mmap_len(map), bf, size); 660 661 if (compressed < 0) 662 return (int)compressed; 663 664 bf = event; 665 thread->samples++; 666 667 /* 668 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan 669 * error. We make it aligned here. 
670 */ 671 event->data_size = compressed - sizeof(struct perf_record_compressed2); 672 event->header.size = PERF_ALIGN(compressed, sizeof(u64)); 673 padding = event->header.size - compressed; 674 return record__write(rec, map, bf, compressed) || 675 record__write(rec, map, &pad, padding); 676 } 677 678 thread->samples++; 679 return record__write(rec, map, bf, size); 680 } 681 682 static volatile sig_atomic_t signr = -1; 683 static volatile sig_atomic_t child_finished; 684 #ifdef HAVE_EVENTFD_SUPPORT 685 static volatile sig_atomic_t done_fd = -1; 686 #endif 687 688 static void sig_handler(int sig) 689 { 690 if (sig == SIGCHLD) 691 child_finished = 1; 692 else 693 signr = sig; 694 695 done = 1; 696 #ifdef HAVE_EVENTFD_SUPPORT 697 if (done_fd >= 0) { 698 u64 tmp = 1; 699 int orig_errno = errno; 700 701 /* 702 * It is possible for this signal handler to run after done is 703 * checked in the main loop, but before the perf counter fds are 704 * polled. If this happens, the poll() will continue to wait 705 * even though done is set, and will only break out if either 706 * another signal is received, or the counters are ready for 707 * read. To ensure the poll() doesn't sleep when done is set, 708 * use an eventfd (done_fd) to wake up the poll(). 
709 */ 710 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 711 pr_err("failed to signal wakeup fd, error: %m\n"); 712 713 errno = orig_errno; 714 } 715 #endif // HAVE_EVENTFD_SUPPORT 716 } 717 718 static void sigsegv_handler(int sig) 719 { 720 perf_hooks__recover(); 721 sighandler_dump_stack(sig); 722 } 723 724 static void record__sig_exit(void) 725 { 726 if (signr == -1) 727 return; 728 729 signal(signr, SIG_DFL); 730 raise(signr); 731 } 732 733 static int record__process_auxtrace(const struct perf_tool *tool, 734 struct mmap *map, 735 union perf_event *event, void *data1, 736 size_t len1, void *data2, size_t len2) 737 { 738 struct record *rec = container_of(tool, struct record, tool); 739 struct perf_data *data = &rec->data; 740 size_t padding; 741 u8 pad[8] = {0}; 742 743 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 744 off_t file_offset; 745 int fd = perf_data__fd(data); 746 int err; 747 748 file_offset = lseek(fd, 0, SEEK_CUR); 749 if (file_offset == -1) 750 return -1; 751 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 752 event, file_offset); 753 if (err) 754 return err; 755 } 756 757 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 758 padding = (len1 + len2) & 7; 759 if (padding) 760 padding = 8 - padding; 761 762 record__write(rec, map, event, event->header.size); 763 record__write(rec, map, data1, len1); 764 if (len2) 765 record__write(rec, map, data2, len2); 766 record__write(rec, map, &pad, padding); 767 768 return 0; 769 } 770 771 static int record__auxtrace_mmap_read(struct record *rec, 772 struct mmap *map) 773 { 774 int ret; 775 776 ret = auxtrace_mmap__read(map, rec->itr, 777 perf_session__env(rec->session), 778 &rec->tool, 779 record__process_auxtrace); 780 if (ret < 0) 781 return ret; 782 783 if (ret) 784 rec->samples++; 785 786 return 0; 787 } 788 789 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 790 struct mmap *map) 791 { 792 int ret; 793 794 ret = 
auxtrace_mmap__read_snapshot(map, rec->itr, 795 perf_session__env(rec->session), 796 &rec->tool, 797 record__process_auxtrace, 798 rec->opts.auxtrace_snapshot_size); 799 if (ret < 0) 800 return ret; 801 802 if (ret) 803 rec->samples++; 804 805 return 0; 806 } 807 808 static int record__auxtrace_read_snapshot_all(struct record *rec) 809 { 810 int i; 811 int rc = 0; 812 813 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 814 struct mmap *map = &rec->evlist->mmap[i]; 815 816 if (!map->auxtrace_mmap.base) 817 continue; 818 819 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 820 rc = -1; 821 goto out; 822 } 823 } 824 out: 825 return rc; 826 } 827 828 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 829 { 830 pr_debug("Recording AUX area tracing snapshot\n"); 831 if (record__auxtrace_read_snapshot_all(rec) < 0) { 832 trigger_error(&auxtrace_snapshot_trigger); 833 } else { 834 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 835 trigger_error(&auxtrace_snapshot_trigger); 836 else 837 trigger_ready(&auxtrace_snapshot_trigger); 838 } 839 } 840 841 static int record__auxtrace_snapshot_exit(struct record *rec) 842 { 843 if (trigger_is_error(&auxtrace_snapshot_trigger)) 844 return 0; 845 846 if (!auxtrace_record__snapshot_started && 847 auxtrace_record__snapshot_start(rec->itr)) 848 return -1; 849 850 record__read_auxtrace_snapshot(rec, true); 851 if (trigger_is_error(&auxtrace_snapshot_trigger)) 852 return -1; 853 854 return 0; 855 } 856 857 static int record__auxtrace_init(struct record *rec) 858 { 859 int err; 860 861 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 862 && record__threads_enabled(rec)) { 863 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 864 return -EINVAL; 865 } 866 867 if (!rec->itr) { 868 rec->itr = auxtrace_record__init(rec->evlist, &err); 869 if (err) 870 return err; 871 } 872 873 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 874 
rec->opts.auxtrace_snapshot_opts); 875 if (err) 876 return err; 877 878 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 879 rec->opts.auxtrace_sample_opts); 880 if (err) 881 return err; 882 883 err = auxtrace_parse_aux_action(rec->evlist); 884 if (err) 885 return err; 886 887 return auxtrace_parse_filters(rec->evlist); 888 } 889 890 static int record__config_text_poke(struct evlist *evlist) 891 { 892 struct evsel *evsel; 893 894 /* Nothing to do if text poke is already configured */ 895 evlist__for_each_entry(evlist, evsel) { 896 if (evsel->core.attr.text_poke) 897 return 0; 898 } 899 900 evsel = evlist__add_dummy_on_all_cpus(evlist); 901 if (!evsel) 902 return -ENOMEM; 903 904 evsel->core.attr.text_poke = 1; 905 evsel->core.attr.ksymbol = 1; 906 evsel->immediate = true; 907 evsel__set_sample_bit(evsel, TIME); 908 909 return 0; 910 } 911 912 static int record__config_off_cpu(struct record *rec) 913 { 914 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 915 } 916 917 static bool record__tracking_system_wide(struct record *rec) 918 { 919 struct evlist *evlist = rec->evlist; 920 struct evsel *evsel; 921 922 /* 923 * If non-dummy evsel exists, system_wide sideband is need to 924 * help parse sample information. 925 * For example, PERF_EVENT_MMAP event to help parse symbol, 926 * and PERF_EVENT_COMM event to help parse task executable name. 927 */ 928 evlist__for_each_entry(evlist, evsel) { 929 if (!evsel__is_dummy_event(evsel)) 930 return true; 931 } 932 933 return false; 934 } 935 936 static int record__config_tracking_events(struct record *rec) 937 { 938 struct record_opts *opts = &rec->opts; 939 struct evlist *evlist = rec->evlist; 940 bool system_wide = false; 941 struct evsel *evsel; 942 943 /* 944 * For initial_delay, system wide or a hybrid system, we need to add 945 * tracking event so that we can track PERF_RECORD_MMAP to cover the 946 * delay of waiting or event synthesis. 
947 */ 948 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 949 perf_pmus__num_core_pmus() > 1) { 950 /* 951 * User space tasks can migrate between CPUs, so when tracing 952 * selected CPUs, sideband for all CPUs is still needed. 953 */ 954 if (!!opts->target.cpu_list && record__tracking_system_wide(rec)) 955 system_wide = true; 956 957 evsel = evlist__findnew_tracking_event(evlist, system_wide); 958 if (!evsel) 959 return -ENOMEM; 960 961 /* 962 * Enable the tracking event when the process is forked for 963 * initial_delay, immediately for system wide. 964 */ 965 if (opts->target.initial_delay && !evsel->immediate && 966 !target__has_cpu(&opts->target)) 967 evsel->core.attr.enable_on_exec = 1; 968 else 969 evsel->immediate = 1; 970 } 971 972 return 0; 973 } 974 975 static bool record__kcore_readable(struct machine *machine) 976 { 977 char kcore[PATH_MAX]; 978 int fd; 979 980 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 981 982 fd = open(kcore, O_RDONLY); 983 if (fd < 0) 984 return false; 985 986 close(fd); 987 988 return true; 989 } 990 991 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 992 { 993 char from_dir[PATH_MAX]; 994 char kcore_dir[PATH_MAX]; 995 int ret; 996 997 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 998 999 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 1000 if (ret) 1001 return ret; 1002 1003 return kcore_copy(from_dir, kcore_dir); 1004 } 1005 1006 static void record__thread_data_init_pipes(struct record_thread *thread_data) 1007 { 1008 thread_data->pipes.msg[0] = -1; 1009 thread_data->pipes.msg[1] = -1; 1010 thread_data->pipes.ack[0] = -1; 1011 thread_data->pipes.ack[1] = -1; 1012 } 1013 1014 static int record__thread_data_open_pipes(struct record_thread *thread_data) 1015 { 1016 if (pipe(thread_data->pipes.msg)) 1017 return -EINVAL; 1018 1019 if (pipe(thread_data->pipes.ack)) { 1020 close(thread_data->pipes.msg[0]); 1021 
thread_data->pipes.msg[0] = -1; 1022 close(thread_data->pipes.msg[1]); 1023 thread_data->pipes.msg[1] = -1; 1024 return -EINVAL; 1025 } 1026 1027 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 1028 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 1029 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 1030 1031 return 0; 1032 } 1033 1034 static void record__thread_data_close_pipes(struct record_thread *thread_data) 1035 { 1036 if (thread_data->pipes.msg[0] != -1) { 1037 close(thread_data->pipes.msg[0]); 1038 thread_data->pipes.msg[0] = -1; 1039 } 1040 if (thread_data->pipes.msg[1] != -1) { 1041 close(thread_data->pipes.msg[1]); 1042 thread_data->pipes.msg[1] = -1; 1043 } 1044 if (thread_data->pipes.ack[0] != -1) { 1045 close(thread_data->pipes.ack[0]); 1046 thread_data->pipes.ack[0] = -1; 1047 } 1048 if (thread_data->pipes.ack[1] != -1) { 1049 close(thread_data->pipes.ack[1]); 1050 thread_data->pipes.ack[1] = -1; 1051 } 1052 } 1053 1054 static bool evlist__per_thread(struct evlist *evlist) 1055 { 1056 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 1057 } 1058 1059 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 1060 { 1061 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 1062 struct mmap *mmap = evlist->mmap; 1063 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 1064 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1065 bool per_thread = evlist__per_thread(evlist); 1066 1067 if (per_thread) 1068 thread_data->nr_mmaps = nr_mmaps; 1069 else 1070 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1071 thread_data->mask->maps.nbits); 1072 if (mmap) { 1073 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1074 if (!thread_data->maps) 1075 return -ENOMEM; 1076 } 1077 if (overwrite_mmap) { 1078 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1079 if (!thread_data->overwrite_maps) { 1080 
			/* Undo the maps allocation; caller sees -ENOMEM. */
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	/*
	 * Walk all evlist mmaps and pick the ones belonging to this thread:
	 * either all of them (per-thread mode) or the ones whose CPU is set
	 * in this thread's maps mask.
	 */
	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

/*
 * Build this thread's private pollfd array by duplicating, from the evlist's
 * pollfd array, every entry whose priv pointer refers to one of the mmaps
 * (regular or overwrite) owned by this thread.
 *
 * Returns 0 on success or the negative value from fdarray__dup_entry_from().
 */
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			/* Match evlist pollfd entries against this thread's mmaps. */
			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

/*
 * Release all per-thread state: close the msg/ack pipes, free the mmap
 * pointer arrays and pollfd arrays, then the thread_data array itself.
 * Safe to call when rec->thread_data is NULL.
 */
static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

/*
 * Remember that evlist pollfd entry @evlist_pollfd_index was duplicated into
 * the main thread's pollfd array at @thread_pollfd_index, growing the
 * index_map array on demand. Returns 0 or -ENOMEM.
 */
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

/*
 * Copy back revents from the thread's duplicated pollfd entries to the
 * corresponding evlist pollfd entries, using the mapping recorded by
 * record__map_thread_evlist_pollfd_indexes(). Entries whose fd/events no
 * longer match are reported and skipped; returns -EINVAL if any mismatched.
 */
static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;

		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

/*
 * Duplicate every non-perf-event descriptor (e.g. control fds) from the
 * evlist pollfd array into @thread_data's pollfd array, recording the
 * index mapping so revents can be propagated back later.
 */
static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

/*
 * Allocate and initialize rec->thread_data[] for rec->nr_threads threads:
 * per-thread mmap lists, pollfd arrays and, for worker threads (t != 0),
 * the msg/ack communication pipes. Thread 0 is the main thread: it gets
 * the non-perf-event descriptors instead of pipes.
 * On any failure everything allocated so far is torn down.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	/* Mark all pipe fds invalid first so cleanup on error is safe. */
	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			/* Worker thread: tid is filled in by the thread itself. */
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			/* Poll the read end of the msg pipe for control/termination. */
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

/*
 * mmap the evlist ring buffers (and auxtrace buffers), set up the control
 * fd, allocate per-thread data and, in threaded (directory) mode, create the
 * per-cpu output files and attach them to the mmaps.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap: %m\n");
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		/* Directory output: one data file per mmap. */
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			errno = -ret;
			pr_err("Failed to create data directory: %m\n");
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

/* Convenience wrapper: mmap the record's own evlist. */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

/*
 * Open all events in the evlist. Events that fail to open (after fallback
 * and weak-group handling) are marked skippable and removed; recording only
 * fails if nothing but tool-added dummy events would remain. Finally applies
 * filters and mmaps the ring buffers.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;
	bool skipped = false;
	bool removed_tracking = false;

	evlist__for_each_entry(evlist, pos) {
		if (removed_tracking) {
			/*
			 * Normally the head of the list has tracking enabled
			 * for sideband data like mmaps. If this event is
			 * removed, make sure to add tracking to the next
			 * processed event.
			 */
			if (!pos->tracking) {
				pos->tracking = true;
				evsel__config(pos, opts, &callchain_param);
			}
			removed_tracking = false;
		}
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			bool report_error = true;

			/* Try a more portable event encoding first. */
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
#if defined(__aarch64__) || defined(__arm__)
			if (strstr(evsel__name(pos), "cycles")) {
				struct evsel *pos2;
				/*
				 * Unfortunately ARM has many events named
				 * "cycles" on PMUs like the system-level (L3)
				 * cache which don't support sampling. Only
				 * display such failures to open when there is
				 * only 1 cycles event or verbose is enabled.
				 */
				evlist__for_each_entry(evlist, pos2) {
					if (pos2 == pos)
						continue;
					if (strstr(evsel__name(pos2), "cycles")) {
						report_error = false;
						break;
					}
				}
			}
#endif
			if (report_error || verbose > 0) {
				ui__error("Failure to open event '%s' on PMU '%s' which will be "
					  "removed.\n%s\n",
					  evsel__name(pos), evsel__pmu_name(pos), msg);
			}
			if (pos->tracking)
				removed_tracking = true;
			pos->skippable = true;
			skipped = true;
		}
	}

	if (skipped) {
		struct evsel *tmp;
		int idx = 0;
		bool evlist_empty = true;

		/* Remove evsels that failed to open and update indices. */
		evlist__for_each_entry_safe(evlist, tmp, pos) {
			if (pos->skippable) {
				evlist__remove(evlist, pos);
				continue;
			}

			/*
			 * Note, dummy events may be command line parsed or
			 * added by the tool. We care about supporting `perf
			 * record -e dummy` which may be used as a permission
			 * check. Dummy events that are added to the command
			 * line and opened along with other events that fail,
			 * will still fail as if the dummy events were tool
			 * added events for the sake of code simplicity.
			 */
			if (!evsel__is_dummy_event(pos))
				evlist_empty = false;
		}
		evlist__for_each_entry(evlist, pos) {
			pos->core.idx = idx++;
		}
		/* If list is empty then fail. */
		if (evlist_empty) {
			ui__error("Failure to open any events for recording.\n");
			rc = -1;
			goto out;
		}
	}
	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s: %m\n",
			pos->filter ?: "BPF", evsel__name(pos));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

/*
 * Track the first and last sample timestamps seen; first_sample_time is only
 * set once, last_sample_time follows every non-zero sample time.
 */
static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

/*
 * Sample callback used while post-processing the recorded data: updates the
 * timestamp boundaries and, unless --buildid-all is set, marks the DSO hit
 * so only used DSOs get their build-ids collected.
 */
static int process_sample_event(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

/*
 * Re-process the just-written perf.data to collect build-ids of the DSOs
 * that were hit. No-op for an empty output file.
 */
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/* A single DSO is needed and not all inline frames. */
	symbol_conf.inline_name = false;
	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 * /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;
	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = process_event_sample_stub;

	return perf_session__process_events(session);
}

/* Per-guest-machine callback: synthesize guest module and kernel mmap events. */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

/* Header-only marker events written into the data stream. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

/*
 * If mmap affinity mode is enabled and @map's affinity mask differs from the
 * current thread's, migrate this thread to the map's CPUs before reading it.
 * Uses the TLS 'thread' pointer.
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

/*
 * Callback for zstd streaming: on the first call (increment == 0) initialize
 * the PERF_RECORD_COMPRESSED2 header and return its size; on later calls
 * grow header.size by @increment.
 */
static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed2 *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED2;
	event->header.size = size;

	return size;
}

/*
 * Compress @src into @dst as COMPRESSED2 records, using the per-mmap zstd
 * stream in threaded (per-file) mode or the session-wide one otherwise, and
 * account transferred/compressed byte counts on the matching owner.
 */
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size)
{
	ssize_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);
	if (compressed < 0)
		return compressed;

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

/*
 * Drain this thread's mmaps (regular or overwrite, per @overwrite) into the
 * output, via AIO when enabled. With @synch, temporarily force flush = 1 so
 * everything pending is pushed. Writes a FINISHED_ROUND marker when data was
 * written in single-file mode.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Force a full flush, restore afterwards. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

/* Read both the regular and the overwrite mmaps. */
static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

/* fdarray__filter() callback: drop the mmap reference of a dead pollfd entry. */
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

/*
 * Worker thread body: acknowledge start on the ack pipe, then loop reading
 * mmaps and polling until the control pipe signals HUP (termination) or all
 * pollfd entries are filtered out; finally do a synchronous flush and
 * acknowledge termination.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %m\n", thread->tid);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {
			/* Nothing new arrived: block until an fd becomes ready. */
			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			/* Main thread closed the msg pipe: drain once more, then exit. */
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %m\n", thread->tid);

	return NULL;
}

/*
 * Enable all header features, then clear the ones this recording session
 * cannot or should not provide.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

/*
 * Finalize the output file(s): record sizes, run build-id post-processing
 * (unless disabled), write the header and cache the build-ids. For a pipe
 * output only an approximate size is recorded.
 */
static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe) {
		/* Just to display approx. size */
		data->file.size = rec->bytes_written;
		return;
	}

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
	if (!rec->no_buildid || !rec->no_buildid_cache) {
		process_buildids(rec);

		if (rec->buildid_all)
			perf_session__dsos_hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
	perf_session__cache_build_ids(rec->session);
}

/*
 * Synthesize task/mmap events for the forked workload process. Called for
 * both head and tail synthesis; only acts when @tail matches the configured
 * tail_synthesize mode.
 */
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.record_data_mmap);
	perf_thread_map__put(thread_map);
	return err;
}

/* Emit a PERF_RECORD_FINISHED_INIT marker, respecting tail_synthesize mode. */
static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

/*
 * Rotate the output file (--switch-output): flush and finalize the current
 * file, open a timestamped successor and re-synthesize tracking events for
 * it. With --switch-output-files N, old files are recycled round-robin.
 * Returns the new output fd or a negative error.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

/*
 * Fill in and write one PERF_RECORD_LOST_SAMPLES event for @lost_count lost
 * samples on the (cpu_idx, thread_idx) instance of @evsel, appending an id
 * sample so the report can attribute it.
 */
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}

/*
 * At the end of the record session, read the kernel's lost-sample counters
 * for every event instance (and the BPF filter's dropped count) and emit
 * LOST_SAMPLES records for any non-zero counts.
 */
static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}

/* errno delivered by SIGUSR1 if the workload exec failed. */
static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* Stash the child's errno and make the main loop exit. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

/*
 * Return the perf_event_mmap_page of the first mapped ring buffer (regular
 * preferred over overwrite), or NULL if nothing is mapped.
 */
static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

/* Thin wrapper: pick the control page from the record's own evlist. */
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

/*
 * Synthesize all the non-sample metadata events the report side needs:
 * pipe header data, time conversion, id index, auxtrace info, kernel and
 * module mmaps, guest machines, extra attrs, thread/cpu maps, BPF and
 * cgroup events, and finally existing tasks (optionally multithreaded).
 * Respects tail_synthesize via the @tail flag.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
							   session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0) {
		pr_warning("Couldn't synthesize bpf events.\n");
		err = 0;
	}

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0) {
			pr_warning("Couldn't synthesize cgroup events.\n");
			err = 0;
		}
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		/* Serialize writes from the synthesis worker threads. */
		mutex_init(&synth_lock);
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->record_data_mmap,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_singlethreaded();
		mutex_destroy(&synth_lock);
	}

out:
	return err;
}

/* Emit final BPF metadata events; no-op without libbpf support. */
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
	perf_event__synthesize_final_bpf_metadata(rec->session,
						  process_synthesized_event);
#endif
}

/* Side-band event callback: wake the main thread with SIGUSR2. */
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

/*
 * Set up the side-band evlist: wire --switch-output-event callbacks, add the
 * BPF side-band event when enabled, and start the side-band thread. Failure
 * to start the thread only disables BPF event annotation, it is not fatal.
 */
static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n.");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

/*
 * When --clockid is used, record reference timestamps (TOD and the chosen
 * clock) plus the clockid itself in the session env so the report side can
 * correlate perf time with wall-clock time.
 */
static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	struct perf_env *env = perf_session__env(session);
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;

	env->clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	env->clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	env->clock.clockid_ns = ref;
	return 0;
}

/* Fire the auxtrace snapshot trigger if it is armed. */
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}

/*
 * Ask a worker thread to stop by closing the write end of its msg pipe,
 * then wait for its termination ack on the ack pipe. Always returns 0.
 */
static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}

/*
 * Start the worker threads (threaded mode only): block all signals, create
 * each thread detached with its configured CPU affinity, and wait for its
 * READY handshake.
 */
static int record__start_threads(struct record *rec)
{
	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
	struct record_thread *thread_data = rec->thread_data;
	sigset_t full, mask;
	pthread_t handle;
	pthread_attr_t attrs;

	thread = &thread_data[0];

	if (!record__threads_enabled(rec))
		return 0;

	sigfillset(&full);
	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
		pr_err("Failed to block signals on threads start: %m\n");
		return -1;
	}

	pthread_attr_init(&attrs);
	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);

	for (t = 1; t < nr_threads; t++) {
		enum thread_msg msg = THREAD_MSG__UNDEFINED;

#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
		pthread_attr_setaffinity_np(&attrs,
					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
			/*
			 * NOTE(review): the cleanup loop iterates tt over the
			 * already-started threads but passes thread_data[t]
			 * (the one that failed to start) on every iteration;
			 * thread_data[tt] looks intended - verify upstream.
			 */
			for (tt = 1; tt < t; tt++)
				record__terminate_thread(&thread_data[t]);
			pr_err("Failed to start threads: %m\n");
			ret = -1;
			goto out_err;
2359 } 2360 2361 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2362 if (err > 0) 2363 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2364 thread_msg_tags[msg]); 2365 else 2366 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2367 thread->tid, rec->thread_data[t].tid); 2368 } 2369 2370 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2371 (cpu_set_t *)thread->mask->affinity.bits); 2372 2373 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2374 2375 out_err: 2376 pthread_attr_destroy(&attrs); 2377 2378 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2379 pr_err("Failed to unblock signals on threads start: %m\n"); 2380 ret = -1; 2381 } 2382 2383 return ret; 2384 } 2385 2386 static int record__stop_threads(struct record *rec) 2387 { 2388 int t; 2389 struct record_thread *thread_data = rec->thread_data; 2390 2391 for (t = 1; t < rec->nr_threads; t++) 2392 record__terminate_thread(&thread_data[t]); 2393 2394 for (t = 0; t < rec->nr_threads; t++) { 2395 rec->samples += thread_data[t].samples; 2396 if (!record__threads_enabled(rec)) 2397 continue; 2398 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2399 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2400 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2401 thread_data[t].samples, thread_data[t].waking); 2402 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2403 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2404 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2405 else 2406 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2407 } 2408 2409 return 0; 2410 } 2411 2412 static unsigned long record__waking(struct record *rec) 2413 { 2414 int t; 2415 unsigned long waking = 0; 2416 struct record_thread *thread_data = rec->thread_data; 2417 2418 for (t = 0; t < rec->nr_threads; t++) 2419 waking += 
thread_data[t].waking; 2420 2421 return waking; 2422 } 2423 2424 static int __cmd_record(struct record *rec, int argc, const char **argv) 2425 { 2426 int err; 2427 int status = 0; 2428 const bool forks = argc > 0; 2429 struct perf_tool *tool = &rec->tool; 2430 struct record_opts *opts = &rec->opts; 2431 struct perf_data *data = &rec->data; 2432 struct perf_session *session; 2433 bool disabled = false, draining = false; 2434 int fd; 2435 float ratio = 0; 2436 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2437 struct perf_env *env; 2438 2439 atexit(record__sig_exit); 2440 signal(SIGCHLD, sig_handler); 2441 signal(SIGINT, sig_handler); 2442 signal(SIGTERM, sig_handler); 2443 signal(SIGSEGV, sigsegv_handler); 2444 2445 if (rec->opts.record_cgroup) { 2446 #ifndef HAVE_FILE_HANDLE 2447 pr_err("cgroup tracking is not supported\n"); 2448 return -1; 2449 #endif 2450 } 2451 2452 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2453 signal(SIGUSR2, snapshot_sig_handler); 2454 if (rec->opts.auxtrace_snapshot_mode) 2455 trigger_on(&auxtrace_snapshot_trigger); 2456 if (rec->switch_output.enabled) 2457 trigger_on(&switch_output_trigger); 2458 } else { 2459 signal(SIGUSR2, SIG_IGN); 2460 } 2461 2462 perf_tool__init(tool, /*ordered_events=*/true); 2463 tool->sample = process_sample_event; 2464 tool->fork = perf_event__process_fork; 2465 tool->exit = perf_event__process_exit; 2466 tool->comm = perf_event__process_comm; 2467 tool->namespaces = perf_event__process_namespaces; 2468 tool->mmap = build_id__process_mmap; 2469 tool->mmap2 = build_id__process_mmap2; 2470 tool->itrace_start = process_timestamp_boundary; 2471 tool->aux = process_timestamp_boundary; 2472 tool->namespace_events = rec->opts.record_namespaces; 2473 tool->cgroup_events = rec->opts.record_cgroup; 2474 session = perf_session__new(data, tool); 2475 if (IS_ERR(session)) { 2476 pr_err("Perf session creation failed.\n"); 2477 return PTR_ERR(session); 2478 } 2479 env = 
perf_session__env(session); 2480 if (record__threads_enabled(rec)) { 2481 if (perf_data__is_pipe(&rec->data)) { 2482 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2483 return -1; 2484 } 2485 if (rec->opts.full_auxtrace) { 2486 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2487 return -1; 2488 } 2489 } 2490 2491 fd = perf_data__fd(data); 2492 rec->session = session; 2493 2494 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2495 pr_err("Compression initialization failed.\n"); 2496 return -1; 2497 } 2498 #ifdef HAVE_EVENTFD_SUPPORT 2499 done_fd = eventfd(0, EFD_NONBLOCK); 2500 if (done_fd < 0) { 2501 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2502 status = -1; 2503 goto out_delete_session; 2504 } 2505 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2506 if (err < 0) { 2507 pr_err("Failed to add wakeup eventfd to poll list\n"); 2508 status = err; 2509 goto out_delete_session; 2510 } 2511 #endif // HAVE_EVENTFD_SUPPORT 2512 2513 env->comp_type = PERF_COMP_ZSTD; 2514 env->comp_level = rec->opts.comp_level; 2515 2516 if (rec->opts.kcore && 2517 !record__kcore_readable(&session->machines.host)) { 2518 pr_err("ERROR: kcore is not readable.\n"); 2519 return -1; 2520 } 2521 2522 if (record__init_clock(rec)) 2523 return -1; 2524 2525 record__init_features(rec); 2526 2527 if (forks) { 2528 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2529 workload_exec_failed_signal); 2530 if (err < 0) { 2531 pr_err("Couldn't run the workload!\n"); 2532 status = err; 2533 goto out_delete_session; 2534 } 2535 } 2536 2537 /* 2538 * If we have just single event and are sending data 2539 * through pipe, we need to force the ids allocation, 2540 * because we synthesize event name through the pipe 2541 * and need the id for that. 
2542 */ 2543 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2544 rec->opts.sample_id = true; 2545 2546 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2547 rec->timestamp_filename = false; 2548 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2549 } 2550 2551 /* 2552 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE 2553 * and hybrid_merge is false. 2554 */ 2555 evlist__uniquify_evsel_names(rec->evlist, &stat_config); 2556 2557 evlist__config(rec->evlist, opts, &callchain_param); 2558 2559 /* Debug message used by test scripts */ 2560 pr_debug3("perf record opening and mmapping events\n"); 2561 if (record__open(rec) != 0) { 2562 err = -1; 2563 goto out_free_threads; 2564 } 2565 /* Debug message used by test scripts */ 2566 pr_debug3("perf record done opening and mmapping events\n"); 2567 env->comp_mmap_len = session->evlist->core.mmap_len; 2568 2569 if (rec->opts.kcore) { 2570 err = record__kcore_copy(&session->machines.host, data); 2571 if (err) { 2572 pr_err("ERROR: Failed to copy kcore\n"); 2573 goto out_free_threads; 2574 } 2575 } 2576 2577 /* 2578 * Normally perf_session__new would do this, but it doesn't have the 2579 * evlist. 2580 */ 2581 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2582 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2583 rec->tool.ordered_events = false; 2584 } 2585 2586 if (evlist__nr_groups(rec->evlist) == 0) 2587 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2588 2589 if (data->is_pipe) { 2590 err = perf_header__write_pipe(fd); 2591 if (err < 0) 2592 goto out_free_threads; 2593 } else { 2594 err = perf_session__write_header(session, rec->evlist, fd, false); 2595 if (err < 0) 2596 goto out_free_threads; 2597 } 2598 2599 err = -1; 2600 if (!rec->no_buildid 2601 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2602 pr_err("Couldn't generate buildids. 
" 2603 "Use --no-buildid to profile anyway.\n"); 2604 goto out_free_threads; 2605 } 2606 2607 if (!evlist__needs_bpf_sb_event(rec->evlist)) 2608 opts->no_bpf_event = true; 2609 2610 err = record__setup_sb_evlist(rec); 2611 if (err) 2612 goto out_free_threads; 2613 2614 err = record__synthesize(rec, false); 2615 if (err < 0) 2616 goto out_free_threads; 2617 2618 if (rec->realtime_prio) { 2619 struct sched_param param; 2620 2621 param.sched_priority = rec->realtime_prio; 2622 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 2623 pr_err("Could not set realtime priority.\n"); 2624 err = -1; 2625 goto out_free_threads; 2626 } 2627 } 2628 2629 if (record__start_threads(rec)) 2630 goto out_free_threads; 2631 2632 /* 2633 * When perf is starting the traced process, all the events 2634 * (apart from group members) have enable_on_exec=1 set, 2635 * so don't spoil it by prematurely enabling them. 2636 */ 2637 if (!target__none(&opts->target) && !opts->target.initial_delay) 2638 evlist__enable(rec->evlist); 2639 2640 /* 2641 * offcpu-time does not call execve, so enable_on_exe wouldn't work 2642 * when recording a workload, do it manually 2643 */ 2644 if (rec->off_cpu) 2645 evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT); 2646 2647 /* 2648 * Let the child rip 2649 */ 2650 if (forks) { 2651 struct machine *machine = &session->machines.host; 2652 union perf_event *event; 2653 pid_t tgid; 2654 2655 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2656 if (event == NULL) { 2657 err = -ENOMEM; 2658 goto out_child; 2659 } 2660 2661 /* 2662 * Some H/W events are generated before COMM event 2663 * which is emitted during exec(), so perf script 2664 * cannot see a correct process name for those events. 2665 * Synthesize COMM event to prevent it. 
2666 */ 2667 tgid = perf_event__synthesize_comm(tool, event, 2668 rec->evlist->workload.pid, 2669 process_synthesized_event, 2670 machine); 2671 free(event); 2672 2673 if (tgid == -1) 2674 goto out_child; 2675 2676 event = malloc(sizeof(event->namespaces) + 2677 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2678 machine->id_hdr_size); 2679 if (event == NULL) { 2680 err = -ENOMEM; 2681 goto out_child; 2682 } 2683 2684 /* 2685 * Synthesize NAMESPACES event for the command specified. 2686 */ 2687 perf_event__synthesize_namespaces(tool, event, 2688 rec->evlist->workload.pid, 2689 tgid, process_synthesized_event, 2690 machine); 2691 free(event); 2692 2693 evlist__start_workload(rec->evlist); 2694 } 2695 2696 if (opts->target.initial_delay) { 2697 pr_info(EVLIST_DISABLED_MSG); 2698 if (opts->target.initial_delay > 0) { 2699 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2700 evlist__enable(rec->evlist); 2701 pr_info(EVLIST_ENABLED_MSG); 2702 } 2703 } 2704 2705 err = event_enable_timer__start(rec->evlist->eet); 2706 if (err) 2707 goto out_child; 2708 2709 /* Debug message used by test scripts */ 2710 pr_debug3("perf record has started\n"); 2711 fflush(stderr); 2712 2713 trigger_ready(&auxtrace_snapshot_trigger); 2714 trigger_ready(&switch_output_trigger); 2715 perf_hooks__invoke_record_start(); 2716 2717 /* 2718 * Must write FINISHED_INIT so it will be seen after all other 2719 * synthesized user events, but before any regular events. 2720 */ 2721 err = write_finished_init(rec, false); 2722 if (err < 0) 2723 goto out_child; 2724 2725 for (;;) { 2726 unsigned long long hits = thread->samples; 2727 2728 /* 2729 * rec->evlist->bkw_mmap_state is possible to be 2730 * BKW_MMAP_EMPTY here: when done == true and 2731 * hits != rec->samples in previous round. 2732 * 2733 * evlist__toggle_bkw_mmap ensure we never 2734 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 
2735 */ 2736 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2737 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2738 2739 if (record__mmap_read_all(rec, false) < 0) { 2740 trigger_error(&auxtrace_snapshot_trigger); 2741 trigger_error(&switch_output_trigger); 2742 err = -1; 2743 goto out_child; 2744 } 2745 2746 if (auxtrace_record__snapshot_started) { 2747 auxtrace_record__snapshot_started = 0; 2748 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2749 record__read_auxtrace_snapshot(rec, false); 2750 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2751 pr_err("AUX area tracing snapshot failed\n"); 2752 err = -1; 2753 goto out_child; 2754 } 2755 } 2756 2757 if (trigger_is_hit(&switch_output_trigger)) { 2758 /* 2759 * If switch_output_trigger is hit, the data in 2760 * overwritable ring buffer should have been collected, 2761 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2762 * 2763 * If SIGUSR2 raise after or during record__mmap_read_all(), 2764 * record__mmap_read_all() didn't collect data from 2765 * overwritable ring buffer. Read again. 2766 */ 2767 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2768 continue; 2769 trigger_ready(&switch_output_trigger); 2770 2771 /* 2772 * Reenable events in overwrite ring buffer after 2773 * record__mmap_read_all(): we should have collected 2774 * data from it. 
2775 */ 2776 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2777 2778 if (!quiet) 2779 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2780 record__waking(rec)); 2781 thread->waking = 0; 2782 fd = record__switch_output(rec, false); 2783 if (fd < 0) { 2784 pr_err("Failed to switch to new file\n"); 2785 trigger_error(&switch_output_trigger); 2786 err = fd; 2787 goto out_child; 2788 } 2789 2790 /* re-arm the alarm */ 2791 if (rec->switch_output.time) 2792 alarm(rec->switch_output.time); 2793 } 2794 2795 if (hits == thread->samples) { 2796 if (done || draining) 2797 break; 2798 err = fdarray__poll(&thread->pollfd, -1); 2799 /* 2800 * Propagate error, only if there's any. Ignore positive 2801 * number of returned events and interrupt error. 2802 */ 2803 if (err > 0 || (err < 0 && errno == EINTR)) 2804 err = 0; 2805 thread->waking++; 2806 2807 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2808 record__thread_munmap_filtered, NULL) == 0) 2809 draining = true; 2810 2811 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2812 if (err) 2813 goto out_child; 2814 } 2815 2816 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2817 switch (cmd) { 2818 case EVLIST_CTL_CMD_SNAPSHOT: 2819 hit_auxtrace_snapshot_trigger(rec); 2820 evlist__ctlfd_ack(rec->evlist); 2821 break; 2822 case EVLIST_CTL_CMD_STOP: 2823 done = 1; 2824 break; 2825 case EVLIST_CTL_CMD_ACK: 2826 case EVLIST_CTL_CMD_UNSUPPORTED: 2827 case EVLIST_CTL_CMD_ENABLE: 2828 case EVLIST_CTL_CMD_DISABLE: 2829 case EVLIST_CTL_CMD_EVLIST: 2830 case EVLIST_CTL_CMD_PING: 2831 default: 2832 break; 2833 } 2834 } 2835 2836 err = event_enable_timer__process(rec->evlist->eet); 2837 if (err < 0) 2838 goto out_child; 2839 if (err) { 2840 err = 0; 2841 done = 1; 2842 } 2843 2844 /* 2845 * When perf is starting the traced process, at the end events 2846 * die with the process and we wait for that. Thus no need to 2847 * disable events in this case. 
2848 */ 2849 if (done && !disabled && !target__none(&opts->target)) { 2850 trigger_off(&auxtrace_snapshot_trigger); 2851 evlist__disable(rec->evlist); 2852 disabled = true; 2853 } 2854 } 2855 2856 trigger_off(&auxtrace_snapshot_trigger); 2857 trigger_off(&switch_output_trigger); 2858 2859 record__synthesize_final_bpf_metadata(rec); 2860 2861 if (opts->auxtrace_snapshot_on_exit) 2862 record__auxtrace_snapshot_exit(rec); 2863 2864 if (forks && workload_exec_errno) { 2865 char msg[STRERR_BUFSIZE]; 2866 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2867 struct strbuf sb = STRBUF_INIT; 2868 2869 evlist__format_evsels(rec->evlist, &sb, 2048); 2870 2871 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2872 sb.buf, argv[0], emsg); 2873 strbuf_release(&sb); 2874 err = -1; 2875 goto out_child; 2876 } 2877 2878 if (!quiet) 2879 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2880 record__waking(rec)); 2881 2882 write_finished_init(rec, true); 2883 2884 if (target__none(&rec->opts.target)) 2885 record__synthesize_workload(rec, true); 2886 2887 out_child: 2888 record__stop_threads(rec); 2889 record__mmap_read_all(rec, true); 2890 out_free_threads: 2891 record__free_thread_data(rec); 2892 evlist__finalize_ctlfd(rec->evlist); 2893 record__aio_mmap_read_sync(rec); 2894 2895 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2896 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2897 env->comp_ratio = ratio + 0.5; 2898 } 2899 2900 if (forks) { 2901 int exit_status; 2902 2903 if (!child_finished) 2904 kill(rec->evlist->workload.pid, SIGTERM); 2905 2906 wait(&exit_status); 2907 2908 if (err < 0) 2909 status = err; 2910 else if (WIFEXITED(exit_status)) 2911 status = WEXITSTATUS(exit_status); 2912 else if (WIFSIGNALED(exit_status)) 2913 signr = WTERMSIG(exit_status); 2914 } else 2915 status = err; 2916 2917 if (rec->off_cpu) 2918 rec->bytes_written += 
	/* completes: rec->bytes_written += off_cpu_write(rec->session); */
	off_cpu_write(rec->session);

	record__read_lost_samples(rec);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		record__synthesize(rec, true);
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	/* Final summary line, e.g. "[ perf record: Captured and wrote ... ]". */
	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
				rec->session->bytes_transferred / 1024.0 / 1024.0,
				ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		fd = done_fd;
		done_fd = -1;

		close(fd);
	}
#endif
	zstd_fini(&session->zstd_data);
	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(rec->sb_evlist);

	perf_session__delete(session);
	return status;
}

/* Log the configured callchain mode (and DWARF stack-dump size, if used). */
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

/*
 * Parse a --call-graph argument into *callchain; on --no-call-graph (unset)
 * disable callchains entirely.  DWARF unwind additionally turns on data-mmap
 * recording unless the user set it explicitly.
 */
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF &&
		    !record->record_data_mmap_set)
			record->record_data_mmap = true;
		callchain_debug(callchain);
	}

	return ret;
}

/* parse-options callback wrapping record_opts__parse_callchain(). */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

/* -g with no argument: enable callchains, defaulting to frame pointers. */
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

/*
 * perfconfig handler for the [record] section: build-id policy, call-graph
 * mode, AIO block count and debuginfod URLs.
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = rec->no_buildid_cache = true;
		else if (!strcmp(value, "mmap"))
			rec->buildid_mmap = true;
		else if (!strcmp(value, "no-mmap"))
			rec->buildid_mmap = false;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		/* delegate to the generic call-graph.record-mode handler */
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif
	if (!strcmp(var, "record.debuginfod")) {
		rec->debuginfod.urls = strdup(value);
		if (!rec->debuginfod.urls)
			return -ENOMEM;
		rec->debuginfod.set = true;
	}

	return 0;
}

/* parse-options callback for --delay / event-enable timing. */
static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
{
	struct record *rec = (struct record *)opt->value;

	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
}

/* parse-options callback for --affinity={node|cpu}. */
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}

/* Allocate a zeroed CPU bitmap of nr_bits bits. */
static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
{
	mask->nbits = nr_bits;
	mask->bits = bitmap_zalloc(mask->nbits);
	if (!mask->bits)
		return -ENOMEM;

	return 0;
}

/* Release a CPU bitmap allocated by record__mmap_cpu_mask_alloc(). */
static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
{
	bitmap_free(mask->bits);
	mask->nbits = 0;
}

/*
 * Allocate the maps + affinity bitmap pair for one thread mask; on partial
 * failure the already-allocated half is freed and its pointer NULLed.
 */
static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
{
	int ret;

	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
	if (ret) {
		mask->affinity.bits = NULL;
		return ret;
	}

	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
	if (ret) {
		record__mmap_cpu_mask_free(&mask->maps);
		mask->maps.bits = NULL;
	}

	return ret;
}

/* Free both bitmaps of a thread mask. */
static void record__thread_mask_free(struct thread_mask *mask)
{
	record__mmap_cpu_mask_free(&mask->maps);
	record__mmap_cpu_mask_free(&mask->affinity);
}
/*
 * Parse --threads[=spec]: empty spec means per-CPU threads; otherwise match
 * one of the predefined specs (cpu/core/package/numa) or keep the string as
 * a user-defined spec.
 */
static int record__parse_threads(const struct option *opt, const char *str, int unset)
{
	int s;
	struct record_opts *opts = opt->value;

	if (unset || !str || !strlen(str)) {
		opts->threads_spec = THREAD_SPEC__CPU;
	} else {
		for (s = 1; s < THREAD_SPEC__MAX; s++) {
			if (s == THREAD_SPEC__USER) {
				opts->threads_user_spec = strdup(str);
				if (!opts->threads_user_spec)
					return -ENOMEM;
				opts->threads_spec = THREAD_SPEC__USER;
				break;
			}
			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
				opts->threads_spec = s;
				break;
			}
		}
	}

	if (opts->threads_spec == THREAD_SPEC__USER)
		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
	else
		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);

	return 0;
}

/* Parse --max-size with B/K/M/G suffixes into an unsigned long byte count. */
static int parse_output_max_size(const struct option *opt,
				 const char *str, int unset)
{
	unsigned long *s = (unsigned long *)opt->value;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (unset) {
		*s = 0;
		return 0;
	}

	val = parse_tag_value(str, tags_size);
	if (val != (unsigned long) -1) {
		*s = val;
		return 0;
	}

	return -1;
}

/*
 * Parse -m/--mmap-pages, optionally "pages,auxtrace_pages": the part before
 * the comma sets the event mmap size, the part after it the AUX area size.
 */
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

/* Parse --off-cpu-thresh in milliseconds; stored internally in nanoseconds. */
static int record__parse_off_cpu_thresh(const struct option *opt,
					const char *str,
					int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *endptr;
	u64 off_cpu_thresh_ms;

	if (!str)
		return -EINVAL;

	off_cpu_thresh_ms = strtoull(str, &endptr, 10);

	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
		return -EINVAL;
	else
		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;

	return 0;
}

/* Arch hook: no-op by default, overridden where leaf-frame records apply. */
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}

/* parse-options callback for --control fd:...,ack-fd / fifo paths. */
static int parse_control_option(const struct option *opt,
				const char *str,
				int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;

	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
}

/*
 * Warn when the --switch-output size threshold is smaller than half the
 * kernel ring-buffer wakeup size: the output files would grow past the
 * requested limit before a wakeup happens.
 */
static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

/*
 * Configure --switch-output: "signal" or a size (B/K/M/G) or time (s/m/h/d)
 * threshold.  --switch-output-event implies signal mode.  Neither variant is
 * supported in parallel streaming mode.  Enabling switch-output also forces
 * timestamped output file names.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 *  thread to its parent.
	 */
	if (rec->switch_output_event_set) {
		if (record__threads_enabled(rec)) {
			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
			return 0;
		}
		goto do_signal;
	}

	if (!s->set)
		return 0;

	if (record__threads_enabled(rec)) {
		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
		return 0;
	}

	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/* Like perf_event__process_mmap(), but skip kernel maps (already created). */
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;
	return perf_event__process_mmap(tool, event, sample, machine);
}

/* MMAP2 counterpart of build_id__process_mmap(): user maps only. */
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap2(tool, event, sample, machine);
}

/* Track first/last event timestamps seen, for the session time window. */
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event __maybe_unused,
				      struct perf_sample *sample,
				      struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);
	return 0;
}

/* Parse --synth=<opt list> into the bitmask of event types to synthesize. */
static int parse_record_synth_option(const struct option *opt,
				     const char *str,
				     int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *p = strdup(str);

	if (p == NULL)
		return -1;

	opts->synth = parse_synth_opt(p);
	free(p);

	if (opts->synth < 0) {
		pr_err("Invalid synth option: %s\n", str);
		return -1;
	}
	return 0;
}

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
		.ctl_fd              = -1,
		.ctl_fd_ack          = -1,
		.synth               = PERF_SYNTH_ALL,
		.off_cpu_thresh_ns   = OFFCPU_THRESH,
	},
	.buildid_mmap = true,
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse options, don't actually record. */
static bool dry_run;

static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/* -e targets for --switch-output-event go to the side-band evlist. */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. 
use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "latency", &record.latency,
		    "Enable data collection for latency profiling.\n"
		    "\t\t\t Use perf report --latency for latency-centric profile."),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	/* --- target selection --- */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	/* --- sampling rate / buffer geometry --- */
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	/* --- per-sample payload selection --- */
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
		    "Record the sampled data address data page size"),
	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
		    "Record the sampled code address (ip) page size"),
	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
		    "Record the data source for memory operations"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
		    "Record the sample identifier"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_CALLBACK('D', "delay", &record, "ms",
		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
		     record__parse_event_enable_time),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	/* --- branch stack sampling --- */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	/* --- AUX area tracing --- */
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
			&record.opts.record_switch_events_set,
			"Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
			"Record build-id in mmap events and skip build-id processing."),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	/* --- output file switching --- */
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			      "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			      "signal"),
	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
			 &record.switch_output_event_set, "switch output event",
			 "switch output event selector. use 'perf list' to list available events",
			 parse_events_option_new_evlist),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		    "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
#ifdef HAVE_LIBPFM
	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
		     "libpfm4 event selector. use 'perf list' to list available events",
		     parse_libpfm_events_option),
#endif
	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
		     "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
		     "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
		     "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
		     parse_control_option),
	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
			      &record.debuginfod.set, "debuginfod urls",
			      "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
			      "system"),
	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
			    "write collected trace data into several data files using parallel threads",
			    record__parse_threads),
	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
		   "BPF filter action"),
	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", 3686 record__parse_off_cpu_thresh), 3687 OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap, 3688 &record.opts.record_data_mmap_set, 3689 "Record mmap events for non-executable mappings"), 3690 OPT_END() 3691 }; 3692 3693 struct option *record_options = __record_options; 3694 3695 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3696 { 3697 struct perf_cpu cpu; 3698 int idx; 3699 3700 if (cpu_map__is_dummy(cpus)) 3701 return 0; 3702 3703 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3704 /* Return ENODEV is input cpu is greater than max cpu */ 3705 if ((unsigned long)cpu.cpu > mask->nbits) 3706 return -ENODEV; 3707 __set_bit(cpu.cpu, mask->bits); 3708 } 3709 3710 return 0; 3711 } 3712 3713 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3714 { 3715 struct perf_cpu_map *cpus; 3716 3717 cpus = perf_cpu_map__new(mask_spec); 3718 if (!cpus) 3719 return -ENOMEM; 3720 3721 bitmap_zero(mask->bits, mask->nbits); 3722 if (record__mmap_cpu_mask_init(mask, cpus)) 3723 return -ENODEV; 3724 3725 perf_cpu_map__put(cpus); 3726 3727 return 0; 3728 } 3729 3730 static void record__free_thread_masks(struct record *rec, int nr_threads) 3731 { 3732 int t; 3733 3734 if (rec->thread_masks) 3735 for (t = 0; t < nr_threads; t++) 3736 record__thread_mask_free(&rec->thread_masks[t]); 3737 3738 zfree(&rec->thread_masks); 3739 } 3740 3741 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3742 { 3743 int t, ret; 3744 3745 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3746 if (!rec->thread_masks) { 3747 pr_err("Failed to allocate thread masks\n"); 3748 return -ENOMEM; 3749 } 3750 3751 for (t = 0; t < nr_threads; t++) { 3752 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3753 if (ret) { 3754 pr_err("Failed to allocate thread masks[%d]\n", t); 3755 goto out_free; 3756 } 3757 } 3758 3759 return 
0; 3760 3761 out_free: 3762 record__free_thread_masks(rec, nr_threads); 3763 3764 return ret; 3765 } 3766 3767 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3768 { 3769 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3770 3771 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3772 if (ret) 3773 return ret; 3774 3775 rec->nr_threads = nr_cpus; 3776 pr_debug("nr_threads: %d\n", rec->nr_threads); 3777 3778 for (t = 0; t < rec->nr_threads; t++) { 3779 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3780 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3781 if (verbose > 0) { 3782 pr_debug("thread_masks[%d]: ", t); 3783 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3784 pr_debug("thread_masks[%d]: ", t); 3785 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3786 } 3787 } 3788 3789 return 0; 3790 } 3791 3792 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3793 const char **maps_spec, const char **affinity_spec, 3794 u32 nr_spec) 3795 { 3796 u32 s; 3797 int ret = 0, t = 0; 3798 struct mmap_cpu_mask cpus_mask; 3799 struct thread_mask thread_mask, full_mask, *thread_masks; 3800 3801 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3802 if (ret) { 3803 pr_err("Failed to allocate CPUs mask\n"); 3804 return ret; 3805 } 3806 3807 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3808 if (ret) { 3809 pr_err("Failed to init cpu mask\n"); 3810 goto out_free_cpu_mask; 3811 } 3812 3813 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3814 if (ret) { 3815 pr_err("Failed to allocate full mask\n"); 3816 goto out_free_cpu_mask; 3817 } 3818 3819 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3820 if (ret) { 3821 pr_err("Failed to allocate thread mask\n"); 3822 goto out_free_full_and_cpu_masks; 3823 } 3824 3825 for (s = 0; s < nr_spec; s++) { 
3826 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3827 if (ret) { 3828 pr_err("Failed to initialize maps thread mask\n"); 3829 goto out_free; 3830 } 3831 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3832 if (ret) { 3833 pr_err("Failed to initialize affinity thread mask\n"); 3834 goto out_free; 3835 } 3836 3837 /* ignore invalid CPUs but do not allow empty masks */ 3838 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3839 cpus_mask.bits, thread_mask.maps.nbits)) { 3840 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3841 ret = -EINVAL; 3842 goto out_free; 3843 } 3844 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3845 cpus_mask.bits, thread_mask.affinity.nbits)) { 3846 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3847 ret = -EINVAL; 3848 goto out_free; 3849 } 3850 3851 /* do not allow intersection with other masks (full_mask) */ 3852 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3853 thread_mask.maps.nbits)) { 3854 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3855 ret = -EINVAL; 3856 goto out_free; 3857 } 3858 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3859 thread_mask.affinity.nbits)) { 3860 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3861 ret = -EINVAL; 3862 goto out_free; 3863 } 3864 3865 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3866 thread_mask.maps.bits, full_mask.maps.nbits); 3867 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3868 thread_mask.affinity.bits, full_mask.maps.nbits); 3869 3870 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3871 if (!thread_masks) { 3872 pr_err("Failed to reallocate thread masks\n"); 3873 ret = -ENOMEM; 3874 goto out_free; 3875 } 3876 rec->thread_masks = thread_masks; 3877 rec->thread_masks[t] = thread_mask; 3878 if (verbose > 0) { 3879 pr_debug("thread_masks[%d]: ", t); 3880 
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		/* thread_mask was handed to rec; allocate a fresh one for the next spec. */
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	/* at least one non-empty, non-overlapping spec is required */
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

/*
 * --threads=core: one writer thread per core; masks are taken from the
 * CPU topology's per-core CPU lists.
 */
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

/*
 * --threads=package: one writer thread per package (socket); masks are
 * taken from the CPU topology's per-package CPU lists.
 */
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

/*
 * --threads=numa: one writer thread per NUMA node; both maps and
 * affinity masks use each node's CPU list.
 */
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate 
NUMA topology\n");
		return -ENOMEM;
	}

	/* Borrow each node's cpus string; only the pointer array is owned here. */
	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

/*
 * --threads=<spec>: parse a user-provided thread spec of the form
 * "maps0/affinity0:maps1/affinity1:..." into parallel arrays of
 * maps/affinity CPU list strings, then build the thread masks from them.
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	/* dup_mask tracks a maps string not yet paired, so out_free can release it */
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	/* strtok_r over ':' for specs, then over '/' for maps vs affinity */
	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		
affinity_spec[nr_spec] = strdup(mask); 4018 if (!affinity_spec[nr_spec]) { 4019 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 4020 ret = -ENOMEM; 4021 goto out_free; 4022 } 4023 dup_mask = NULL; 4024 nr_spec++; 4025 } 4026 4027 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 4028 (const char **)affinity_spec, nr_spec); 4029 4030 out_free: 4031 free(dup_mask); 4032 for (s = 0; s < nr_spec; s++) { 4033 if (maps_spec) 4034 free(maps_spec[s]); 4035 if (affinity_spec) 4036 free(affinity_spec[s]); 4037 } 4038 free(affinity_spec); 4039 free(maps_spec); 4040 4041 return ret; 4042 } 4043 4044 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 4045 { 4046 int ret; 4047 4048 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 4049 if (ret) 4050 return ret; 4051 4052 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 4053 return -ENODEV; 4054 4055 rec->nr_threads = 1; 4056 4057 return 0; 4058 } 4059 4060 static int record__init_thread_masks(struct record *rec) 4061 { 4062 int ret = 0; 4063 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 4064 4065 if (!record__threads_enabled(rec)) 4066 return record__init_thread_default_masks(rec, cpus); 4067 4068 if (evlist__per_thread(rec->evlist)) { 4069 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 4070 return -EINVAL; 4071 } 4072 4073 switch (rec->opts.threads_spec) { 4074 case THREAD_SPEC__CPU: 4075 ret = record__init_thread_cpu_masks(rec, cpus); 4076 break; 4077 case THREAD_SPEC__CORE: 4078 ret = record__init_thread_core_masks(rec, cpus); 4079 break; 4080 case THREAD_SPEC__PACKAGE: 4081 ret = record__init_thread_package_masks(rec, cpus); 4082 break; 4083 case THREAD_SPEC__NUMA: 4084 ret = record__init_thread_numa_masks(rec, cpus); 4085 break; 4086 case THREAD_SPEC__USER: 4087 ret = record__init_thread_user_masks(rec, cpus); 4088 break; 4089 default: 4090 break; 4091 } 4092 4093 return ret; 
4094 } 4095 4096 int cmd_record(int argc, const char **argv) 4097 { 4098 int err; 4099 struct record *rec = &record; 4100 char errbuf[BUFSIZ]; 4101 4102 setlocale(LC_ALL, ""); 4103 4104 #ifndef HAVE_BPF_SKEL 4105 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 4106 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 4107 # undef set_nobuild 4108 #endif 4109 4110 /* Disable eager loading of kernel symbols that adds overhead to perf record. */ 4111 symbol_conf.lazy_load_kernel_maps = true; 4112 rec->opts.affinity = PERF_AFFINITY_SYS; 4113 4114 rec->evlist = evlist__new(); 4115 if (rec->evlist == NULL) 4116 return -ENOMEM; 4117 4118 err = perf_config(perf_record_config, rec); 4119 if (err) 4120 return err; 4121 4122 argc = parse_options(argc, argv, record_options, record_usage, 4123 PARSE_OPT_STOP_AT_NON_OPTION); 4124 if (quiet) 4125 perf_quiet_option(); 4126 4127 err = symbol__validate_sym_arguments(); 4128 if (err) 4129 return err; 4130 4131 perf_debuginfod_setup(&record.debuginfod); 4132 4133 /* Make system wide (-a) the default target. */ 4134 if (!argc && target__none(&rec->opts.target)) 4135 rec->opts.target.system_wide = true; 4136 4137 if (nr_cgroups && !rec->opts.target.system_wide) { 4138 usage_with_options_msg(record_usage, record_options, 4139 "cgroup monitoring only available in system-wide mode"); 4140 4141 } 4142 4143 if (record.latency) { 4144 /* 4145 * There is no fundamental reason why latency profiling 4146 * can't work for system-wide mode, but exact semantics 4147 * and details are to be defined. 
4148 * See the following thread for details: 4149 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/ 4150 */ 4151 if (record.opts.target.system_wide) { 4152 pr_err("Failed: latency profiling is not supported with system-wide collection.\n"); 4153 err = -EINVAL; 4154 goto out_opts; 4155 } 4156 record.opts.record_switch_events = true; 4157 } 4158 4159 if (rec->buildid_mmap && !perf_can_record_build_id()) { 4160 pr_warning("Missing support for build id in kernel mmap events.\n" 4161 "Disable this warning with --no-buildid-mmap\n"); 4162 rec->buildid_mmap = false; 4163 } 4164 4165 if (rec->buildid_mmap) { 4166 /* Enable perf_event_attr::build_id bit. */ 4167 rec->opts.build_id = true; 4168 /* Disable build-ID table in the header. */ 4169 rec->no_buildid = true; 4170 } else { 4171 pr_debug("Disabling build id in synthesized mmap2 events.\n"); 4172 symbol_conf.no_buildid_mmap2 = true; 4173 } 4174 4175 if (rec->no_buildid_set && rec->no_buildid) { 4176 /* -B implies -N for historic reasons. 
*/ 4177 rec->no_buildid_cache = true; 4178 } 4179 4180 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 4181 pr_err("Kernel has no cgroup sampling support.\n"); 4182 err = -EINVAL; 4183 goto out_opts; 4184 } 4185 4186 if (rec->opts.kcore) 4187 rec->opts.text_poke = true; 4188 4189 if (rec->opts.kcore || record__threads_enabled(rec)) 4190 rec->data.is_dir = true; 4191 4192 if (record__threads_enabled(rec)) { 4193 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 4194 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 4195 goto out_opts; 4196 } 4197 if (record__aio_enabled(rec)) { 4198 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 4199 goto out_opts; 4200 } 4201 } 4202 4203 if (rec->opts.comp_level != 0) { 4204 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 4205 rec->no_buildid = true; 4206 } 4207 4208 if (rec->opts.record_switch_events && 4209 !perf_can_record_switch_events()) { 4210 ui__error("kernel does not support recording context switch events\n"); 4211 parse_options_usage(record_usage, record_options, "switch-events", 0); 4212 err = -EINVAL; 4213 goto out_opts; 4214 } 4215 4216 if (switch_output_setup(rec)) { 4217 parse_options_usage(record_usage, record_options, "switch-output", 0); 4218 err = -EINVAL; 4219 goto out_opts; 4220 } 4221 4222 if (rec->switch_output.time) { 4223 signal(SIGALRM, alarm_sig_handler); 4224 alarm(rec->switch_output.time); 4225 } 4226 4227 if (rec->switch_output.num_files) { 4228 rec->switch_output.filenames = calloc(rec->switch_output.num_files, 4229 sizeof(char *)); 4230 if (!rec->switch_output.filenames) { 4231 err = -EINVAL; 4232 goto out_opts; 4233 } 4234 } 4235 4236 if (rec->timestamp_filename && record__threads_enabled(rec)) { 4237 rec->timestamp_filename = false; 4238 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 4239 } 4240 4241 if 
(rec->filter_action) { 4242 if (!strcmp(rec->filter_action, "pin")) 4243 err = perf_bpf_filter__pin(); 4244 else if (!strcmp(rec->filter_action, "unpin")) 4245 err = perf_bpf_filter__unpin(); 4246 else { 4247 pr_warning("Unknown BPF filter action: %s\n", rec->filter_action); 4248 err = -EINVAL; 4249 } 4250 goto out_opts; 4251 } 4252 4253 /* For backward compatibility, -d implies --mem-info and --data-mmap */ 4254 if (rec->opts.sample_address) { 4255 rec->opts.sample_data_src = true; 4256 if (!rec->opts.record_data_mmap_set) 4257 rec->opts.record_data_mmap = true; 4258 } 4259 4260 /* 4261 * Allow aliases to facilitate the lookup of symbols for address 4262 * filters. Refer to auxtrace_parse_filters(). 4263 */ 4264 symbol_conf.allow_aliases = true; 4265 4266 symbol__init(NULL); 4267 4268 err = record__auxtrace_init(rec); 4269 if (err) 4270 goto out; 4271 4272 if (dry_run) 4273 goto out; 4274 4275 err = -ENOMEM; 4276 4277 if (rec->no_buildid_cache) { 4278 disable_buildid_cache(); 4279 } else if (rec->switch_output.enabled) { 4280 /* 4281 * In 'perf record --switch-output', disable buildid 4282 * generation by default to reduce data file switching 4283 * overhead. 
Still generate buildid if they are required 4284 * explicitly using 4285 * 4286 * perf record --switch-output --no-no-buildid \ 4287 * --no-no-buildid-cache 4288 * 4289 * Following code equals to: 4290 * 4291 * if ((rec->no_buildid || !rec->no_buildid_set) && 4292 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 4293 * disable_buildid_cache(); 4294 */ 4295 bool disable = true; 4296 4297 if (rec->no_buildid_set && !rec->no_buildid) 4298 disable = false; 4299 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 4300 disable = false; 4301 if (disable) { 4302 rec->no_buildid = true; 4303 rec->no_buildid_cache = true; 4304 disable_buildid_cache(); 4305 } 4306 } 4307 4308 if (record.opts.overwrite) 4309 record.opts.tail_synthesize = true; 4310 4311 if (rec->evlist->core.nr_entries == 0) { 4312 struct evlist *def_evlist = evlist__new_default(); 4313 4314 if (!def_evlist) 4315 goto out; 4316 4317 evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries); 4318 evlist__delete(def_evlist); 4319 } 4320 4321 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4322 rec->opts.no_inherit = true; 4323 4324 err = target__validate(&rec->opts.target); 4325 if (err) { 4326 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4327 ui__warning("%s\n", errbuf); 4328 } 4329 4330 if (rec->uid_str) { 4331 uid_t uid = parse_uid(rec->uid_str); 4332 4333 if (uid == UINT_MAX) { 4334 ui__error("Invalid User: %s", rec->uid_str); 4335 err = -EINVAL; 4336 goto out; 4337 } 4338 err = parse_uid_filter(rec->evlist, uid); 4339 if (err) 4340 goto out; 4341 4342 /* User ID filtering implies system wide. */ 4343 rec->opts.target.system_wide = true; 4344 } 4345 4346 /* Enable ignoring missing threads when -p option is defined. 
*/ 4347 rec->opts.ignore_missing_thread = rec->opts.target.pid; 4348 4349 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4350 4351 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4352 arch__add_leaf_frame_record_opts(&rec->opts); 4353 4354 err = -ENOMEM; 4355 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4356 if (rec->opts.target.pid != NULL) { 4357 pr_err("Couldn't create thread/CPU maps: %s\n", 4358 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4359 goto out; 4360 } 4361 else 4362 usage_with_options(record_usage, record_options); 4363 } 4364 4365 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4366 if (err) 4367 goto out; 4368 4369 /* 4370 * We take all buildids when the file contains 4371 * AUX area tracing data because we do not decode the 4372 * trace because it would take too long. 4373 */ 4374 if (rec->opts.full_auxtrace) 4375 rec->buildid_all = true; 4376 4377 if (rec->opts.text_poke) { 4378 err = record__config_text_poke(rec->evlist); 4379 if (err) { 4380 pr_err("record__config_text_poke failed, error %d\n", err); 4381 goto out; 4382 } 4383 } 4384 4385 if (rec->off_cpu) { 4386 err = record__config_off_cpu(rec); 4387 if (err) { 4388 pr_err("record__config_off_cpu failed, error %d\n", err); 4389 goto out; 4390 } 4391 } 4392 4393 if (record_opts__config(&rec->opts)) { 4394 err = -EINVAL; 4395 goto out; 4396 } 4397 4398 err = record__config_tracking_events(rec); 4399 if (err) { 4400 pr_err("record__config_tracking_events failed, error %d\n", err); 4401 goto out; 4402 } 4403 4404 err = record__init_thread_masks(rec); 4405 if (err) { 4406 pr_err("Failed to initialize parallel data streaming masks\n"); 4407 goto out; 4408 } 4409 4410 if (rec->opts.nr_cblocks > nr_cblocks_max) 4411 rec->opts.nr_cblocks = nr_cblocks_max; 4412 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4413 4414 pr_debug("affinity: %s\n", 
affinity_tags[rec->opts.affinity]); 4415 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4416 4417 if (rec->opts.comp_level > comp_level_max) 4418 rec->opts.comp_level = comp_level_max; 4419 pr_debug("comp level: %d\n", rec->opts.comp_level); 4420 4421 err = __cmd_record(&record, argc, argv); 4422 out: 4423 record__free_thread_masks(rec, rec->nr_threads); 4424 rec->nr_threads = 0; 4425 symbol__exit(); 4426 auxtrace_record__free(rec->itr); 4427 out_opts: 4428 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4429 evlist__delete(rec->evlist); 4430 return err; 4431 } 4432 4433 static void snapshot_sig_handler(int sig __maybe_unused) 4434 { 4435 struct record *rec = &record; 4436 4437 hit_auxtrace_snapshot_trigger(rec); 4438 4439 if (switch_output_signal(rec)) 4440 trigger_hit(&switch_output_trigger); 4441 } 4442 4443 static void alarm_sig_handler(int sig __maybe_unused) 4444 { 4445 struct record *rec = &record; 4446 4447 if (switch_output_time(rec)) 4448 trigger_hit(&switch_output_trigger); 4449 } 4450