// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
	char		**filenames;
	int		num_files;
	int		cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];
		int		ack[2];
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
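/*
 * State for a single 'perf record' session: output data file, event list,
 * AUX area tracing handle, build-id handling switches and, when parallel
 * trace streaming is used, the per-thread masks and per-thread data.
 */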
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
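/*
 * Queue an asynchronous write of @size bytes from @buf at offset @off of
 * @trace_fd, retrying for as long as the AIO queue is full (EAGAIN).
 */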
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as fast
	 * as possible, calling perf_mmap__consume() from the perf_mmap__push()
	 * function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of
	 * the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer completes.
		 *
		 * perf_mmap__put() is done at record__aio_complete() once the
		 * started aio request completes, or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount (incremented in record__aio_pushfn())
		 * if the record__aio_write() operation failed to start; otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf   = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
	/*
	 * It is possible for this signal handler to run after done is checked
	 * in the main loop, but before the perf counter fds are polled. If this
	 * happens, the poll() will continue to wait even though done is set,
	 * and will only break out if either another signal is received, or the
	 * counters are ready for read. To ensure the poll() doesn't sleep when
	 * done is set, use an eventfd (done_fd) to wake up the poll().
	 */
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}
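/* Prepare the events needed for off-CPU profiling (--off-cpu). */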
static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();
			if (evlist->ctl_fd.pos == -1)
				continue;
			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
						      &evlist->core.pollfd);
			if (ret < 0) {
				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add a
	 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
	 * of waiting or event synthesis.
	 */
	if (opts->initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmu__has_hybrid()) {
		pos = evlist__get_tracking_event(evlist);
		if (!evsel__is_dummy_event(pos)) {
			/* Set up dummy event. */
			if (evlist__add_dummy(evlist))
				return -ENOMEM;
			pos = evlist__last(evlist);
			evlist__set_tracking_event(evlist, pos);
		}

		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->initial_delay && !pos->immediate &&
		    !target__has_cpu(&opts->target))
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
					(cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}
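/*
 * Drain every mmap'ed ring buffer owned by the current thread, either
 * synchronously or through the AIO path, compressing the data first when
 * compression is enabled.
 */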
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate an error only if there is one; ignore a
			 * positive number of returned events and interrupt
			 * (EINTR) errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 needs_mmap,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);
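/*
 * Finish the current output file and switch to a new timestamped one,
 * re-synthesizing the tracking events the new file needs.
 */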
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which causes the newly created perf.data
		 * to not contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
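/*
 * Emit the synthetic events (kernel/module mmaps, threads, CPU map, etc.)
 * that later tooling needs in order to make sense of the samples.
 */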
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0)
			pr_warning("Couldn't synthesize cgroup events.\n");
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->sample_address,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

	session->header.env.clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	session->header.env.clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	session->header.env.clock.clockid_ns = ref;
	return 0;
}

static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}

static void record__uniquify_name(struct record *rec)
{
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	char *new_name;
	int ret;

	if (!perf_pmu__has_hybrid())
		return;

	evlist__for_each_entry(evlist, pos) {
		if (!evsel__is_hybrid(pos))
			continue;

		if (strchr(pos->name, '/'))
			continue;

		ret = asprintf(&new_name, "%s/%s/",
			       pos->pmu_name, pos->name);
		if (ret > 0) {
			free(pos->name);
			pos->name = new_name;
		}
	}
}

static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}
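/*
 * Start the worker threads for parallel trace streaming and wait for each
 * one to signal THREAD_MSG__READY over its ack pipe.
 */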
mask; 2067 pthread_t handle; 2068 pthread_attr_t attrs; 2069 2070 thread = &thread_data[0]; 2071 2072 if (!record__threads_enabled(rec)) 2073 return 0; 2074 2075 sigfillset(&full); 2076 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2077 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2078 return -1; 2079 } 2080 2081 pthread_attr_init(&attrs); 2082 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2083 2084 for (t = 1; t < nr_threads; t++) { 2085 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2086 2087 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2088 pthread_attr_setaffinity_np(&attrs, 2089 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2090 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2091 #endif 2092 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2093 for (tt = 1; tt < t; tt++) 2094 record__terminate_thread(&thread_data[tt]); 2095 pr_err("Failed to start threads: %s\n", strerror(errno)); 2096 ret = -1; 2097 goto out_err; 2098 } 2099 2100 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2101 if (err > 0) 2102 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2103 thread_msg_tags[msg]); 2104 else 2105 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2106 thread->tid, rec->thread_data[t].tid); 2107 } 2108 2109 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2110 (cpu_set_t *)thread->mask->affinity.bits); 2111 2112 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2113 2114 out_err: 2115 pthread_attr_destroy(&attrs); 2116 2117 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2118 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2119 ret = -1; 2120 } 2121 2122 return ret; 2123 } 2124 2125 static int record__stop_threads(struct record *rec) 2126 { 2127 int t; 2128 struct record_thread *thread_data = rec->thread_data; 2129 2130 for (t = 1; t < rec->nr_threads; t++) 2131 record__terminate_thread(&thread_data[t]); 2132 2133 for (t = 0; t < rec->nr_threads; t++) { 2134 rec->samples += thread_data[t].samples; 2135 if (!record__threads_enabled(rec)) 2136 continue; 2137 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2138 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2139 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2140 thread_data[t].samples, thread_data[t].waking); 2141 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2142 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2143 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2144 else 2145 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2146 } 2147 2148 return 0; 2149 } 2150 2151 static unsigned long record__waking(struct record *rec) 2152 { 2153 int t; 2154 unsigned long waking = 0; 2155 struct record_thread *thread_data = rec->thread_data; 2156 2157 for (t = 0; t < rec->nr_threads; t++) 2158 waking += thread_data[t].waking; 2159 2160 return waking; 2161 } 2162 2163 static int __cmd_record(struct record *rec, int argc, const char **argv) 2164 { 2165 int err; 2166 int status = 0; 2167 const bool forks = argc > 0; 2168 struct perf_tool *tool = &rec->tool; 2169 struct record_opts *opts = &rec->opts; 2170 struct perf_data *data = &rec->data; 2171 struct perf_session *session; 2172 bool disabled = false, draining = false; 2173 int fd; 2174 float ratio = 0; 2175 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2176 2177
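	/*
	 * Install the exit hook and signal handlers before creating the
	 * session and entering the read loop below: SIGCHLD/SIGINT/SIGTERM
	 * flag 'done', SIGSEGV gets a dedicated handler, and SIGUSR2 drives
	 * AUX area snapshots and/or --switch-output when either mode is
	 * enabled (otherwise it is ignored).
	 */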
atexit(record__sig_exit); 2178 signal(SIGCHLD, sig_handler); 2179 signal(SIGINT, sig_handler); 2180 signal(SIGTERM, sig_handler); 2181 signal(SIGSEGV, sigsegv_handler); 2182 2183 if (rec->opts.record_namespaces) 2184 tool->namespace_events = true; 2185 2186 if (rec->opts.record_cgroup) { 2187 #ifdef HAVE_FILE_HANDLE 2188 tool->cgroup_events = true; 2189 #else 2190 pr_err("cgroup tracking is not supported\n"); 2191 return -1; 2192 #endif 2193 } 2194 2195 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2196 signal(SIGUSR2, snapshot_sig_handler); 2197 if (rec->opts.auxtrace_snapshot_mode) 2198 trigger_on(&auxtrace_snapshot_trigger); 2199 if (rec->switch_output.enabled) 2200 trigger_on(&switch_output_trigger); 2201 } else { 2202 signal(SIGUSR2, SIG_IGN); 2203 } 2204 2205 session = perf_session__new(data, tool); 2206 if (IS_ERR(session)) { 2207 pr_err("Perf session creation failed.\n"); 2208 return PTR_ERR(session); 2209 } 2210 2211 if (record__threads_enabled(rec)) { 2212 if (perf_data__is_pipe(&rec->data)) { 2213 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2214 return -1; 2215 } 2216 if (rec->opts.full_auxtrace) { 2217 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2218 return -1; 2219 } 2220 } 2221 2222 fd = perf_data__fd(data); 2223 rec->session = session; 2224 2225 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2226 pr_err("Compression initialization failed.\n"); 2227 return -1; 2228 } 2229 #ifdef HAVE_EVENTFD_SUPPORT 2230 done_fd = eventfd(0, EFD_NONBLOCK); 2231 if (done_fd < 0) { 2232 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2233 status = -1; 2234 goto out_delete_session; 2235 } 2236 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2237 if (err < 0) { 2238 pr_err("Failed to add wakeup eventfd to poll list\n"); 2239 status = err; 2240 goto out_delete_session; 2241 } 2242 #endif // HAVE_EVENTFD_SUPPORT 2243 2244 session->header.env.comp_type = PERF_COMP_ZSTD; 2245 session->header.env.comp_level = rec->opts.comp_level; 2246 2247 if (rec->opts.kcore && 2248 !record__kcore_readable(&session->machines.host)) { 2249 pr_err("ERROR: kcore is not readable.\n"); 2250 return -1; 2251 } 2252 2253 if (record__init_clock(rec)) 2254 return -1; 2255 2256 record__init_features(rec); 2257 2258 if (forks) { 2259 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2260 workload_exec_failed_signal); 2261 if (err < 0) { 2262 pr_err("Couldn't run the workload!\n"); 2263 status = err; 2264 goto out_delete_session; 2265 } 2266 } 2267 2268 /* 2269 * If we have just single event and are sending data 2270 * through pipe, we need to force the ids allocation, 2271 * because we synthesize event name through the pipe 2272 * and need the id for that. 
2273 */ 2274 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2275 rec->opts.sample_id = true; 2276 2277 record__uniquify_name(rec); 2278 2279 if (record__open(rec) != 0) { 2280 err = -1; 2281 goto out_free_threads; 2282 } 2283 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2284 2285 if (rec->opts.kcore) { 2286 err = record__kcore_copy(&session->machines.host, data); 2287 if (err) { 2288 pr_err("ERROR: Failed to copy kcore\n"); 2289 goto out_free_threads; 2290 } 2291 } 2292 2293 err = bpf__apply_obj_config(); 2294 if (err) { 2295 char errbuf[BUFSIZ]; 2296 2297 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2298 pr_err("ERROR: Apply config to BPF failed: %s\n", 2299 errbuf); 2300 goto out_free_threads; 2301 } 2302 2303 /* 2304 * Normally perf_session__new would do this, but it doesn't have the 2305 * evlist. 2306 */ 2307 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2308 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2309 rec->tool.ordered_events = false; 2310 } 2311 2312 if (!rec->evlist->core.nr_groups) 2313 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2314 2315 if (data->is_pipe) { 2316 err = perf_header__write_pipe(fd); 2317 if (err < 0) 2318 goto out_free_threads; 2319 } else { 2320 err = perf_session__write_header(session, rec->evlist, fd, false); 2321 if (err < 0) 2322 goto out_free_threads; 2323 } 2324 2325 err = -1; 2326 if (!rec->no_buildid 2327 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2328 pr_err("Couldn't generate buildids. " 2329 "Use --no-buildid to profile anyway.\n"); 2330 goto out_free_threads; 2331 } 2332 2333 err = record__setup_sb_evlist(rec); 2334 if (err) 2335 goto out_free_threads; 2336 2337 err = record__synthesize(rec, false); 2338 if (err < 0) 2339 goto out_free_threads; 2340 2341 if (rec->realtime_prio) { 2342 struct sched_param param; 2343 2344 param.sched_priority = rec->realtime_prio; 2345 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2346 pr_err("Could not set realtime priority.\n"); 2347 err = -1; 2348 goto out_free_threads; 2349 } 2350 } 2351 2352 if (record__start_threads(rec)) 2353 goto out_free_threads; 2354 2355 /* 2356 * When perf is starting the traced process, all the events 2357 * (apart from group members) have enable_on_exec=1 set, 2358 * so don't spoil it by prematurely enabling them. 2359 */ 2360 if (!target__none(&opts->target) && !opts->initial_delay) 2361 evlist__enable(rec->evlist); 2362 2363 /* 2364 * Let the child rip 2365 */ 2366 if (forks) { 2367 struct machine *machine = &session->machines.host; 2368 union perf_event *event; 2369 pid_t tgid; 2370 2371 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2372 if (event == NULL) { 2373 err = -ENOMEM; 2374 goto out_child; 2375 } 2376 2377 /* 2378 * Some H/W events are generated before COMM event 2379 * which is emitted during exec(), so perf script 2380 * cannot see a correct process name for those events. 2381 * Synthesize COMM event to prevent it. 2382 */ 2383 tgid = perf_event__synthesize_comm(tool, event, 2384 rec->evlist->workload.pid, 2385 process_synthesized_event, 2386 machine); 2387 free(event); 2388 2389 if (tgid == -1) 2390 goto out_child; 2391 2392 event = malloc(sizeof(event->namespaces) + 2393 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2394 machine->id_hdr_size); 2395 if (event == NULL) { 2396 err = -ENOMEM; 2397 goto out_child; 2398 } 2399 2400 /* 2401 * Synthesize NAMESPACES event for the command specified.
2402 */ 2403 perf_event__synthesize_namespaces(tool, event, 2404 rec->evlist->workload.pid, 2405 tgid, process_synthesized_event, 2406 machine); 2407 free(event); 2408 2409 evlist__start_workload(rec->evlist); 2410 } 2411 2412 if (opts->initial_delay) { 2413 pr_info(EVLIST_DISABLED_MSG); 2414 if (opts->initial_delay > 0) { 2415 usleep(opts->initial_delay * USEC_PER_MSEC); 2416 evlist__enable(rec->evlist); 2417 pr_info(EVLIST_ENABLED_MSG); 2418 } 2419 } 2420 2421 trigger_ready(&auxtrace_snapshot_trigger); 2422 trigger_ready(&switch_output_trigger); 2423 perf_hooks__invoke_record_start(); 2424 for (;;) { 2425 unsigned long long hits = thread->samples; 2426 2427 /* 2428 * rec->evlist->bkw_mmap_state is possible to be 2429 * BKW_MMAP_EMPTY here: when done == true and 2430 * hits != rec->samples in previous round. 2431 * 2432 * evlist__toggle_bkw_mmap ensure we never 2433 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2434 */ 2435 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2436 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2437 2438 if (record__mmap_read_all(rec, false) < 0) { 2439 trigger_error(&auxtrace_snapshot_trigger); 2440 trigger_error(&switch_output_trigger); 2441 err = -1; 2442 goto out_child; 2443 } 2444 2445 if (auxtrace_record__snapshot_started) { 2446 auxtrace_record__snapshot_started = 0; 2447 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2448 record__read_auxtrace_snapshot(rec, false); 2449 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2450 pr_err("AUX area tracing snapshot failed\n"); 2451 err = -1; 2452 goto out_child; 2453 } 2454 } 2455 2456 if (trigger_is_hit(&switch_output_trigger)) { 2457 /* 2458 * If switch_output_trigger is hit, the data in 2459 * overwritable ring buffer should have been collected, 2460 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2461 * 2462 * If SIGUSR2 raise after or during record__mmap_read_all(), 2463 * record__mmap_read_all() didn't collect data from 2464 * overwritable ring buffer. Read again. 2465 */ 2466 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2467 continue; 2468 trigger_ready(&switch_output_trigger); 2469 2470 /* 2471 * Reenable events in overwrite ring buffer after 2472 * record__mmap_read_all(): we should have collected 2473 * data from it. 2474 */ 2475 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2476 2477 if (!quiet) 2478 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2479 record__waking(rec)); 2480 thread->waking = 0; 2481 fd = record__switch_output(rec, false); 2482 if (fd < 0) { 2483 pr_err("Failed to switch to new file\n"); 2484 trigger_error(&switch_output_trigger); 2485 err = fd; 2486 goto out_child; 2487 } 2488 2489 /* re-arm the alarm */ 2490 if (rec->switch_output.time) 2491 alarm(rec->switch_output.time); 2492 } 2493 2494 if (hits == thread->samples) { 2495 if (done || draining) 2496 break; 2497 err = fdarray__poll(&thread->pollfd, -1); 2498 /* 2499 * Propagate error, only if there's any. Ignore positive 2500 * number of returned events and interrupt error. 
2501 */ 2502 if (err > 0 || (err < 0 && errno == EINTR)) 2503 err = 0; 2504 thread->waking++; 2505 2506 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2507 record__thread_munmap_filtered, NULL) == 0) 2508 draining = true; 2509 2510 evlist__ctlfd_update(rec->evlist, 2511 &thread->pollfd.entries[thread->ctlfd_pos]); 2512 } 2513 2514 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2515 switch (cmd) { 2516 case EVLIST_CTL_CMD_SNAPSHOT: 2517 hit_auxtrace_snapshot_trigger(rec); 2518 evlist__ctlfd_ack(rec->evlist); 2519 break; 2520 case EVLIST_CTL_CMD_STOP: 2521 done = 1; 2522 break; 2523 case EVLIST_CTL_CMD_ACK: 2524 case EVLIST_CTL_CMD_UNSUPPORTED: 2525 case EVLIST_CTL_CMD_ENABLE: 2526 case EVLIST_CTL_CMD_DISABLE: 2527 case EVLIST_CTL_CMD_EVLIST: 2528 case EVLIST_CTL_CMD_PING: 2529 default: 2530 break; 2531 } 2532 } 2533 2534 /* 2535 * When perf is starting the traced process, at the end events 2536 * die with the process and we wait for that. Thus no need to 2537 * disable events in this case. 2538 */ 2539 if (done && !disabled && !target__none(&opts->target)) { 2540 trigger_off(&auxtrace_snapshot_trigger); 2541 evlist__disable(rec->evlist); 2542 disabled = true; 2543 } 2544 } 2545 2546 trigger_off(&auxtrace_snapshot_trigger); 2547 trigger_off(&switch_output_trigger); 2548 2549 if (opts->auxtrace_snapshot_on_exit) 2550 record__auxtrace_snapshot_exit(rec); 2551 2552 if (forks && workload_exec_errno) { 2553 char msg[STRERR_BUFSIZE], strevsels[2048]; 2554 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2555 2556 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2557 2558 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2559 strevsels, argv[0], emsg); 2560 err = -1; 2561 goto out_child; 2562 } 2563 2564 if (!quiet) 2565 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2566 record__waking(rec)); 2567 2568 if (target__none(&rec->opts.target)) 2569 record__synthesize_workload(rec, true); 2570 2571 out_child: 2572 record__stop_threads(rec); 2573 record__mmap_read_all(rec, true); 2574 out_free_threads: 2575 record__free_thread_data(rec); 2576 evlist__finalize_ctlfd(rec->evlist); 2577 record__aio_mmap_read_sync(rec); 2578 2579 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2580 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2581 session->header.env.comp_ratio = ratio + 0.5; 2582 } 2583 2584 if (forks) { 2585 int exit_status; 2586 2587 if (!child_finished) 2588 kill(rec->evlist->workload.pid, SIGTERM); 2589 2590 wait(&exit_status); 2591 2592 if (err < 0) 2593 status = err; 2594 else if (WIFEXITED(exit_status)) 2595 status = WEXITSTATUS(exit_status); 2596 else if (WIFSIGNALED(exit_status)) 2597 signr = WTERMSIG(exit_status); 2598 } else 2599 status = err; 2600 2601 if (rec->off_cpu) 2602 rec->bytes_written += off_cpu_write(rec->session); 2603 2604 record__synthesize(rec, true); 2605 /* this will be recalculated during process_buildids() */ 2606 rec->samples = 0; 2607 2608 if (!err) { 2609 if (!rec->timestamp_filename) { 2610 record__finish_output(rec); 2611 } else { 2612 fd = record__switch_output(rec, true); 2613 if (fd < 0) { 2614 status = fd; 2615 goto out_delete_session; 2616 } 2617 } 2618 } 2619 2620 perf_hooks__invoke_record_end(); 2621 2622 if (!err && !quiet) { 2623 char samples[128]; 2624 const char *postfix = rec->timestamp_filename ? 
2625 ".<timestamp>" : ""; 2626 2627 if (rec->samples && !rec->opts.full_auxtrace) 2628 scnprintf(samples, sizeof(samples), 2629 " (%" PRIu64 " samples)", rec->samples); 2630 else 2631 samples[0] = '\0'; 2632 2633 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2634 perf_data__size(data) / 1024.0 / 1024.0, 2635 data->path, postfix, samples); 2636 if (ratio) { 2637 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2638 rec->session->bytes_transferred / 1024.0 / 1024.0, 2639 ratio); 2640 } 2641 fprintf(stderr, " ]\n"); 2642 } 2643 2644 out_delete_session: 2645 #ifdef HAVE_EVENTFD_SUPPORT 2646 if (done_fd >= 0) 2647 close(done_fd); 2648 #endif 2649 zstd_fini(&session->zstd_data); 2650 perf_session__delete(session); 2651 2652 if (!opts->no_bpf_event) 2653 evlist__stop_sb_thread(rec->sb_evlist); 2654 return status; 2655 } 2656 2657 static void callchain_debug(struct callchain_param *callchain) 2658 { 2659 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2660 2661 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2662 2663 if (callchain->record_mode == CALLCHAIN_DWARF) 2664 pr_debug("callchain: stack dump size %d\n", 2665 callchain->dump_size); 2666 } 2667 2668 int record_opts__parse_callchain(struct record_opts *record, 2669 struct callchain_param *callchain, 2670 const char *arg, bool unset) 2671 { 2672 int ret; 2673 callchain->enabled = !unset; 2674 2675 /* --no-call-graph */ 2676 if (unset) { 2677 callchain->record_mode = CALLCHAIN_NONE; 2678 pr_debug("callchain: disabled\n"); 2679 return 0; 2680 } 2681 2682 ret = parse_callchain_record_opt(arg, callchain); 2683 if (!ret) { 2684 /* Enable data address sampling for DWARF unwind. */ 2685 if (callchain->record_mode == CALLCHAIN_DWARF) 2686 record->sample_address = true; 2687 callchain_debug(callchain); 2688 } 2689 2690 return ret; 2691 } 2692 2693 int record_parse_callchain_opt(const struct option *opt, 2694 const char *arg, 2695 int unset) 2696 { 2697 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2698 } 2699 2700 int record_callchain_opt(const struct option *opt, 2701 const char *arg __maybe_unused, 2702 int unset __maybe_unused) 2703 { 2704 struct callchain_param *callchain = opt->value; 2705 2706 callchain->enabled = true; 2707 2708 if (callchain->record_mode == CALLCHAIN_NONE) 2709 callchain->record_mode = CALLCHAIN_FP; 2710 2711 callchain_debug(callchain); 2712 return 0; 2713 } 2714 2715 static int perf_record_config(const char *var, const char *value, void *cb) 2716 { 2717 struct record *rec = cb; 2718 2719 if (!strcmp(var, "record.build-id")) { 2720 if (!strcmp(value, "cache")) 2721 rec->no_buildid_cache = false; 2722 else if (!strcmp(value, "no-cache")) 2723 rec->no_buildid_cache = true; 2724 else if (!strcmp(value, "skip")) 2725 rec->no_buildid = true; 2726 else if (!strcmp(value, "mmap")) 2727 rec->buildid_mmap = true; 2728 else 2729 return -1; 2730 return 0; 2731 } 2732 if (!strcmp(var, "record.call-graph")) { 2733 var = "call-graph.record-mode"; 2734 return perf_default_config(var, value, cb); 2735 } 2736 #ifdef HAVE_AIO_SUPPORT 2737 if (!strcmp(var, "record.aio")) { 2738 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2739 if (!rec->opts.nr_cblocks) 2740 rec->opts.nr_cblocks = nr_cblocks_default; 2741 } 2742 #endif 2743 if (!strcmp(var, "record.debuginfod")) { 2744 rec->debuginfod.urls = strdup(value); 2745 if (!rec->debuginfod.urls) 2746 return -ENOMEM; 2747 rec->debuginfod.set = true; 2748 } 2749 2750 return 0; 2751 } 2752 
2753 2754 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2755 { 2756 struct record_opts *opts = (struct record_opts *)opt->value; 2757 2758 if (unset || !str) 2759 return 0; 2760 2761 if (!strcasecmp(str, "node")) 2762 opts->affinity = PERF_AFFINITY_NODE; 2763 else if (!strcasecmp(str, "cpu")) 2764 opts->affinity = PERF_AFFINITY_CPU; 2765 2766 return 0; 2767 } 2768 2769 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2770 { 2771 mask->nbits = nr_bits; 2772 mask->bits = bitmap_zalloc(mask->nbits); 2773 if (!mask->bits) 2774 return -ENOMEM; 2775 2776 return 0; 2777 } 2778 2779 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2780 { 2781 bitmap_free(mask->bits); 2782 mask->nbits = 0; 2783 } 2784 2785 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2786 { 2787 int ret; 2788 2789 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2790 if (ret) { 2791 mask->affinity.bits = NULL; 2792 return ret; 2793 } 2794 2795 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2796 if (ret) { 2797 record__mmap_cpu_mask_free(&mask->maps); 2798 mask->maps.bits = NULL; 2799 } 2800 2801 return ret; 2802 } 2803 2804 static void record__thread_mask_free(struct thread_mask *mask) 2805 { 2806 record__mmap_cpu_mask_free(&mask->maps); 2807 record__mmap_cpu_mask_free(&mask->affinity); 2808 } 2809 2810 static int record__parse_threads(const struct option *opt, const char *str, int unset) 2811 { 2812 int s; 2813 struct record_opts *opts = opt->value; 2814 2815 if (unset || !str || !strlen(str)) { 2816 opts->threads_spec = THREAD_SPEC__CPU; 2817 } else { 2818 for (s = 1; s < THREAD_SPEC__MAX; s++) { 2819 if (s == THREAD_SPEC__USER) { 2820 opts->threads_user_spec = strdup(str); 2821 if (!opts->threads_user_spec) 2822 return -ENOMEM; 2823 opts->threads_spec = THREAD_SPEC__USER; 2824 break; 2825 } 2826 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 2827 opts->threads_spec = s; 2828 break; 2829 } 2830 } 2831 } 2832 2833 if (opts->threads_spec == THREAD_SPEC__USER) 2834 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 2835 else 2836 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 2837 2838 return 0; 2839 } 2840 2841 static int parse_output_max_size(const struct option *opt, 2842 const char *str, int unset) 2843 { 2844 unsigned long *s = (unsigned long *)opt->value; 2845 static struct parse_tag tags_size[] = { 2846 { .tag = 'B', .mult = 1 }, 2847 { .tag = 'K', .mult = 1 << 10 }, 2848 { .tag = 'M', .mult = 1 << 20 }, 2849 { .tag = 'G', .mult = 1 << 30 }, 2850 { .tag = 0 }, 2851 }; 2852 unsigned long val; 2853 2854 if (unset) { 2855 *s = 0; 2856 return 0; 2857 } 2858 2859 val = parse_tag_value(str, tags_size); 2860 if (val != (unsigned long) -1) { 2861 *s = val; 2862 return 0; 2863 } 2864 2865 return -1; 2866 } 2867 2868 static int record__parse_mmap_pages(const struct option *opt, 2869 const char *str, 2870 int unset __maybe_unused) 2871 { 2872 struct record_opts *opts = opt->value; 2873 char *s, *p; 2874 unsigned int mmap_pages; 2875 int ret; 2876 2877 if (!str) 2878 return -EINVAL; 2879 2880 s = strdup(str); 2881 if (!s) 2882 return -ENOMEM; 2883 2884 p = strchr(s, ','); 2885 if (p) 2886 *p = '\0'; 2887 2888 if (*s) { 2889 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2890 if (ret) 2891 goto out_free; 2892 opts->mmap_pages = mmap_pages; 2893 } 2894 2895 if (!p) { 2896 ret = 0; 2897 goto out_free; 2898 } 2899 2900 ret = 
__evlist__parse_mmap_pages(&mmap_pages, p + 1); 2901 if (ret) 2902 goto out_free; 2903 2904 opts->auxtrace_mmap_pages = mmap_pages; 2905 2906 out_free: 2907 free(s); 2908 return ret; 2909 } 2910 2911 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 2912 { 2913 } 2914 2915 static int parse_control_option(const struct option *opt, 2916 const char *str, 2917 int unset __maybe_unused) 2918 { 2919 struct record_opts *opts = opt->value; 2920 2921 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 2922 } 2923 2924 static void switch_output_size_warn(struct record *rec) 2925 { 2926 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 2927 struct switch_output *s = &rec->switch_output; 2928 2929 wakeup_size /= 2; 2930 2931 if (s->size < wakeup_size) { 2932 char buf[100]; 2933 2934 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 2935 pr_warning("WARNING: switch-output data size lower than " 2936 "wakeup kernel buffer size (%s) " 2937 "expect bigger perf.data sizes\n", buf); 2938 } 2939 } 2940 2941 static int switch_output_setup(struct record *rec) 2942 { 2943 struct switch_output *s = &rec->switch_output; 2944 static struct parse_tag tags_size[] = { 2945 { .tag = 'B', .mult = 1 }, 2946 { .tag = 'K', .mult = 1 << 10 }, 2947 { .tag = 'M', .mult = 1 << 20 }, 2948 { .tag = 'G', .mult = 1 << 30 }, 2949 { .tag = 0 }, 2950 }; 2951 static struct parse_tag tags_time[] = { 2952 { .tag = 's', .mult = 1 }, 2953 { .tag = 'm', .mult = 60 }, 2954 { .tag = 'h', .mult = 60*60 }, 2955 { .tag = 'd', .mult = 60*60*24 }, 2956 { .tag = 0 }, 2957 }; 2958 unsigned long val; 2959 2960 /* 2961 * If we're using --switch-output-events, then we imply its 2962 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 2963 * thread to its parent. 
2964 */ 2965 if (rec->switch_output_event_set) { 2966 if (record__threads_enabled(rec)) { 2967 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 2968 return 0; 2969 } 2970 goto do_signal; 2971 } 2972 2973 if (!s->set) 2974 return 0; 2975 2976 if (record__threads_enabled(rec)) { 2977 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 2978 return 0; 2979 } 2980 2981 if (!strcmp(s->str, "signal")) { 2982 do_signal: 2983 s->signal = true; 2984 pr_debug("switch-output with SIGUSR2 signal\n"); 2985 goto enabled; 2986 } 2987 2988 val = parse_tag_value(s->str, tags_size); 2989 if (val != (unsigned long) -1) { 2990 s->size = val; 2991 pr_debug("switch-output with %s size threshold\n", s->str); 2992 goto enabled; 2993 } 2994 2995 val = parse_tag_value(s->str, tags_time); 2996 if (val != (unsigned long) -1) { 2997 s->time = val; 2998 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2999 s->str, s->time); 3000 goto enabled; 3001 } 3002 3003 return -1; 3004 3005 enabled: 3006 rec->timestamp_filename = true; 3007 s->enabled = true; 3008 3009 if (s->size && !rec->opts.no_buffering) 3010 switch_output_size_warn(rec); 3011 3012 return 0; 3013 } 3014 3015 static const char * const __record_usage[] = { 3016 "perf record [<options>] [<command>]", 3017 "perf record [<options>] -- <command> [<options>]", 3018 NULL 3019 }; 3020 const char * const *record_usage = __record_usage; 3021 3022 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3023 struct perf_sample *sample, struct machine *machine) 3024 { 3025 /* 3026 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3027 * no need to add them twice. 3028 */ 3029 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3030 return 0; 3031 return perf_event__process_mmap(tool, event, sample, machine); 3032 } 3033 3034 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3035 struct perf_sample *sample, struct machine *machine) 3036 { 3037 /* 3038 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3039 * no need to add them twice. 3040 */ 3041 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3042 return 0; 3043 3044 return perf_event__process_mmap2(tool, event, sample, machine); 3045 } 3046 3047 static int process_timestamp_boundary(struct perf_tool *tool, 3048 union perf_event *event __maybe_unused, 3049 struct perf_sample *sample, 3050 struct machine *machine __maybe_unused) 3051 { 3052 struct record *rec = container_of(tool, struct record, tool); 3053 3054 set_timestamp_boundary(rec, sample->time); 3055 return 0; 3056 } 3057 3058 static int parse_record_synth_option(const struct option *opt, 3059 const char *str, 3060 int unset __maybe_unused) 3061 { 3062 struct record_opts *opts = opt->value; 3063 char *p = strdup(str); 3064 3065 if (p == NULL) 3066 return -1; 3067 3068 opts->synth = parse_synth_opt(p); 3069 free(p); 3070 3071 if (opts->synth < 0) { 3072 pr_err("Invalid synth option: %s\n", str); 3073 return -1; 3074 } 3075 return 0; 3076 } 3077 3078 /* 3079 * XXX Ideally would be local to cmd_record() and passed to a record__new 3080 * because we need to have access to it in record__exit, that is called 3081 * after cmd_record() exits, but since record_options need to be accessible to 3082 * builtin-script, leave it here. 3083 * 3084 * At least we don't ouch it in all the other functions here directly. 
3085 * 3086 * Just say no to tons of global variables, sigh. 3087 */ 3088 static struct record record = { 3089 .opts = { 3090 .sample_time = true, 3091 .mmap_pages = UINT_MAX, 3092 .user_freq = UINT_MAX, 3093 .user_interval = ULLONG_MAX, 3094 .freq = 4000, 3095 .target = { 3096 .uses_mmap = true, 3097 .default_per_cpu = true, 3098 }, 3099 .mmap_flush = MMAP_FLUSH_DEFAULT, 3100 .nr_threads_synthesize = 1, 3101 .ctl_fd = -1, 3102 .ctl_fd_ack = -1, 3103 .synth = PERF_SYNTH_ALL, 3104 }, 3105 .tool = { 3106 .sample = process_sample_event, 3107 .fork = perf_event__process_fork, 3108 .exit = perf_event__process_exit, 3109 .comm = perf_event__process_comm, 3110 .namespaces = perf_event__process_namespaces, 3111 .mmap = build_id__process_mmap, 3112 .mmap2 = build_id__process_mmap2, 3113 .itrace_start = process_timestamp_boundary, 3114 .aux = process_timestamp_boundary, 3115 .ordered_events = true, 3116 }, 3117 }; 3118 3119 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3120 "\n\t\t\t\tDefault: fp"; 3121 3122 static bool dry_run; 3123 3124 /* 3125 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3126 * with it and switch to use the library functions in perf_evlist that came 3127 * from builtin-record.c, i.e. use record_opts, 3128 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3129 * using pipes, etc. 3130 */ 3131 static struct option __record_options[] = { 3132 OPT_CALLBACK('e', "event", &record.evlist, "event", 3133 "event selector. use 'perf list' to list available events", 3134 parse_events_option), 3135 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3136 "event filter", parse_filter), 3137 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3138 NULL, "don't record events from perf itself", 3139 exclude_perf), 3140 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3141 "record events on existing process id"), 3142 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3143 "record events on existing thread id"), 3144 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3145 "collect data with this RT SCHED_FIFO priority"), 3146 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3147 "collect data without buffering"), 3148 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3149 "collect raw sample records from all opened counters"), 3150 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3151 "system-wide collection from all CPUs"), 3152 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3153 "list of cpus to monitor"), 3154 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3155 OPT_STRING('o', "output", &record.data.path, "file", 3156 "output file name"), 3157 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3158 &record.opts.no_inherit_set, 3159 "child tasks do not inherit counters"), 3160 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3161 "synthesize non-sample events at the end of output"), 3162 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3163 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3164 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3165 "Fail if the specified frequency can't be used"), 3166 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3167 "profile at this frequency", 3168 record__parse_freq), 3169 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3170 "number of mmap data pages and AUX area tracing mmap pages", 3171 
record__parse_mmap_pages), 3172 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3173 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3174 record__mmap_flush_parse), 3175 OPT_BOOLEAN(0, "group", &record.opts.group, 3176 "put the counters into a counter group"), 3177 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3178 NULL, "enables call-graph recording" , 3179 &record_callchain_opt), 3180 OPT_CALLBACK(0, "call-graph", &record.opts, 3181 "record_mode[,record_size]", record_callchain_help, 3182 &record_parse_callchain_opt), 3183 OPT_INCR('v', "verbose", &verbose, 3184 "be more verbose (show counter open errors, etc)"), 3185 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3186 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3187 "per thread counts"), 3188 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3189 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3190 "Record the sample physical addresses"), 3191 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3192 "Record the sampled data address data page size"), 3193 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3194 "Record the sampled code address (ip) page size"), 3195 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3196 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3197 &record.opts.sample_time_set, 3198 "Record the sample timestamps"), 3199 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3200 "Record the sample period"), 3201 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3202 "don't sample"), 3203 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3204 &record.no_buildid_cache_set, 3205 "do not update the buildid cache"), 3206 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3207 &record.no_buildid_set, 3208 "do not collect buildids in perf.data"), 3209 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3210 "monitor event in cgroup name only", 3211 parse_cgroups), 3212 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 3213 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 3214 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3215 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3216 "user to profile"), 3217 3218 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3219 "branch any", "sample any taken branches", 3220 parse_branch_stack), 3221 3222 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3223 "branch filter mask", "branch stack filter modes", 3224 parse_branch_stack), 3225 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3226 "sample by weight (on special events only)"), 3227 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3228 "sample transaction flags (special events only)"), 3229 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3230 "use per-thread mmaps"), 3231 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3232 "sample selected machine registers on interrupt," 3233 " use '-I?' to list register names", parse_intr_regs), 3234 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3235 "sample selected machine registers on interrupt," 3236 " use '--user-regs=?' 
to list register names", parse_user_regs), 3237 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3238 "Record running/enabled time of read (:S) events"), 3239 OPT_CALLBACK('k', "clockid", &record.opts, 3240 "clockid", "clockid to use for events, see clock_gettime()", 3241 parse_clockid), 3242 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3243 "opts", "AUX area tracing Snapshot Mode", ""), 3244 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3245 "opts", "sample AUX area", ""), 3246 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3247 "per thread proc mmap processing timeout in ms"), 3248 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3249 "Record namespaces events"), 3250 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3251 "Record cgroup events"), 3252 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3253 &record.opts.record_switch_events_set, 3254 "Record context switch events"), 3255 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3256 "Configure all used events to run in kernel space.", 3257 PARSE_OPT_EXCLUSIVE), 3258 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3259 "Configure all used events to run in user space.", 3260 PARSE_OPT_EXCLUSIVE), 3261 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3262 "collect kernel callchains"), 3263 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3264 "collect user callchains"), 3265 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3266 "clang binary to use for compiling BPF scriptlets"), 3267 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3268 "options passed to clang when compiling BPF scriptlets"), 3269 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3270 "file", "vmlinux pathname"), 3271 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3272 "Record build-id of all DSOs regardless of hits"), 3273 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3274 "Record build-id in map events"), 3275 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3276 "append timestamp to output filename"), 3277 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3278 "Record timestamp boundary (time of first/last samples)"), 3279 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3280 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3281 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3282 "signal"), 3283 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3284 "switch output event selector. 
use 'perf list' to list available events", 3285 parse_events_option_new_evlist), 3286 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3287 "Limit number of switch output generated files"), 3288 OPT_BOOLEAN(0, "dry-run", &dry_run, 3289 "Parse options then exit"), 3290 #ifdef HAVE_AIO_SUPPORT 3291 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3292 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3293 record__aio_parse), 3294 #endif 3295 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3296 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3297 record__parse_affinity), 3298 #ifdef HAVE_ZSTD_SUPPORT 3299 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3300 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3301 record__parse_comp_level), 3302 #endif 3303 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3304 "size", "Limit the maximum size of the output file", parse_output_max_size), 3305 OPT_UINTEGER(0, "num-thread-synthesize", 3306 &record.opts.nr_threads_synthesize, 3307 "number of threads to run for event synthesis"), 3308 #ifdef HAVE_LIBPFM 3309 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3310 "libpfm4 event selector. use 'perf list' to list available events", 3311 parse_libpfm_events_option), 3312 #endif 3313 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3314 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3315 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3316 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3317 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3318 parse_control_option), 3319 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3320 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3321 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3322 &record.debuginfod.set, "debuginfod urls", 3323 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3324 "system"), 3325 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3326 "write collected trace data into several data files using parallel threads", 3327 record__parse_threads), 3328 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3329 OPT_END() 3330 }; 3331 3332 struct option *record_options = __record_options; 3333 3334 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3335 { 3336 struct perf_cpu cpu; 3337 int idx; 3338 3339 if (cpu_map__is_dummy(cpus)) 3340 return; 3341 3342 perf_cpu_map__for_each_cpu(cpu, idx, cpus) 3343 set_bit(cpu.cpu, mask->bits); 3344 } 3345 3346 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3347 { 3348 struct perf_cpu_map *cpus; 3349 3350 cpus = perf_cpu_map__new(mask_spec); 3351 if (!cpus) 3352 return -ENOMEM; 3353 3354 bitmap_zero(mask->bits, mask->nbits); 3355 record__mmap_cpu_mask_init(mask, cpus); 3356 perf_cpu_map__put(cpus); 3357 3358 return 0; 3359 } 3360 3361 static void record__free_thread_masks(struct record *rec, int nr_threads) 3362 { 3363 int t; 3364 3365 if (rec->thread_masks) 3366 for (t = 0; t < nr_threads; t++) 3367 record__thread_mask_free(&rec->thread_masks[t]); 
3368 3369 zfree(&rec->thread_masks); 3370 } 3371 3372 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3373 { 3374 int t, ret; 3375 3376 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3377 if (!rec->thread_masks) { 3378 pr_err("Failed to allocate thread masks\n"); 3379 return -ENOMEM; 3380 } 3381 3382 for (t = 0; t < nr_threads; t++) { 3383 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3384 if (ret) { 3385 pr_err("Failed to allocate thread masks[%d]\n", t); 3386 goto out_free; 3387 } 3388 } 3389 3390 return 0; 3391 3392 out_free: 3393 record__free_thread_masks(rec, nr_threads); 3394 3395 return ret; 3396 } 3397 3398 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3399 { 3400 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3401 3402 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3403 if (ret) 3404 return ret; 3405 3406 rec->nr_threads = nr_cpus; 3407 pr_debug("nr_threads: %d\n", rec->nr_threads); 3408 3409 for (t = 0; t < rec->nr_threads; t++) { 3410 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3411 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3412 if (verbose) { 3413 pr_debug("thread_masks[%d]: ", t); 3414 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3415 pr_debug("thread_masks[%d]: ", t); 3416 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3417 } 3418 } 3419 3420 return 0; 3421 } 3422 3423 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3424 const char **maps_spec, const char **affinity_spec, 3425 u32 nr_spec) 3426 { 3427 u32 s; 3428 int ret = 0, t = 0; 3429 struct mmap_cpu_mask cpus_mask; 3430 struct thread_mask thread_mask, full_mask, *thread_masks; 3431 3432 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3433 if (ret) { 3434 pr_err("Failed to allocate CPUs mask\n"); 3435 return ret; 3436 } 3437 record__mmap_cpu_mask_init(&cpus_mask, cpus); 3438 3439 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3440 if (ret) { 3441 pr_err("Failed to allocate full mask\n"); 3442 goto out_free_cpu_mask; 3443 } 3444 3445 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3446 if (ret) { 3447 pr_err("Failed to allocate thread mask\n"); 3448 goto out_free_full_and_cpu_masks; 3449 } 3450 3451 for (s = 0; s < nr_spec; s++) { 3452 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3453 if (ret) { 3454 pr_err("Failed to initialize maps thread mask\n"); 3455 goto out_free; 3456 } 3457 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3458 if (ret) { 3459 pr_err("Failed to initialize affinity thread mask\n"); 3460 goto out_free; 3461 } 3462 3463 /* ignore invalid CPUs but do not allow empty masks */ 3464 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3465 cpus_mask.bits, thread_mask.maps.nbits)) { 3466 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3467 ret = -EINVAL; 3468 goto out_free; 3469 } 3470 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3471 cpus_mask.bits, thread_mask.affinity.nbits)) { 3472 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3473 ret = -EINVAL; 3474 goto out_free; 3475 } 3476 3477 /* do not allow intersection with other masks (full_mask) */ 3478 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3479 thread_mask.maps.nbits)) { 3480 
pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3481 ret = -EINVAL; 3482 goto out_free; 3483 } 3484 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3485 thread_mask.affinity.nbits)) { 3486 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3487 ret = -EINVAL; 3488 goto out_free; 3489 } 3490 3491 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3492 thread_mask.maps.bits, full_mask.maps.nbits); 3493 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3494 thread_mask.affinity.bits, full_mask.maps.nbits); 3495 3496 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3497 if (!thread_masks) { 3498 pr_err("Failed to reallocate thread masks\n"); 3499 ret = -ENOMEM; 3500 goto out_free; 3501 } 3502 rec->thread_masks = thread_masks; 3503 rec->thread_masks[t] = thread_mask; 3504 if (verbose) { 3505 pr_debug("thread_masks[%d]: ", t); 3506 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3507 pr_debug("thread_masks[%d]: ", t); 3508 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3509 } 3510 t++; 3511 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3512 if (ret) { 3513 pr_err("Failed to allocate thread mask\n"); 3514 goto out_free_full_and_cpu_masks; 3515 } 3516 } 3517 rec->nr_threads = t; 3518 pr_debug("nr_threads: %d\n", rec->nr_threads); 3519 if (!rec->nr_threads) 3520 ret = -EINVAL; 3521 3522 out_free: 3523 record__thread_mask_free(&thread_mask); 3524 out_free_full_and_cpu_masks: 3525 record__thread_mask_free(&full_mask); 3526 out_free_cpu_mask: 3527 record__mmap_cpu_mask_free(&cpus_mask); 3528 3529 return ret; 3530 } 3531 3532 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3533 { 3534 int ret; 3535 struct cpu_topology *topo; 3536 3537 topo = cpu_topology__new(); 3538 if (!topo) { 3539 pr_err("Failed to allocate CPU topology\n"); 3540 return -ENOMEM; 3541 } 3542 3543 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3544 topo->core_cpus_list, topo->core_cpus_lists); 3545 cpu_topology__delete(topo); 3546 3547 return ret; 3548 } 3549 3550 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3551 { 3552 int ret; 3553 struct cpu_topology *topo; 3554 3555 topo = cpu_topology__new(); 3556 if (!topo) { 3557 pr_err("Failed to allocate CPU topology\n"); 3558 return -ENOMEM; 3559 } 3560 3561 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3562 topo->package_cpus_list, topo->package_cpus_lists); 3563 cpu_topology__delete(topo); 3564 3565 return ret; 3566 } 3567 3568 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3569 { 3570 u32 s; 3571 int ret; 3572 const char **spec; 3573 struct numa_topology *topo; 3574 3575 topo = numa_topology__new(); 3576 if (!topo) { 3577 pr_err("Failed to allocate NUMA topology\n"); 3578 return -ENOMEM; 3579 } 3580 3581 spec = zalloc(topo->nr * sizeof(char *)); 3582 if (!spec) { 3583 pr_err("Failed to allocate NUMA spec\n"); 3584 ret = -ENOMEM; 3585 goto out_delete_topo; 3586 } 3587 for (s = 0; s < topo->nr; s++) 3588 spec[s] = topo->nodes[s].cpus; 3589 3590 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3591 3592 zfree(&spec); 3593 3594 out_delete_topo: 3595 numa_topology__delete(topo); 3596 3597 return ret; 3598 } 3599 3600 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3601 { 3602 int t, ret; 3603 u32 s, nr_spec = 0; 
3604 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3605 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3606 3607 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3608 spec = strtok_r(user_spec, ":", &spec_ptr); 3609 if (spec == NULL) 3610 break; 3611 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3612 mask = strtok_r(spec, "/", &mask_ptr); 3613 if (mask == NULL) 3614 break; 3615 pr_debug2(" maps mask: %s\n", mask); 3616 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3617 if (!tmp_spec) { 3618 pr_err("Failed to reallocate maps spec\n"); 3619 ret = -ENOMEM; 3620 goto out_free; 3621 } 3622 maps_spec = tmp_spec; 3623 maps_spec[nr_spec] = dup_mask = strdup(mask); 3624 if (!maps_spec[nr_spec]) { 3625 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3626 ret = -ENOMEM; 3627 goto out_free; 3628 } 3629 mask = strtok_r(NULL, "/", &mask_ptr); 3630 if (mask == NULL) { 3631 pr_err("Invalid thread maps or affinity specs\n"); 3632 ret = -EINVAL; 3633 goto out_free; 3634 } 3635 pr_debug2(" affinity mask: %s\n", mask); 3636 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3637 if (!tmp_spec) { 3638 pr_err("Failed to reallocate affinity spec\n"); 3639 ret = -ENOMEM; 3640 goto out_free; 3641 } 3642 affinity_spec = tmp_spec; 3643 affinity_spec[nr_spec] = strdup(mask); 3644 if (!affinity_spec[nr_spec]) { 3645 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3646 ret = -ENOMEM; 3647 goto out_free; 3648 } 3649 dup_mask = NULL; 3650 nr_spec++; 3651 } 3652 3653 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3654 (const char **)affinity_spec, nr_spec); 3655 3656 out_free: 3657 free(dup_mask); 3658 for (s = 0; s < nr_spec; s++) { 3659 if (maps_spec) 3660 free(maps_spec[s]); 3661 if (affinity_spec) 3662 free(affinity_spec[s]); 3663 } 3664 free(affinity_spec); 3665 free(maps_spec); 3666 3667 return ret; 3668 } 3669 3670 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3671 { 3672 int ret; 3673 3674 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3675 if (ret) 3676 return ret; 3677 3678 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus); 3679 3680 rec->nr_threads = 1; 3681 3682 return 0; 3683 } 3684 3685 static int record__init_thread_masks(struct record *rec) 3686 { 3687 int ret = 0; 3688 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3689 3690 if (!record__threads_enabled(rec)) 3691 return record__init_thread_default_masks(rec, cpus); 3692 3693 if (evlist__per_thread(rec->evlist)) { 3694 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3695 return -EINVAL; 3696 } 3697 3698 switch (rec->opts.threads_spec) { 3699 case THREAD_SPEC__CPU: 3700 ret = record__init_thread_cpu_masks(rec, cpus); 3701 break; 3702 case THREAD_SPEC__CORE: 3703 ret = record__init_thread_core_masks(rec, cpus); 3704 break; 3705 case THREAD_SPEC__PACKAGE: 3706 ret = record__init_thread_package_masks(rec, cpus); 3707 break; 3708 case THREAD_SPEC__NUMA: 3709 ret = record__init_thread_numa_masks(rec, cpus); 3710 break; 3711 case THREAD_SPEC__USER: 3712 ret = record__init_thread_user_masks(rec, cpus); 3713 break; 3714 default: 3715 break; 3716 } 3717 3718 return ret; 3719 } 3720 3721 int cmd_record(int argc, const char **argv) 3722 { 3723 int err; 3724 struct record *rec = &record; 3725 char errbuf[BUFSIZ]; 3726 3727 setlocale(LC_ALL, ""); 3728 3729 #ifndef HAVE_LIBBPF_SUPPORT 3730 # define 
set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 3731 set_nobuild('\0', "clang-path", true); 3732 set_nobuild('\0', "clang-opt", true); 3733 # undef set_nobuild 3734 #endif 3735 3736 #ifndef HAVE_BPF_PROLOGUE 3737 # if !defined (HAVE_DWARF_SUPPORT) 3738 # define REASON "NO_DWARF=1" 3739 # elif !defined (HAVE_LIBBPF_SUPPORT) 3740 # define REASON "NO_LIBBPF=1" 3741 # else 3742 # define REASON "this architecture doesn't support BPF prologue" 3743 # endif 3744 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 3745 set_nobuild('\0', "vmlinux", true); 3746 # undef set_nobuild 3747 # undef REASON 3748 #endif 3749 3750 #ifndef HAVE_BPF_SKEL 3751 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 3752 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 3753 # undef set_nobuild 3754 #endif 3755 3756 rec->opts.affinity = PERF_AFFINITY_SYS; 3757 3758 rec->evlist = evlist__new(); 3759 if (rec->evlist == NULL) 3760 return -ENOMEM; 3761 3762 err = perf_config(perf_record_config, rec); 3763 if (err) 3764 return err; 3765 3766 argc = parse_options(argc, argv, record_options, record_usage, 3767 PARSE_OPT_STOP_AT_NON_OPTION); 3768 if (quiet) 3769 perf_quiet_option(); 3770 3771 err = symbol__validate_sym_arguments(); 3772 if (err) 3773 return err; 3774 3775 perf_debuginfod_setup(&record.debuginfod); 3776 3777 /* Make system wide (-a) the default target. */ 3778 if (!argc && target__none(&rec->opts.target)) 3779 rec->opts.target.system_wide = true; 3780 3781 if (nr_cgroups && !rec->opts.target.system_wide) { 3782 usage_with_options_msg(record_usage, record_options, 3783 "cgroup monitoring only available in system-wide mode"); 3784 3785 } 3786 3787 if (rec->buildid_mmap) { 3788 if (!perf_can_record_build_id()) { 3789 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 3790 err = -EINVAL; 3791 goto out_opts; 3792 } 3793 pr_debug("Enabling build id in mmap2 events.\n"); 3794 /* Enable mmap build id synthesizing. */ 3795 symbol_conf.buildid_mmap2 = true; 3796 /* Enable perf_event_attr::build_id bit. */ 3797 rec->opts.build_id = true; 3798 /* Disable build id cache. 
*/ 3799 rec->no_buildid = true; 3800 } 3801 3802 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 3803 pr_err("Kernel has no cgroup sampling support.\n"); 3804 err = -EINVAL; 3805 goto out_opts; 3806 } 3807 3808 if (rec->opts.kcore || record__threads_enabled(rec)) 3809 rec->data.is_dir = true; 3810 3811 if (record__threads_enabled(rec)) { 3812 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 3813 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 3814 goto out_opts; 3815 } 3816 if (record__aio_enabled(rec)) { 3817 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 3818 goto out_opts; 3819 } 3820 } 3821 3822 if (rec->opts.comp_level != 0) { 3823 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 3824 rec->no_buildid = true; 3825 } 3826 3827 if (rec->opts.record_switch_events && 3828 !perf_can_record_switch_events()) { 3829 ui__error("kernel does not support recording context switch events\n"); 3830 parse_options_usage(record_usage, record_options, "switch-events", 0); 3831 err = -EINVAL; 3832 goto out_opts; 3833 } 3834 3835 if (switch_output_setup(rec)) { 3836 parse_options_usage(record_usage, record_options, "switch-output", 0); 3837 err = -EINVAL; 3838 goto out_opts; 3839 } 3840 3841 if (rec->switch_output.time) { 3842 signal(SIGALRM, alarm_sig_handler); 3843 alarm(rec->switch_output.time); 3844 } 3845 3846 if (rec->switch_output.num_files) { 3847 rec->switch_output.filenames = calloc(sizeof(char *), 3848 rec->switch_output.num_files); 3849 if (!rec->switch_output.filenames) { 3850 err = -EINVAL; 3851 goto out_opts; 3852 } 3853 } 3854 3855 if (rec->timestamp_filename && record__threads_enabled(rec)) { 3856 rec->timestamp_filename = false; 3857 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 3858 } 3859 3860 /* 3861 * Allow aliases to facilitate the lookup of symbols for address 3862 * filters. Refer to auxtrace_parse_filters(). 3863 */ 3864 symbol_conf.allow_aliases = true; 3865 3866 symbol__init(NULL); 3867 3868 err = record__auxtrace_init(rec); 3869 if (err) 3870 goto out; 3871 3872 if (dry_run) 3873 goto out; 3874 3875 err = bpf__setup_stdout(rec->evlist); 3876 if (err) { 3877 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 3878 pr_err("ERROR: Setup BPF stdout failed: %s\n", 3879 errbuf); 3880 goto out; 3881 } 3882 3883 err = -ENOMEM; 3884 3885 if (rec->no_buildid_cache || rec->no_buildid) { 3886 disable_buildid_cache(); 3887 } else if (rec->switch_output.enabled) { 3888 /* 3889 * In 'perf record --switch-output', disable buildid 3890 * generation by default to reduce data file switching 3891 * overhead. 
	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

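	/*
	 * PERF_RECORD_TEXT_POKE events describe modifications to kernel text
	 * (jump labels, kprobes, etc.) so that AUX area traces such as
	 * Intel PT can be decoded against the instructions that were
	 * actually live at the time.
	 */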
	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}