1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/aslr.h" 12 #include "util/color.h" 13 #include "util/dso.h" 14 #include "util/vdso.h" 15 #include "util/evlist.h" 16 #include "util/evsel.h" 17 #include "util/map.h" 18 #include "util/session.h" 19 #include "util/tool.h" 20 #include "util/debug.h" 21 #include "util/build-id.h" 22 #include "util/data.h" 23 #include "util/auxtrace.h" 24 #include "util/jit.h" 25 #include "util/string2.h" 26 #include "util/symbol.h" 27 #include "util/synthetic-events.h" 28 #include "util/pmus.h" 29 #include "util/thread.h" 30 #include "util/namespaces.h" 31 #include "util/unwind.h" 32 #include "util/util.h" 33 #include "util/tsc.h" 34 35 #include <internal/lib.h> 36 37 #include <linux/err.h> 38 #include <subcmd/parse-options.h> 39 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 40 41 #include <linux/list.h> 42 #include <linux/string.h> 43 #include <linux/zalloc.h> 44 #include <linux/hash.h> 45 #include <ctype.h> 46 #include <errno.h> 47 #include <signal.h> 48 #include <inttypes.h> 49 50 struct guest_event { 51 struct perf_sample sample; 52 union perf_event *event; 53 char *event_buf; 54 }; 55 56 struct guest_id { 57 /* hlist_node must be first, see free_hlist() */ 58 struct hlist_node node; 59 u64 id; 60 u64 host_id; 61 u32 vcpu; 62 }; 63 64 struct guest_tid { 65 /* hlist_node must be first, see free_hlist() */ 66 struct hlist_node node; 67 /* Thread ID of QEMU thread */ 68 u32 tid; 69 u32 vcpu; 70 }; 71 72 struct guest_vcpu { 73 /* Current host CPU */ 74 u32 cpu; 75 /* Thread ID of QEMU thread */ 76 u32 tid; 77 }; 78 79 struct guest_session { 80 char *perf_data_file; 81 u32 machine_pid; 82 u64 time_offset; 83 double time_scale; 84 struct perf_tool tool; 85 struct perf_data data; 86 struct perf_session *session; 87 char *tmp_file_name; 88 int tmp_fd; 89 struct perf_tsc_conversion host_tc; 90 struct perf_tsc_conversion guest_tc; 91 bool copy_kcore_dir; 92 bool have_tc; 93 bool fetched; 94 bool ready; 95 u16 dflt_id_hdr_size; 96 u64 dflt_id; 97 u64 highest_id; 98 /* Array of guest_vcpu */ 99 struct guest_vcpu *vcpu; 100 size_t vcpu_cnt; 101 /* Hash table for guest_id */ 102 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 103 /* Hash table for guest_tid */ 104 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE]; 105 /* Place to stash next guest event */ 106 struct guest_event ev; 107 }; 108 109 enum build_id_rewrite_style { 110 BID_RWS__NONE = 0, 111 BID_RWS__INJECT_HEADER_LAZY, 112 BID_RWS__INJECT_HEADER_ALL, 113 BID_RWS__MMAP2_BUILDID_ALL, 114 BID_RWS__MMAP2_BUILDID_LAZY, 115 }; 116 117 struct perf_inject { 118 struct perf_tool tool; 119 struct perf_session *session; 120 enum build_id_rewrite_style build_id_style; 121 bool sched_stat; 122 bool have_auxtrace; 123 bool strip; 124 bool jit_mode; 125 bool in_place_update; 126 bool in_place_update_dry_run; 127 bool copy_kcore_dir; 128 bool convert_callchain; 129 bool aslr; 130 const char *input_name; 131 struct perf_data output; 132 u64 bytes_written; 133 u64 aux_id; 134 struct list_head samples; 135 struct itrace_synth_opts itrace_synth_opts; 136 char *event_copy; 137 struct perf_file_section secs[HEADER_FEAT_BITS]; 138 struct guest_session guest_session; 139 struct strlist *known_build_ids; 140 struct evsel *mmap_evsel; 141 struct ip_callchain *raw_callchain; 142 }; 143 144 struct event_entry { 145 struct list_head node; 146 u32 tid; 147 union perf_event event[]; 148 }; 149 150 static int tool__inject_build_id(const struct perf_tool *tool, 151 struct perf_sample *sample, 152 struct machine *machine, 153 __u16 misc, 154 const char *filename, 155 struct dso *dso, u32 flags); 156 static int tool__inject_mmap2_build_id(const struct perf_tool *tool, 157 struct perf_sample *sample, 158 struct machine *machine, 159 __u16 misc, 160 __u32 pid, __u32 tid, 161 __u64 start, __u64 len, __u64 pgoff, 162 struct dso *dso, 163 __u32 prot, __u32 flags, 164 const char *filename); 165 166 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 167 { 168 ssize_t size; 169 170 size = perf_data__write(&inject->output, buf, sz); 171 if (size < 0) 172 return -errno; 173 174 inject->bytes_written += size; 175 return 0; 176 } 177 178 static int perf_event__repipe_synth(const struct perf_tool *tool, 179 union perf_event *event) 180 181 { 182 struct perf_inject *inject = container_of(tool, struct perf_inject, 183 tool); 184 185 return output_bytes(inject, event, event->header.size); 186 } 187 188 static int perf_event__repipe_oe_synth(const struct perf_tool *tool, 189 union perf_event *event, 190 struct ordered_events *oe __maybe_unused) 191 { 192 return perf_event__repipe_synth(tool, event); 193 } 194 195 #ifdef HAVE_JITDUMP 196 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused, 197 union perf_event *event __maybe_unused, 198 struct ordered_events *oe __maybe_unused) 199 { 200 return 0; 201 } 202 #endif 203 204 static int perf_event__repipe_op2_synth(const struct perf_tool *tool, 205 struct perf_session *session __maybe_unused, 206 union perf_event *event) 207 { 208 return perf_event__repipe_synth(tool, event); 209 } 210 211 static int perf_event__repipe_op4_synth(const struct perf_tool *tool, 212 struct perf_session *session __maybe_unused, 213 union perf_event *event, 214 u64 data __maybe_unused, 215 const char *str __maybe_unused) 216 { 217 return perf_event__repipe_synth(tool, event); 218 } 219 220 static int perf_event__repipe_synth_cb(const struct perf_tool *tool, 221 union perf_event *event, 222 struct perf_sample *sample __maybe_unused, 223 struct machine *machine __maybe_unused) 224 { 225 return perf_event__repipe_synth(tool, event); 226 } 227 228 static int perf_event__repipe_attr(const struct perf_tool *tool, 229 union perf_event *event, 230 struct evlist **pevlist) 231 { 232 struct perf_inject *inject = container_of(tool, struct perf_inject, 233 tool); 234 struct perf_event_attr attr; 235 u32 raw_attr_size, attr_size; 236 size_t n_ids; 237 u64 *ids; 238 int ret; 239 240 union perf_event *aslr_event = NULL; 241 242 ret = perf_event__process_attr(tool, event, pevlist); 243 if (ret) 244 return ret; 245 246 if (inject->aslr) { 247 aslr_event = malloc(event->header.size); 248 if (!aslr_event) 249 return -ENOMEM; 250 memcpy(aslr_event, event, event->header.size); 251 aslr_tool__strip_attr_event(aslr_event, *pevlist); 252 event = aslr_event; 253 } 254 255 /* If the output isn't a pipe then the attributes will be written as part of the header. */ 256 if (!inject->output.is_pipe) { 257 ret = 0; 258 goto out; 259 } 260 261 if (!inject->itrace_synth_opts.set) { 262 ret = perf_event__repipe_synth(tool, event); 263 goto out; 264 } 265 266 if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { 267 pr_err("Attribute event size %u is too small\n", event->header.size); 268 ret = -EINVAL; 269 goto out; 270 } 271 272 /* 273 * ABI0 pipe/inject events have attr.size == 0; default to 274 * PERF_ATTR_SIZE_VER0 (the ABI0 footprint) for the bounded 275 * copy and ID array position. Same pattern as 276 * perf_event__process_attr() in header.c. 277 */ 278 raw_attr_size = event->attr.attr.size; 279 attr_size = raw_attr_size ?: PERF_ATTR_SIZE_VER0; 280 281 if (raw_attr_size && (raw_attr_size < PERF_ATTR_SIZE_VER0 || 282 raw_attr_size > event->header.size - sizeof(event->header))) { 283 pr_err("Attribute event size %u is too small for attr.size %u\n", 284 event->header.size, raw_attr_size); 285 ret = -EINVAL; 286 goto out; 287 } 288 289 memset(&attr, 0, sizeof(attr)); 290 memcpy(&attr, &event->attr.attr, 291 min_t(size_t, sizeof(attr), attr_size)); 292 293 n_ids = event->header.size - sizeof(event->header) - attr_size; 294 n_ids /= sizeof(u64); 295 ids = (void *)&event->attr.attr + attr_size; 296 297 attr.size = sizeof(struct perf_event_attr); 298 attr.sample_type &= ~PERF_SAMPLE_AUX; 299 300 301 if (inject->itrace_synth_opts.add_last_branch) { 302 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 303 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 304 } 305 ret = perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, 306 perf_event__repipe_synth_cb); 307 out: 308 free(aslr_event); 309 return ret; 310 } 311 312 static int perf_event__repipe_event_update(const struct perf_tool *tool, 313 union perf_event *event, 314 struct evlist **pevlist __maybe_unused) 315 { 316 return perf_event__repipe_synth(tool, event); 317 } 318 319 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 320 { 321 char buf[4096]; 322 ssize_t ssz; 323 int ret; 324 325 while (size > 0) { 326 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 327 if (ssz < 0) 328 return -errno; 329 ret = output_bytes(inject, buf, ssz); 330 if (ret) 331 return ret; 332 size -= ssz; 333 } 334 335 return 0; 336 } 337 338 static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool, 339 struct perf_session *session, 340 union perf_event *event) 341 { 342 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 343 int ret; 344 345 inject->have_auxtrace = true; 346 347 if (!inject->output.is_pipe) { 348 off_t offset = perf_data__seek(&inject->output, 0, SEEK_CUR); 349 350 if (offset == -1) 351 return -errno; 352 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 353 event, offset); 354 if (ret < 0) 355 return ret; 356 } 357 358 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 359 ret = output_bytes(inject, event, event->header.size); 360 if (ret < 0) 361 return ret; 362 ret = copy_bytes(inject, session->data, 363 event->auxtrace.size); 364 } else { 365 ret = output_bytes(inject, event, 366 event->header.size + event->auxtrace.size); 367 } 368 if (ret < 0) 369 return ret; 370 371 return event->auxtrace.size; 372 } 373 374 static int perf_event__repipe(const struct perf_tool *tool, 375 union perf_event *event, 376 struct perf_sample *sample __maybe_unused, 377 struct machine *machine __maybe_unused) 378 { 379 return perf_event__repipe_synth(tool, event); 380 } 381 382 static int perf_event__drop(const struct perf_tool *tool __maybe_unused, 383 union perf_event *event __maybe_unused, 384 struct perf_sample *sample __maybe_unused, 385 struct machine *machine __maybe_unused) 386 { 387 return 0; 388 } 389 390 static int perf_event__drop_aux(const struct perf_tool *tool, 391 union perf_event *event __maybe_unused, 392 struct perf_sample *sample, 393 struct machine *machine __maybe_unused) 394 { 395 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 396 397 if (!inject->aux_id) 398 inject->aux_id = sample->id; 399 400 return 0; 401 } 402 403 static union perf_event * 404 perf_inject__cut_auxtrace_sample(struct perf_inject *inject, 405 union perf_event *event, 406 struct perf_sample *sample) 407 { 408 size_t sz1 = sample->aux_sample.data - (void *)event - sizeof(u64); 409 size_t sz2 = event->header.size - sample->aux_sample.size - (sz1 + sizeof(u64)); 410 union perf_event *ev; 411 412 if (inject->event_copy == NULL) { 413 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 414 if (!inject->event_copy) 415 return ERR_PTR(-ENOMEM); 416 } 417 ev = (union perf_event *)inject->event_copy; 418 if (sz1 > event->header.size || sz2 > event->header.size || 419 sz1 + sz2 > event->header.size || 420 sz1 < sizeof(struct perf_event_header)) 421 return event; 422 423 memcpy(ev, event, sz1); 424 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); 425 ev->header.size = sz1 + sz2; 426 427 return ev; 428 } 429 430 typedef int (*inject_handler)(const struct perf_tool *tool, 431 union perf_event *event, 432 struct perf_sample *sample, 433 struct machine *machine); 434 435 static int perf_event__repipe_sample(const struct perf_tool *tool, 436 union perf_event *event, 437 struct perf_sample *sample, 438 struct machine *machine) 439 { 440 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 441 struct evsel *evsel = sample->evsel; 442 443 if (evsel == NULL) 444 return perf_event__repipe_synth(tool, event); 445 446 if (evsel->handler) { 447 inject_handler f = evsel->handler; 448 return f(tool, event, sample, machine); 449 } 450 451 build_id__mark_dso_hit(tool, event, sample, machine); 452 453 if (inject->itrace_synth_opts.set && 454 (inject->itrace_synth_opts.last_branch || 455 inject->itrace_synth_opts.add_last_branch)) { 456 union perf_event *event_copy = (void *)inject->event_copy; 457 struct branch_stack dummy_bs = { .nr = 0, .hw_idx = 0 }; 458 int err; 459 size_t sz; 460 u64 orig_type = evsel->core.attr.sample_type; 461 u64 orig_branch_type = evsel->core.attr.branch_sample_type; 462 463 struct branch_stack *orig_bs = sample->branch_stack; 464 465 if (event_copy == NULL) { 466 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 467 if (!inject->event_copy) 468 return -ENOMEM; 469 470 event_copy = (void *)inject->event_copy; 471 } 472 473 if (!sample->branch_stack) 474 sample->branch_stack = &dummy_bs; 475 476 if (inject->itrace_synth_opts.add_last_branch) { 477 /* Temporarily add in type bits for synthesis. */ 478 evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 479 evsel->core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 480 } 481 evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX; 482 483 sz = perf_event__sample_event_size(sample, evsel->core.attr.sample_type, 484 evsel->core.attr.read_format, 485 evsel->core.attr.branch_sample_type); 486 487 if (sz >= PERF_SAMPLE_MAX_SIZE) { 488 pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE); 489 evsel->core.attr.sample_type = orig_type; 490 evsel->core.attr.branch_sample_type = orig_branch_type; 491 sample->branch_stack = orig_bs; 492 return -EFAULT; 493 } 494 495 event_copy->header.type = PERF_RECORD_SAMPLE; 496 event_copy->header.misc = event->header.misc; 497 event_copy->header.size = sz; 498 499 err = perf_event__synthesize_sample(event_copy, evsel->core.attr.sample_type, 500 evsel->core.attr.read_format, 501 evsel->core.attr.branch_sample_type, sample); 502 503 evsel->core.attr.sample_type = orig_type; 504 evsel->core.attr.branch_sample_type = orig_branch_type; 505 sample->branch_stack = orig_bs; 506 507 if (err) { 508 pr_err("Failed to synthesize sample\n"); 509 return err; 510 } 511 event = event_copy; 512 } else if (inject->itrace_synth_opts.set && 513 (evsel->core.attr.sample_type & PERF_SAMPLE_AUX)) { 514 event = perf_inject__cut_auxtrace_sample(inject, event, sample); 515 if (IS_ERR(event)) 516 return PTR_ERR(event); 517 } 518 519 return perf_event__repipe_synth(tool, event); 520 } 521 522 static int perf_event__convert_sample_callchain(const struct perf_tool *tool, 523 union perf_event *event, 524 struct perf_sample *sample, 525 struct machine *machine) 526 { 527 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 528 struct evsel *evsel = sample->evsel; 529 struct callchain_cursor *cursor = get_tls_callchain_cursor(); 530 union perf_event *event_copy = (void *)inject->event_copy; 531 struct callchain_cursor_node *node; 532 struct thread *thread; 533 u64 sample_type = evsel->core.attr.sample_type; 534 size_t sz; 535 u64 i, k; 536 int ret; 537 538 if (event_copy == NULL) { 539 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 540 if (!inject->event_copy) 541 return -ENOMEM; 542 543 event_copy = (void *)inject->event_copy; 544 } 545 546 if (cursor == NULL) 547 return -ENOMEM; 548 549 callchain_cursor_reset(cursor); 550 551 thread = machine__find_thread(machine, sample->tid, sample->pid); 552 if (thread == NULL) 553 goto out; 554 555 /* this will parse DWARF using stack and register data */ 556 ret = thread__resolve_callchain(thread, cursor, sample, 557 /*parent=*/NULL, /*root_al=*/NULL, 558 PERF_MAX_STACK_DEPTH); 559 thread__put(thread); 560 if (ret != 0) 561 goto out; 562 563 /* copy kernel callchain and context entries */ 564 for (i = 0; i < sample->callchain->nr; i++) { 565 inject->raw_callchain->ips[i] = sample->callchain->ips[i]; 566 if (sample->callchain->ips[i] == PERF_CONTEXT_USER) { 567 i++; 568 break; 569 } 570 } 571 if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER) 572 inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER; 573 574 node = cursor->first; 575 for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) { 576 if (!(machine->single_address_space && 577 machine__kernel_ip(machine, node->ip)) && 578 !(node->ms.sym && symbol__inlined(node->ms.sym))) { 579 inject->raw_callchain->ips[i++] = node->ip; 580 } 581 582 node = node->next; 583 } 584 585 inject->raw_callchain->nr = i; 586 sample->callchain = inject->raw_callchain; 587 588 out: 589 memcpy(event_copy, event, sizeof(event->header)); 590 591 /* remove sample_type {STACK,REGS}_USER for synthesize */ 592 sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER); 593 594 sz = perf_event__sample_event_size(sample, sample_type, 595 evsel->core.attr.read_format, 596 evsel->core.attr.branch_sample_type); 597 if (sz >= PERF_SAMPLE_MAX_SIZE) { 598 pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE); 599 return -EFAULT; 600 } 601 event_copy->header.size = sz; 602 603 ret = perf_event__synthesize_sample(event_copy, sample_type, 604 evsel->core.attr.read_format, 605 evsel->core.attr.branch_sample_type, sample); 606 if (ret) { 607 pr_err("Failed to synthesize sample\n"); 608 return ret; 609 } 610 return perf_event__repipe_synth(tool, event_copy); 611 } 612 613 static struct dso *findnew_dso(int pid, int tid, const char *filename, 614 const struct dso_id *id, struct machine *machine) 615 { 616 struct thread *thread; 617 struct nsinfo *nsi = NULL; 618 struct nsinfo *nnsi; 619 struct dso *dso; 620 bool vdso; 621 622 thread = machine__findnew_thread(machine, pid, tid); 623 if (thread == NULL) { 624 pr_err("cannot find or create a task %d/%d.\n", tid, pid); 625 return NULL; 626 } 627 628 vdso = is_vdso_map(filename); 629 nsi = nsinfo__get(thread__nsinfo(thread)); 630 631 if (vdso) { 632 /* The vdso maps are always on the host and not the 633 * container. Ensure that we don't use setns to look 634 * them up. 635 */ 636 nnsi = nsinfo__copy(nsi); 637 if (nnsi) { 638 nsinfo__put(nsi); 639 nsinfo__clear_need_setns(nnsi); 640 nsi = nnsi; 641 } 642 dso = machine__findnew_vdso(machine, thread); 643 } else { 644 dso = machine__findnew_dso_id(machine, filename, id); 645 } 646 647 if (dso) { 648 mutex_lock(dso__lock(dso)); 649 dso__set_nsinfo(dso, nsi); 650 mutex_unlock(dso__lock(dso)); 651 } else 652 nsinfo__put(nsi); 653 654 thread__put(thread); 655 return dso; 656 } 657 658 /* 659 * The evsel used for the sample ID for mmap events. Typically stashed when 660 * processing mmap events. If not stashed, search the evlist for the first mmap 661 * gathering event. 662 */ 663 static struct evsel *inject__mmap_evsel(struct perf_inject *inject) 664 { 665 struct evsel *pos; 666 667 if (inject->mmap_evsel) 668 return inject->mmap_evsel; 669 670 evlist__for_each_entry(inject->session->evlist, pos) { 671 if (pos->core.attr.mmap) { 672 inject->mmap_evsel = pos; 673 return pos; 674 } 675 } 676 pr_err("No mmap events found\n"); 677 return NULL; 678 } 679 680 static int perf_event__repipe_common_mmap(const struct perf_tool *tool, 681 union perf_event *event, 682 struct perf_sample *sample, 683 struct machine *machine, 684 __u32 pid, __u32 tid, 685 __u64 start, __u64 len, __u64 pgoff, 686 __u32 flags, __u32 prot, 687 const char *filename, 688 const struct dso_id *dso_id, 689 int (*perf_event_process)(const struct perf_tool *tool, 690 union perf_event *event, 691 struct perf_sample *sample, 692 struct machine *machine)) 693 { 694 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 695 struct dso *dso = NULL; 696 bool dso_sought = false; 697 698 #ifdef HAVE_JITDUMP 699 if (inject->jit_mode) { 700 u64 n = 0; 701 int ret; 702 703 /* If jit marker, then inject jit mmaps and generate ELF images. */ 704 ret = jit_process(inject->session, &inject->output, machine, 705 filename, pid, tid, &n); 706 if (ret < 0) 707 return ret; 708 if (ret) { 709 inject->bytes_written += n; 710 return 0; 711 } 712 } 713 #endif 714 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 715 dso = findnew_dso(pid, tid, filename, dso_id, machine); 716 dso_sought = true; 717 if (dso) { 718 /* mark it not to inject build-id */ 719 dso__set_hit(dso); 720 } 721 } 722 if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) { 723 if (!dso_sought) { 724 dso = findnew_dso(pid, tid, filename, dso_id, machine); 725 dso_sought = true; 726 } 727 728 if (dso && !dso__hit(dso)) { 729 if (!sample->evsel) 730 sample->evsel = evlist__event2evsel(inject->session->evlist, event); 731 732 if (sample->evsel) { 733 dso__set_hit(dso); 734 tool__inject_build_id(tool, sample, machine, 735 /*misc=*/sample->cpumode, 736 filename, dso, flags); 737 } 738 } 739 } else { 740 int err; 741 742 /* 743 * Remember the evsel for lazy build id generation. It is used 744 * for the sample id header type. 745 */ 746 if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 747 inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) && 748 !inject->mmap_evsel) 749 inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event); 750 751 /* Create the thread, map, etc. Not done for the unordered inject all case. */ 752 err = perf_event_process(tool, event, sample, machine); 753 754 if (err) { 755 dso__put(dso); 756 return err; 757 } 758 } 759 if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) && 760 !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { 761 struct evsel *saved_evsel = sample->evsel; 762 763 sample->evsel = evlist__event2evsel(inject->session->evlist, event); 764 if (sample->evsel && !dso_sought) { 765 dso = findnew_dso(pid, tid, filename, dso_id, machine); 766 dso_sought = true; 767 } 768 if (sample->evsel && dso && 769 !tool__inject_mmap2_build_id(tool, sample, machine, 770 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID, 771 pid, tid, start, len, pgoff, 772 dso, 773 prot, flags, 774 filename)) { 775 /* Injected mmap2 so no need to repipe. */ 776 sample->evsel = saved_evsel; 777 dso__put(dso); 778 return 0; 779 } 780 sample->evsel = saved_evsel; 781 } 782 dso__put(dso); 783 if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) 784 return 0; 785 786 return perf_event__repipe(tool, event, sample, machine); 787 } 788 789 static int perf_event__repipe_mmap(const struct perf_tool *tool, 790 union perf_event *event, 791 struct perf_sample *sample, 792 struct machine *machine) 793 { 794 return perf_event__repipe_common_mmap( 795 tool, event, sample, machine, 796 event->mmap.pid, event->mmap.tid, 797 event->mmap.start, event->mmap.len, event->mmap.pgoff, 798 /*flags=*/0, PROT_EXEC, 799 event->mmap.filename, /*dso_id=*/NULL, 800 perf_event__process_mmap); 801 } 802 803 static int perf_event__repipe_mmap2(const struct perf_tool *tool, 804 union perf_event *event, 805 struct perf_sample *sample, 806 struct machine *machine) 807 { 808 struct dso_id id = dso_id_empty; 809 810 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 811 build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); 812 } else { 813 id.maj = event->mmap2.maj; 814 id.min = event->mmap2.min; 815 id.ino = event->mmap2.ino; 816 id.ino_generation = event->mmap2.ino_generation; 817 id.mmap2_valid = true; 818 id.mmap2_ino_generation_valid = true; 819 } 820 821 return perf_event__repipe_common_mmap( 822 tool, event, sample, machine, 823 event->mmap2.pid, event->mmap2.tid, 824 event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, 825 event->mmap2.flags, event->mmap2.prot, 826 event->mmap2.filename, &id, 827 perf_event__process_mmap2); 828 } 829 830 static int perf_event__repipe_fork(const struct perf_tool *tool, 831 union perf_event *event, 832 struct perf_sample *sample, 833 struct machine *machine) 834 { 835 int err; 836 837 err = perf_event__process_fork(tool, event, sample, machine); 838 perf_event__repipe(tool, event, sample, machine); 839 840 return err; 841 } 842 843 static int perf_event__repipe_comm(const struct perf_tool *tool, 844 union perf_event *event, 845 struct perf_sample *sample, 846 struct machine *machine) 847 { 848 int err; 849 850 err = perf_event__process_comm(tool, event, sample, machine); 851 perf_event__repipe(tool, event, sample, machine); 852 853 return err; 854 } 855 856 static int perf_event__repipe_namespaces(const struct perf_tool *tool, 857 union perf_event *event, 858 struct perf_sample *sample, 859 struct machine *machine) 860 { 861 int err = perf_event__process_namespaces(tool, event, sample, machine); 862 863 perf_event__repipe(tool, event, sample, machine); 864 865 return err; 866 } 867 868 static int perf_event__repipe_exit(const struct perf_tool *tool, 869 union perf_event *event, 870 struct perf_sample *sample, 871 struct machine *machine) 872 { 873 int err; 874 875 err = perf_event__process_exit(tool, event, sample, machine); 876 perf_event__repipe(tool, event, sample, machine); 877 878 return err; 879 } 880 881 #ifdef HAVE_LIBTRACEEVENT 882 static int perf_event__repipe_tracing_data(const struct perf_tool *tool, 883 struct perf_session *session, 884 union perf_event *event) 885 { 886 perf_event__repipe_synth(tool, event); 887 888 return perf_event__process_tracing_data(tool, session, event); 889 } 890 #endif 891 892 static int dso__read_build_id(struct dso *dso) 893 { 894 struct nscookie nsc; 895 struct build_id bid = { .size = 0, }; 896 897 if (dso__has_build_id(dso)) 898 return 0; 899 900 mutex_lock(dso__lock(dso)); 901 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 902 if (filename__read_build_id(dso__long_name(dso), &bid) > 0) 903 dso__set_build_id(dso, &bid); 904 else if (dso__nsinfo(dso)) { 905 char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); 906 907 if (new_name && filename__read_build_id(new_name, &bid) > 0) 908 dso__set_build_id(dso, &bid); 909 free(new_name); 910 } 911 nsinfo__mountns_exit(&nsc); 912 mutex_unlock(dso__lock(dso)); 913 914 return dso__has_build_id(dso) ? 0 : -1; 915 } 916 917 static struct strlist *perf_inject__parse_known_build_ids( 918 const char *known_build_ids_string) 919 { 920 struct str_node *pos, *tmp; 921 struct strlist *known_build_ids; 922 int bid_len; 923 924 known_build_ids = strlist__new(known_build_ids_string, NULL); 925 if (known_build_ids == NULL) 926 return NULL; 927 strlist__for_each_entry_safe(pos, tmp, known_build_ids) { 928 const char *build_id, *dso_name; 929 930 build_id = skip_spaces(pos->s); 931 dso_name = strchr(build_id, ' '); 932 if (dso_name == NULL) { 933 strlist__remove(known_build_ids, pos); 934 continue; 935 } 936 bid_len = dso_name - pos->s; 937 dso_name = skip_spaces(dso_name); 938 if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) { 939 strlist__remove(known_build_ids, pos); 940 continue; 941 } 942 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 943 if (!isxdigit(build_id[2 * ix]) || 944 !isxdigit(build_id[2 * ix + 1])) { 945 strlist__remove(known_build_ids, pos); 946 break; 947 } 948 } 949 } 950 return known_build_ids; 951 } 952 953 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, 954 struct dso *dso) 955 { 956 struct str_node *pos; 957 958 strlist__for_each_entry(pos, inject->known_build_ids) { 959 struct build_id bid; 960 const char *build_id, *dso_name; 961 size_t bid_len; 962 963 build_id = skip_spaces(pos->s); 964 dso_name = strchr(build_id, ' '); 965 bid_len = dso_name - pos->s; 966 if (bid_len > sizeof(bid.data)) 967 bid_len = sizeof(bid.data); 968 dso_name = skip_spaces(dso_name); 969 if (strcmp(dso__long_name(dso), dso_name)) 970 continue; 971 for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) { 972 bid.data[ix] = (hex(build_id[2 * ix]) << 4 | 973 hex(build_id[2 * ix + 1])); 974 } 975 bid.size = bid_len / 2; 976 dso__set_build_id(dso, &bid); 977 return true; 978 } 979 return false; 980 } 981 982 static int tool__inject_build_id(const struct perf_tool *tool, 983 struct perf_sample *sample, 984 struct machine *machine, 985 __u16 misc, 986 const char *filename, 987 struct dso *dso, u32 flags) 988 { 989 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 990 int err; 991 992 if (is_anon_memory(filename) || flags & MAP_HUGETLB) 993 return 0; 994 if (is_no_dso_memory(filename)) 995 return 0; 996 997 if (inject->known_build_ids != NULL && 998 perf_inject__lookup_known_build_id(inject, dso)) 999 return 1; 1000 1001 if (dso__read_build_id(dso) < 0) { 1002 pr_debug("no build_id found for %s\n", filename); 1003 return -1; 1004 } 1005 1006 err = perf_event__synthesize_build_id(tool, sample, machine, 1007 perf_event__repipe, 1008 misc, dso__bid(dso), 1009 filename); 1010 if (err) { 1011 pr_err("Can't synthesize build_id event for %s\n", filename); 1012 return -1; 1013 } 1014 1015 return 0; 1016 } 1017 1018 static int tool__inject_mmap2_build_id(const struct perf_tool *tool, 1019 struct perf_sample *sample, 1020 struct machine *machine, 1021 __u16 misc, 1022 __u32 pid, __u32 tid, 1023 __u64 start, __u64 len, __u64 pgoff, 1024 struct dso *dso, 1025 __u32 prot, __u32 flags, 1026 const char *filename) 1027 { 1028 int err; 1029 1030 /* Return to repipe anonymous maps. */ 1031 if (is_anon_memory(filename) || flags & MAP_HUGETLB) 1032 return 1; 1033 if (is_no_dso_memory(filename)) 1034 return 1; 1035 1036 if (dso__read_build_id(dso)) { 1037 pr_debug("no build_id found for %s\n", filename); 1038 return -1; 1039 } 1040 1041 err = perf_event__synthesize_mmap2_build_id(tool, sample, machine, 1042 perf_event__repipe, 1043 misc, pid, tid, 1044 start, len, pgoff, 1045 dso__bid(dso), 1046 prot, flags, 1047 filename); 1048 if (err) { 1049 pr_err("Can't synthesize build_id event for %s\n", filename); 1050 return -1; 1051 } 1052 return 0; 1053 } 1054 1055 static int mark_dso_hit(const struct perf_inject *inject, 1056 const struct perf_tool *tool, 1057 struct perf_sample *sample, 1058 struct machine *machine, 1059 struct evsel *mmap_evsel, 1060 struct map *map, bool sample_in_dso) 1061 { 1062 struct dso *dso; 1063 u16 misc = sample->cpumode; 1064 1065 if (!map) 1066 return 0; 1067 1068 if (!sample_in_dso) { 1069 u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL | 1070 PERF_RECORD_MISC_GUEST_USER; 1071 1072 if ((misc & guest_mask) != 0) { 1073 misc &= PERF_RECORD_MISC_HYPERVISOR; 1074 misc |= __map__is_kernel(map) 1075 ? PERF_RECORD_MISC_GUEST_KERNEL 1076 : PERF_RECORD_MISC_GUEST_USER; 1077 } else { 1078 misc &= PERF_RECORD_MISC_HYPERVISOR; 1079 misc |= __map__is_kernel(map) 1080 ? PERF_RECORD_MISC_KERNEL 1081 : PERF_RECORD_MISC_USER; 1082 } 1083 } 1084 dso = map__dso(map); 1085 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 1086 if (dso && !dso__hit(dso)) { 1087 /* 1088 * The sample is just read for identifiers which we want 1089 * to match the for the event of the sample. 1090 */ 1091 dso__set_hit(dso); 1092 tool__inject_build_id(tool, sample, machine, 1093 misc, dso__long_name(dso), dso, 1094 map__flags(map)); 1095 } 1096 } else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 1097 if (!map__hit(map)) { 1098 const struct build_id null_bid = { .size = 0 }; 1099 const struct build_id *bid = dso ? dso__bid(dso) : &null_bid; 1100 const char *filename = dso ? dso__long_name(dso) : ""; 1101 struct evsel *saved_evsel = sample->evsel; 1102 1103 map__set_hit(map); 1104 /* Creating a new mmap2 event which has an evsel for the mmap event. */ 1105 sample->evsel = mmap_evsel; 1106 perf_event__synthesize_mmap2_build_id(tool, sample, machine, 1107 perf_event__repipe, 1108 misc, 1109 sample->pid, sample->tid, 1110 map__start(map), 1111 map__end(map) - map__start(map), 1112 map__pgoff(map), 1113 bid, 1114 map__prot(map), 1115 map__flags(map), 1116 filename); 1117 sample->evsel = saved_evsel; 1118 } 1119 } 1120 return 0; 1121 } 1122 1123 struct mark_dso_hit_args { 1124 const struct perf_inject *inject; 1125 const struct perf_tool *tool; 1126 struct perf_sample *sample; 1127 struct machine *machine; 1128 struct evsel *mmap_evsel; 1129 }; 1130 1131 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data) 1132 { 1133 struct mark_dso_hit_args *args = data; 1134 struct map *map = node->ms.map; 1135 1136 return mark_dso_hit(args->inject, args->tool, args->sample, args->machine, 1137 args->mmap_evsel, map, /*sample_in_dso=*/false); 1138 } 1139 1140 static int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, 1141 struct perf_sample *sample, struct machine *machine) 1142 { 1143 struct addr_location al; 1144 struct thread *thread; 1145 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1146 struct mark_dso_hit_args args = { 1147 .inject = inject, 1148 .tool = tool, 1149 /* 1150 * Use the parsed sample data of the sample event, which will 1151 * have a later timestamp than the mmap event. 1152 */ 1153 .sample = sample, 1154 .machine = machine, 1155 .mmap_evsel = inject__mmap_evsel(inject), 1156 }; 1157 1158 addr_location__init(&al); 1159 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 1160 if (thread == NULL) { 1161 pr_err("problem processing %d event, skipping it.\n", 1162 event->header.type); 1163 goto repipe; 1164 } 1165 1166 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 1167 mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map, 1168 /*sample_in_dso=*/true); 1169 } 1170 1171 sample__for_each_callchain_node(thread, sample, PERF_MAX_STACK_DEPTH, 1172 /*symbols=*/false, mark_dso_hit_callback, &args); 1173 thread__put(thread); 1174 repipe: 1175 perf_event__repipe(tool, event, sample, machine); 1176 addr_location__exit(&al); 1177 return 0; 1178 } 1179 1180 static int perf_inject__sched_process_exit(const struct perf_tool *tool, 1181 union perf_event *event __maybe_unused, 1182 struct perf_sample *sample, 1183 struct machine *machine __maybe_unused) 1184 { 1185 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1186 struct event_entry *ent; 1187 1188 list_for_each_entry(ent, &inject->samples, node) { 1189 if (sample->tid == ent->tid) { 1190 list_del_init(&ent->node); 1191 free(ent); 1192 break; 1193 } 1194 } 1195 1196 return 0; 1197 } 1198 1199 static int perf_inject__sched_switch(const struct perf_tool *tool, 1200 union perf_event *event, 1201 struct perf_sample *sample, 1202 struct machine *machine) 1203 { 1204 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1205 struct event_entry *ent; 1206 1207 perf_inject__sched_process_exit(tool, event, sample, machine); 1208 1209 ent = malloc(event->header.size + sizeof(struct event_entry)); 1210 if (ent == NULL) { 1211 color_fprintf(stderr, PERF_COLOR_RED, 1212 "Not enough memory to process sched switch event!"); 1213 return -1; 1214 } 1215 1216 ent->tid = sample->tid; 1217 memcpy(&ent->event, event, event->header.size); 1218 list_add(&ent->node, &inject->samples); 1219 return 0; 1220 } 1221 1222 #ifdef HAVE_LIBTRACEEVENT 1223 static int perf_inject__sched_stat(const struct perf_tool *tool, 1224 union perf_event *event __maybe_unused, 1225 struct perf_sample *sample, 1226 struct machine *machine) 1227 { 1228 struct event_entry *ent; 1229 union perf_event *event_sw; 1230 struct perf_sample sample_sw; 1231 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1232 struct evsel *evsel = sample->evsel; 1233 u32 pid = perf_sample__intval(sample, "pid"); 1234 int ret; 1235 1236 list_for_each_entry(ent, &inject->samples, node) { 1237 if (pid == ent->tid) 1238 goto found; 1239 } 1240 1241 return 0; 1242 found: 1243 event_sw = &ent->event[0]; 1244 evsel__parse_sample(evsel, event_sw, &sample_sw); 1245 1246 sample_sw.period = sample->period; 1247 sample_sw.time = sample->time; 1248 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type, 1249 evsel->core.attr.read_format, 1250 evsel->core.attr.branch_sample_type, &sample_sw); 1251 build_id__mark_dso_hit(tool, event_sw, &sample_sw, machine); 1252 ret = perf_event__repipe(tool, event_sw, &sample_sw, machine); 1253 perf_sample__exit(&sample_sw); 1254 return ret; 1255 } 1256 #endif 1257 1258 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) 1259 { 1260 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL)) 1261 return NULL; 1262 return &gs->vcpu[vcpu]; 1263 } 1264 1265 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) 1266 { 1267 ssize_t ret = writen(gs->tmp_fd, buf, sz); 1268 1269 return ret < 0 ? ret : 0; 1270 } 1271 1272 static int guest_session__repipe(const struct perf_tool *tool, 1273 union perf_event *event, 1274 struct perf_sample *sample __maybe_unused, 1275 struct machine *machine __maybe_unused) 1276 { 1277 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1278 1279 return guest_session__output_bytes(gs, event, event->header.size); 1280 } 1281 1282 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 1283 { 1284 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 1285 int hash; 1286 1287 if (!guest_tid) 1288 return -ENOMEM; 1289 1290 guest_tid->tid = tid; 1291 guest_tid->vcpu = vcpu; 1292 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 1293 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 1294 1295 return 0; 1296 } 1297 1298 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 1299 union perf_event *event, 1300 u64 offset __maybe_unused, void *data) 1301 { 1302 struct guest_session *gs = data; 1303 unsigned int vcpu; 1304 struct guest_vcpu *guest_vcpu; 1305 int ret; 1306 1307 if (event->header.type != PERF_RECORD_COMM || 1308 event->comm.pid != gs->machine_pid) 1309 return 0; 1310 1311 /* 1312 * QEMU option -name debug-threads=on, causes thread names formatted as 1313 * below, although it is not an ABI. Also libvirt seems to use this by 1314 * default. Here we rely on it to tell us which thread is which VCPU. 1315 */ 1316 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu); 1317 if (ret <= 0) 1318 return ret; 1319 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n", 1320 event->comm.tid, event->comm.comm, vcpu); 1321 if (vcpu > INT_MAX) { 1322 pr_err("Invalid VCPU %u\n", vcpu); 1323 return -EINVAL; 1324 } 1325 guest_vcpu = guest_session__vcpu(gs, vcpu); 1326 if (!guest_vcpu) 1327 return -ENOMEM; 1328 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) { 1329 pr_err("Fatal error: Two threads found with the same VCPU\n"); 1330 return -EINVAL; 1331 } 1332 guest_vcpu->tid = event->comm.tid; 1333 1334 return guest_session__map_tid(gs, event->comm.tid, vcpu); 1335 } 1336 1337 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) 1338 { 1339 return perf_session__peek_events(session, session->header.data_offset, 1340 session->header.data_size, 1341 host_peek_vm_comms_cb, gs); 1342 } 1343 1344 static bool evlist__is_id_used(struct evlist *evlist, u64 id) 1345 { 1346 return evlist__id2sid(evlist, id); 1347 } 1348 1349 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) 1350 { 1351 do { 1352 gs->highest_id += 1; 1353 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id)); 1354 1355 return gs->highest_id; 1356 } 1357 1358 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) 1359 { 1360 struct guest_id *guest_id = zalloc(sizeof(*guest_id)); 1361 int hash; 1362 1363 if (!guest_id) 1364 return -ENOMEM; 1365 1366 guest_id->id = id; 1367 guest_id->host_id = host_id; 1368 guest_id->vcpu = vcpu; 1369 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); 1370 hlist_add_head(&guest_id->node, &gs->heads[hash]); 1371 1372 return 0; 1373 } 1374 1375 static u64 evlist__find_highest_id(struct evlist *evlist) 1376 { 1377 struct evsel *evsel; 1378 u64 highest_id = 1; 1379 1380 evlist__for_each_entry(evlist, evsel) { 1381 u32 j; 1382 1383 for (j = 0; j < evsel->core.ids; j++) { 1384 u64 id = evsel->core.id[j]; 1385 1386 if (id > highest_id) 1387 highest_id = id; 1388 } 1389 } 1390 1391 return highest_id; 1392 } 1393 1394 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 1395 { 1396 struct evlist *evlist = gs->session->evlist; 1397 struct evsel *evsel; 1398 int ret; 1399 1400 evlist__for_each_entry(evlist, evsel) { 1401 u32 j; 1402 1403 for (j = 0; j < evsel->core.ids; j++) { 1404 struct perf_sample_id *sid; 1405 u64 host_id; 1406 u64 id; 1407 1408 id = evsel->core.id[j]; 1409 sid = evlist__id2sid(evlist, id); 1410 if (!sid || sid->cpu.cpu == -1) 1411 continue; 1412 host_id = guest_session__allocate_new_id(gs, host_evlist); 1413 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 1414 if (ret) 1415 return ret; 1416 } 1417 } 1418 1419 return 0; 1420 } 1421 1422 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1423 { 1424 struct hlist_head *head; 1425 struct guest_id *guest_id; 1426 int hash; 1427 1428 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1429 head = &gs->heads[hash]; 1430 1431 hlist_for_each_entry(guest_id, head, node) 1432 if (guest_id->id == id) 1433 return guest_id; 1434 1435 return NULL; 1436 } 1437 1438 static int process_attr(const struct perf_tool *tool, union perf_event *event, 1439 struct perf_sample *sample __maybe_unused, 1440 struct machine *machine __maybe_unused) 1441 { 1442 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1443 1444 return perf_event__process_attr(tool, event, &inject->session->evlist); 1445 } 1446 1447 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1448 { 1449 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1450 struct perf_event_attr attr = evsel->core.attr; 1451 u64 *id_array; 1452 u32 *vcpu_array; 1453 int ret = -ENOMEM; 1454 u32 i; 1455 1456 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1457 if (!id_array) 1458 return -ENOMEM; 1459 1460 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1461 if (!vcpu_array) 1462 goto out; 1463 1464 for (i = 0; i < evsel->core.ids; i++) { 1465 u64 id = evsel->core.id[i]; 1466 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1467 1468 if (!guest_id) { 1469 pr_err("Failed to find guest id %"PRIu64"\n", id); 1470 ret = -EINVAL; 1471 goto out; 1472 } 1473 id_array[i] = guest_id->host_id; 1474 vcpu_array[i] = guest_id->vcpu; 1475 } 1476 1477 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1478 attr.exclude_host = 1; 1479 attr.exclude_guest = 0; 1480 1481 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1482 id_array, process_attr); 1483 if (ret) 1484 pr_err("Failed to add guest attr.\n"); 1485 1486 for (i = 0; i < evsel->core.ids; i++) { 1487 struct perf_sample_id *sid; 1488 u32 vcpu = vcpu_array[i]; 1489 1490 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1491 /* Guest event is per-thread from the host point of view */ 1492 sid->cpu.cpu = -1; 1493 sid->tid = gs->vcpu[vcpu].tid; 1494 sid->machine_pid = gs->machine_pid; 1495 sid->vcpu.cpu = vcpu; 1496 } 1497 out: 1498 free(vcpu_array); 1499 free(id_array); 1500 return ret; 1501 } 1502 1503 static int guest_session__add_attrs(struct guest_session *gs) 1504 { 1505 struct evlist *evlist = gs->session->evlist; 1506 struct evsel *evsel; 1507 int ret; 1508 1509 evlist__for_each_entry(evlist, evsel) { 1510 ret = guest_session__add_attr(gs, evsel); 1511 if (ret) 1512 return ret; 1513 } 1514 1515 return 0; 1516 } 1517 1518 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1519 { 1520 struct perf_session *session = inject->session; 1521 struct evlist *evlist = session->evlist; 1522 struct machine *machine = &session->machines.host; 1523 size_t from = evlist->core.nr_entries - new_cnt; 1524 1525 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1526 evlist, machine, from); 1527 } 1528 1529 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) 1530 { 1531 struct hlist_head *head; 1532 struct guest_tid *guest_tid; 1533 int hash; 1534 1535 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); 1536 head = &gs->tids[hash]; 1537 1538 hlist_for_each_entry(guest_tid, head, node) 1539 if (guest_tid->tid == tid) 1540 return guest_tid; 1541 1542 return NULL; 1543 } 1544 1545 static bool dso__is_in_kernel_space(struct dso *dso) 1546 { 1547 if (dso__is_vdso(dso)) 1548 return false; 1549 1550 return dso__is_kcore(dso) || 1551 dso__kernel(dso) || 1552 is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN); 1553 } 1554 1555 static u64 evlist__first_id(struct evlist *evlist) 1556 { 1557 struct evsel *evsel; 1558 1559 evlist__for_each_entry(evlist, evsel) { 1560 if (evsel->core.ids) 1561 return evsel->core.id[0]; 1562 } 1563 return 0; 1564 } 1565 1566 static int process_build_id(const struct perf_tool *tool, 1567 union perf_event *event, 1568 struct perf_sample *sample __maybe_unused, 1569 struct machine *machine __maybe_unused) 1570 { 1571 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1572 1573 return perf_event__process_build_id(tool, inject->session, event); 1574 } 1575 1576 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) 1577 { 1578 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid); 1579 struct perf_sample synth_sample = { 1580 .evsel = inject__mmap_evsel(inject), 1581 .pid = -1, 1582 .tid = -1, 1583 .time = -1, 1584 .stream_id = -1, 1585 .cpu = -1, 1586 .period = 1, 1587 .cpumode = dso__is_in_kernel_space(dso) 1588 ? PERF_RECORD_MISC_GUEST_KERNEL 1589 : PERF_RECORD_MISC_GUEST_USER, 1590 }; 1591 1592 if (!machine) 1593 return -ENOMEM; 1594 1595 dso__set_hit(dso); 1596 1597 return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine, 1598 process_build_id, 1599 /*misc=*/synth_sample.cpumode, 1600 dso__bid(dso), dso__long_name(dso)); 1601 } 1602 1603 static int guest_session__add_build_ids_cb(struct dso *dso, void *data) 1604 { 1605 struct guest_session *gs = data; 1606 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1607 1608 if (!dso__has_build_id(dso)) 1609 return 0; 1610 1611 return synthesize_build_id(inject, dso, gs->machine_pid); 1612 1613 } 1614 1615 static int guest_session__add_build_ids(struct guest_session *gs) 1616 { 1617 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1618 1619 /* Build IDs will be put in the Build ID feature section */ 1620 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID); 1621 1622 return dsos__for_each_dso(&gs->session->machines.host.dsos, 1623 guest_session__add_build_ids_cb, 1624 gs); 1625 } 1626 1627 static int guest_session__ksymbol_event(const struct perf_tool *tool, 1628 union perf_event *event, 1629 struct perf_sample *sample __maybe_unused, 1630 struct machine *machine __maybe_unused) 1631 { 1632 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1633 1634 /* Only support out-of-line i.e. no BPF support */ 1635 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) 1636 return 0; 1637 1638 return guest_session__output_bytes(gs, event, event->header.size); 1639 } 1640 1641 static int guest_session__start(struct guest_session *gs, const char *name, bool force) 1642 { 1643 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX"; 1644 struct perf_session *session; 1645 int ret; 1646 1647 /* Only these events will be injected */ 1648 gs->tool.mmap = guest_session__repipe; 1649 gs->tool.mmap2 = guest_session__repipe; 1650 gs->tool.comm = guest_session__repipe; 1651 gs->tool.fork = guest_session__repipe; 1652 gs->tool.exit = guest_session__repipe; 1653 gs->tool.lost = guest_session__repipe; 1654 gs->tool.context_switch = guest_session__repipe; 1655 gs->tool.ksymbol = guest_session__ksymbol_event; 1656 gs->tool.text_poke = guest_session__repipe; 1657 /* 1658 * Processing a build ID creates a struct dso with that build ID. Later, 1659 * all guest dsos are iterated and the build IDs processed into the host 1660 * session where they will be output to the Build ID feature section 1661 * when the perf.data file header is written. 1662 */ 1663 gs->tool.build_id = perf_event__process_build_id; 1664 /* Process the id index to know what VCPU an ID belongs to */ 1665 gs->tool.id_index = perf_event__process_id_index; 1666 1667 gs->tool.ordered_events = true; 1668 gs->tool.ordering_requires_timestamps = true; 1669 1670 gs->data.path = name; 1671 gs->data.force = force; 1672 gs->data.mode = PERF_DATA_MODE_READ; 1673 1674 session = perf_session__new(&gs->data, &gs->tool); 1675 if (IS_ERR(session)) 1676 return PTR_ERR(session); 1677 gs->session = session; 1678 1679 /* 1680 * Initial events have zero'd ID samples. Get default ID sample size 1681 * used for removing them. 1682 */ 1683 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size; 1684 /* And default ID for adding back a host-compatible ID sample */ 1685 gs->dflt_id = evlist__first_id(session->evlist); 1686 if (!gs->dflt_id) { 1687 pr_err("Guest data has no sample IDs"); 1688 return -EINVAL; 1689 } 1690 1691 /* Temporary file for guest events */ 1692 gs->tmp_file_name = strdup(tmp_file_name); 1693 if (!gs->tmp_file_name) 1694 return -ENOMEM; 1695 gs->tmp_fd = mkstemp(gs->tmp_file_name); 1696 if (gs->tmp_fd < 0) 1697 return -errno; 1698 1699 if (zstd_init(&gs->session->zstd_data, 0) < 0) 1700 pr_warning("Guest session decompression initialization failed.\n"); 1701 1702 /* 1703 * perf does not support processing 2 sessions simultaneously, so output 1704 * guest events to a temporary file. 1705 */ 1706 ret = perf_session__process_events(gs->session); 1707 if (ret) 1708 return ret; 1709 1710 if (lseek(gs->tmp_fd, 0, SEEK_SET)) 1711 return -errno; 1712 1713 return 0; 1714 } 1715 1716 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ 1717 static void free_hlist(struct hlist_head *heads, size_t hlist_sz) 1718 { 1719 struct hlist_node *pos, *n; 1720 size_t i; 1721 1722 for (i = 0; i < hlist_sz; ++i) { 1723 hlist_for_each_safe(pos, n, &heads[i]) { 1724 hlist_del(pos); 1725 free(pos); 1726 } 1727 } 1728 } 1729 1730 static void guest_session__exit(struct guest_session *gs) 1731 { 1732 if (gs->session) { 1733 perf_session__delete(gs->session); 1734 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE); 1735 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE); 1736 } 1737 if (gs->tmp_file_name) { 1738 if (gs->tmp_fd >= 0) 1739 close(gs->tmp_fd); 1740 unlink(gs->tmp_file_name); 1741 zfree(&gs->tmp_file_name); 1742 } 1743 zfree(&gs->vcpu); 1744 zfree(&gs->perf_data_file); 1745 } 1746 1747 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv) 1748 { 1749 tc->time_shift = time_conv->time_shift; 1750 tc->time_mult = time_conv->time_mult; 1751 tc->time_zero = time_conv->time_zero; 1752 tc->time_cycles = time_conv->time_cycles; 1753 tc->time_mask = time_conv->time_mask; 1754 tc->cap_user_time_zero = time_conv->cap_user_time_zero; 1755 tc->cap_user_time_short = time_conv->cap_user_time_short; 1756 } 1757 1758 static void guest_session__get_tc(struct guest_session *gs) 1759 { 1760 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1761 1762 get_tsc_conv(&gs->host_tc, &inject->session->time_conv); 1763 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv); 1764 } 1765 1766 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) 1767 { 1768 u64 tsc; 1769 1770 if (!guest_time) { 1771 *host_time = 0; 1772 return; 1773 } 1774 1775 if (gs->guest_tc.cap_user_time_zero) 1776 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc); 1777 else 1778 tsc = guest_time; 1779 1780 /* 1781 * This is the correct order of operations for x86 if the TSC Offset and 1782 * Multiplier values are used. 1783 */ 1784 tsc -= gs->time_offset; 1785 tsc /= gs->time_scale; 1786 1787 if (gs->host_tc.cap_user_time_zero) 1788 *host_time = tsc_to_perf_time(tsc, &gs->host_tc); 1789 else 1790 *host_time = tsc; 1791 } 1792 1793 static int guest_session__fetch(struct guest_session *gs) 1794 { 1795 void *buf; 1796 struct perf_event_header *hdr; 1797 size_t hdr_sz = sizeof(*hdr); 1798 ssize_t ret; 1799 1800 perf_sample__init(&gs->ev.sample, /*all=*/false); 1801 buf = gs->ev.event_buf; 1802 if (!buf) { 1803 buf = malloc(PERF_SAMPLE_MAX_SIZE); 1804 if (!buf) 1805 return -ENOMEM; 1806 gs->ev.event_buf = buf; 1807 } 1808 hdr = buf; 1809 ret = readn(gs->tmp_fd, buf, hdr_sz); 1810 if (ret < 0) 1811 return ret; 1812 1813 if (!ret) { 1814 /* Zero size means EOF */ 1815 hdr->size = 0; 1816 return 0; 1817 } 1818 1819 buf += hdr_sz; 1820 1821 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); 1822 if (ret < 0) 1823 return ret; 1824 1825 gs->ev.event = (union perf_event *)gs->ev.event_buf; 1826 gs->ev.sample.time = 0; 1827 1828 if (hdr->type >= PERF_RECORD_USER_TYPE_START) { 1829 pr_err("Unexpected type fetching guest event"); 1830 return 0; 1831 } 1832 1833 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample); 1834 if (ret) { 1835 pr_err("Parse failed fetching guest event"); 1836 return ret; 1837 } 1838 1839 if (!gs->have_tc) { 1840 guest_session__get_tc(gs); 1841 gs->have_tc = true; 1842 } 1843 1844 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time); 1845 1846 return 0; 1847 } 1848 1849 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, 1850 const struct perf_sample *sample) 1851 { 1852 struct evsel *evsel; 1853 void *array; 1854 int ret; 1855 1856 evsel = evlist__id2evsel(evlist, sample->id); 1857 array = ev; 1858 1859 if (!evsel) { 1860 pr_err("No evsel for id %"PRIu64"\n", sample->id); 1861 return -EINVAL; 1862 } 1863 1864 array += ev->header.size; 1865 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); 1866 if (ret < 0) 1867 return ret; 1868 1869 if (ret & 7) { 1870 pr_err("Bad id sample size %d\n", ret); 1871 return -EINVAL; 1872 } 1873 1874 ev->header.size += ret; 1875 1876 return 0; 1877 } 1878 1879 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp) 1880 { 1881 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1882 int ret; 1883 1884 if (!gs->ready) 1885 return 0; 1886 1887 while (1) { 1888 struct perf_sample *sample; 1889 struct guest_id *guest_id; 1890 union perf_event *ev; 1891 u16 id_hdr_size; 1892 u8 cpumode; 1893 u64 id; 1894 1895 if (!gs->fetched) { 1896 ret = guest_session__fetch(gs); 1897 if (ret) 1898 break; 1899 gs->fetched = true; 1900 } 1901 1902 ev = gs->ev.event; 1903 sample = &gs->ev.sample; 1904 1905 if (!ev->header.size) { 1906 /* EOF */ 1907 perf_sample__exit(&gs->ev.sample); 1908 gs->fetched = false; 1909 ret = 0; 1910 break; 1911 } 1912 if (sample->time > timestamp) { 1913 ret = 0; 1914 break; 1915 } 1916 1917 /* Change cpumode to guest */ 1918 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1919 if (cpumode & PERF_RECORD_MISC_USER) 1920 cpumode = PERF_RECORD_MISC_GUEST_USER; 1921 else 1922 cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 1923 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK; 1924 ev->header.misc |= cpumode; 1925 1926 id = sample->id; 1927 if (!id) { 1928 id = gs->dflt_id; 1929 id_hdr_size = gs->dflt_id_hdr_size; 1930 } else { 1931 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id); 1932 1933 id_hdr_size = evsel__id_hdr_size(evsel); 1934 } 1935 1936 if (id_hdr_size & 7) { 1937 pr_err("Bad id_hdr_size %u\n", id_hdr_size); 1938 ret = -EINVAL; 1939 break; 1940 } 1941 1942 if (ev->header.size & 7) { 1943 pr_err("Bad event size %u\n", ev->header.size); 1944 ret = -EINVAL; 1945 break; 1946 } 1947 1948 /* Remove guest id sample */ 1949 ev->header.size -= id_hdr_size; 1950 1951 if (ev->header.size & 7) { 1952 pr_err("Bad raw event size %u\n", ev->header.size); 1953 ret = -EINVAL; 1954 break; 1955 } 1956 1957 guest_id = guest_session__lookup_id(gs, id); 1958 if (!guest_id) { 1959 pr_err("Guest event with unknown id %llu\n", 1960 (unsigned long long)id); 1961 ret = -EINVAL; 1962 break; 1963 } 1964 1965 /* Change to host ID to avoid conflicting ID values */ 1966 sample->id = guest_id->host_id; 1967 sample->stream_id = guest_id->host_id; 1968 1969 if (sample->cpu != (u32)-1) { 1970 if (sample->cpu >= gs->vcpu_cnt) { 1971 pr_err("Guest event with unknown VCPU %u\n", 1972 sample->cpu); 1973 return -EINVAL; 1974 } 1975 /* Change to host CPU instead of guest VCPU */ 1976 sample->cpu = gs->vcpu[sample->cpu].cpu; 1977 } 1978 1979 /* New id sample with new ID and CPU */ 1980 ret = evlist__append_id_sample(inject->session->evlist, ev, sample); 1981 if (ret) 1982 break; 1983 1984 if (ev->header.size & 7) { 1985 pr_err("Bad new event size %u\n", ev->header.size); 1986 ret = -EINVAL; 1987 break; 1988 } 1989 1990 ret = output_bytes(inject, ev, ev->header.size); 1991 if (ret) 1992 break; 1993 1994 /* Reset for next guest session event fetch. */ 1995 perf_sample__exit(sample); 1996 gs->fetched = false; 1997 } 1998 if (ret && gs->fetched) { 1999 /* Clear saved sample state on error. */ 2000 perf_sample__exit(&gs->ev.sample); 2001 gs->fetched = false; 2002 } 2003 return ret; 2004 } 2005 2006 static int guest_session__flush_events(struct guest_session *gs) 2007 { 2008 return guest_session__inject_events(gs, -1); 2009 } 2010 2011 static int host__repipe(const struct perf_tool *tool, 2012 union perf_event *event, 2013 struct perf_sample *sample, 2014 struct machine *machine) 2015 { 2016 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 2017 int ret; 2018 2019 ret = guest_session__inject_events(&inject->guest_session, sample->time); 2020 if (ret) 2021 return ret; 2022 2023 return perf_event__repipe(tool, event, sample, machine); 2024 } 2025 2026 static int host__finished_init(const struct perf_tool *tool, struct perf_session *session, 2027 union perf_event *event) 2028 { 2029 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 2030 struct guest_session *gs = &inject->guest_session; 2031 int ret; 2032 2033 /* 2034 * Peek through host COMM events to find QEMU threads and the VCPU they 2035 * are running. 2036 */ 2037 ret = host_peek_vm_comms(session, gs); 2038 if (ret) 2039 return ret; 2040 2041 if (!gs->vcpu_cnt) { 2042 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid); 2043 return -EINVAL; 2044 } 2045 2046 /* 2047 * Allocate new (unused) host sample IDs and map them to the guest IDs. 2048 */ 2049 gs->highest_id = evlist__find_highest_id(session->evlist); 2050 ret = guest_session__map_ids(gs, session->evlist); 2051 if (ret) 2052 return ret; 2053 2054 ret = guest_session__add_attrs(gs); 2055 if (ret) 2056 return ret; 2057 2058 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries); 2059 if (ret) { 2060 pr_err("Failed to synthesize id_index\n"); 2061 return ret; 2062 } 2063 2064 ret = guest_session__add_build_ids(gs); 2065 if (ret) { 2066 pr_err("Failed to add guest build IDs\n"); 2067 return ret; 2068 } 2069 2070 gs->ready = true; 2071 2072 ret = guest_session__inject_events(gs, 0); 2073 if (ret) 2074 return ret; 2075 2076 return perf_event__repipe_op2_synth(tool, session, event); 2077 } 2078 2079 /* 2080 * Obey finished-round ordering. The FINISHED_ROUND event is first processed 2081 * which flushes host events to file up until the last flush time. Then inject 2082 * guest events up to the same time. Finally write out the FINISHED_ROUND event 2083 * itself. 2084 */ 2085 static int host__finished_round(const struct perf_tool *tool, 2086 union perf_event *event, 2087 struct ordered_events *oe) 2088 { 2089 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 2090 int ret = perf_event__process_finished_round(tool, event, oe); 2091 u64 timestamp = ordered_events__last_flush_time(oe); 2092 2093 if (ret) 2094 return ret; 2095 2096 ret = guest_session__inject_events(&inject->guest_session, timestamp); 2097 if (ret) 2098 return ret; 2099 2100 return perf_event__repipe_oe_synth(tool, event, oe); 2101 } 2102 2103 static int host__context_switch(const struct perf_tool *tool, 2104 union perf_event *event, 2105 struct perf_sample *sample, 2106 struct machine *machine) 2107 { 2108 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 2109 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2110 struct guest_session *gs = &inject->guest_session; 2111 u32 pid = event->context_switch.next_prev_pid; 2112 u32 tid = event->context_switch.next_prev_tid; 2113 struct guest_tid *guest_tid; 2114 u32 vcpu; 2115 2116 if (out || pid != gs->machine_pid) 2117 goto out; 2118 2119 guest_tid = guest_session__lookup_tid(gs, tid); 2120 if (!guest_tid) 2121 goto out; 2122 2123 if (sample->cpu == (u32)-1) { 2124 pr_err("Switch event does not have CPU\n"); 2125 return -EINVAL; 2126 } 2127 2128 vcpu = guest_tid->vcpu; 2129 if (vcpu >= gs->vcpu_cnt) 2130 return -EINVAL; 2131 2132 /* Guest is switching in, record which CPU the VCPU is now running on */ 2133 gs->vcpu[vcpu].cpu = sample->cpu; 2134 out: 2135 return host__repipe(tool, event, sample, machine); 2136 } 2137 2138 static void sig_handler(int sig __maybe_unused) 2139 { 2140 session_done = 1; 2141 } 2142 2143 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) 2144 { 2145 struct perf_event_attr *attr = &evsel->core.attr; 2146 const char *name = evsel__name(evsel); 2147 2148 if (!(attr->sample_type & sample_type)) { 2149 pr_err("Samples for %s event do not have %s attribute set.", 2150 name, sample_msg); 2151 return -EINVAL; 2152 } 2153 2154 return 0; 2155 } 2156 2157 static int drop_sample(const struct perf_tool *tool __maybe_unused, 2158 union perf_event *event __maybe_unused, 2159 struct perf_sample *sample __maybe_unused, 2160 struct machine *machine __maybe_unused) 2161 { 2162 return 0; 2163 } 2164 2165 static void strip_init(struct perf_inject *inject) 2166 { 2167 struct evlist *evlist = inject->session->evlist; 2168 struct evsel *evsel; 2169 2170 inject->tool.context_switch = perf_event__drop; 2171 2172 evlist__for_each_entry(evlist, evsel) 2173 evsel->handler = drop_sample; 2174 } 2175 2176 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) 2177 { 2178 struct perf_inject *inject = opt->value; 2179 const char *args; 2180 char *dry_run; 2181 2182 if (unset) 2183 return 0; 2184 2185 inject->itrace_synth_opts.set = true; 2186 inject->itrace_synth_opts.vm_time_correlation = true; 2187 inject->in_place_update = true; 2188 2189 if (!str) 2190 return 0; 2191 2192 dry_run = skip_spaces(str); 2193 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { 2194 inject->itrace_synth_opts.vm_tm_corr_dry_run = true; 2195 inject->in_place_update_dry_run = true; 2196 args = dry_run + strlen("dry-run"); 2197 } else { 2198 args = str; 2199 } 2200 2201 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); 2202 2203 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; 2204 } 2205 2206 static int parse_guest_data(const struct option *opt, const char *str, int unset) 2207 { 2208 struct perf_inject *inject = opt->value; 2209 struct guest_session *gs = &inject->guest_session; 2210 char *tok; 2211 char *s; 2212 2213 if (unset) 2214 return 0; 2215 2216 if (!str) 2217 goto bad_args; 2218 2219 s = strdup(str); 2220 if (!s) 2221 return -ENOMEM; 2222 2223 gs->perf_data_file = strsep(&s, ","); 2224 if (!gs->perf_data_file) 2225 goto bad_args; 2226 2227 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file); 2228 if (gs->copy_kcore_dir) 2229 inject->output.is_dir = true; 2230 2231 tok = strsep(&s, ","); 2232 if (!tok) 2233 goto bad_args; 2234 gs->machine_pid = strtoul(tok, NULL, 0); 2235 if (!inject->guest_session.machine_pid) 2236 goto bad_args; 2237 2238 gs->time_scale = 1; 2239 2240 tok = strsep(&s, ","); 2241 if (!tok) 2242 goto out; 2243 gs->time_offset = strtoull(tok, NULL, 0); 2244 2245 tok = strsep(&s, ","); 2246 if (!tok) 2247 goto out; 2248 gs->time_scale = strtod(tok, NULL); 2249 if (!gs->time_scale) 2250 goto bad_args; 2251 out: 2252 return 0; 2253 2254 bad_args: 2255 pr_err("--guest-data option requires guest perf.data file name, " 2256 "guest machine PID, and optionally guest timestamp offset, " 2257 "and guest timestamp scale factor, separated by commas.\n"); 2258 return -1; 2259 } 2260 2261 static int save_section_info_cb(struct perf_file_section *section, 2262 struct perf_header *ph __maybe_unused, 2263 int feat, int fd __maybe_unused, void *data) 2264 { 2265 struct perf_inject *inject = data; 2266 2267 inject->secs[feat] = *section; 2268 return 0; 2269 } 2270 2271 static int save_section_info(struct perf_inject *inject) 2272 { 2273 struct perf_header *header = &inject->session->header; 2274 int fd = perf_data__fd(inject->session->data); 2275 2276 return perf_header__process_sections(header, fd, inject, save_section_info_cb); 2277 } 2278 2279 static bool keep_feat(struct perf_inject *inject, int feat) 2280 { 2281 switch (feat) { 2282 /* Keep original information that describes the machine or software */ 2283 case HEADER_TRACING_DATA: 2284 case HEADER_HOSTNAME: 2285 case HEADER_OSRELEASE: 2286 case HEADER_VERSION: 2287 case HEADER_ARCH: 2288 case HEADER_NRCPUS: 2289 case HEADER_CPUDESC: 2290 case HEADER_CPUID: 2291 case HEADER_TOTAL_MEM: 2292 case HEADER_CPU_TOPOLOGY: 2293 case HEADER_NUMA_TOPOLOGY: 2294 case HEADER_PMU_MAPPINGS: 2295 case HEADER_CACHE: 2296 case HEADER_MEM_TOPOLOGY: 2297 case HEADER_CLOCKID: 2298 case HEADER_BPF_PROG_INFO: 2299 case HEADER_BPF_BTF: 2300 case HEADER_CPU_PMU_CAPS: 2301 case HEADER_CLOCK_DATA: 2302 case HEADER_HYBRID_TOPOLOGY: 2303 case HEADER_PMU_CAPS: 2304 case HEADER_CPU_DOMAIN_INFO: 2305 case HEADER_CLN_SIZE: 2306 return true; 2307 /* Information that can be updated */ 2308 case HEADER_BUILD_ID: 2309 return inject->build_id_style == BID_RWS__NONE; 2310 case HEADER_CMDLINE: 2311 case HEADER_EVENT_DESC: 2312 case HEADER_BRANCH_STACK: 2313 case HEADER_GROUP_DESC: 2314 case HEADER_AUXTRACE: 2315 case HEADER_STAT: 2316 case HEADER_SAMPLE_TIME: 2317 case HEADER_DIR_FORMAT: 2318 case HEADER_COMPRESSED: 2319 default: 2320 return false; 2321 }; 2322 } 2323 2324 static int read_file(int fd, u64 offs, void *buf, size_t sz) 2325 { 2326 ssize_t ret = preadn(fd, buf, sz, offs); 2327 2328 if (ret < 0) 2329 return -errno; 2330 if ((size_t)ret != sz) 2331 return -EINVAL; 2332 return 0; 2333 } 2334 2335 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) 2336 { 2337 int fd = perf_data__fd(inject->session->data); 2338 u64 offs = inject->secs[feat].offset; 2339 size_t sz = inject->secs[feat].size; 2340 void *buf = malloc(sz); 2341 int ret; 2342 2343 if (!buf) 2344 return -ENOMEM; 2345 2346 ret = read_file(fd, offs, buf, sz); 2347 if (ret) 2348 goto out_free; 2349 2350 ret = fw->write(fw, buf, sz); 2351 out_free: 2352 free(buf); 2353 return ret; 2354 } 2355 2356 struct inject_fc { 2357 struct feat_copier fc; 2358 struct perf_inject *inject; 2359 }; 2360 2361 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) 2362 { 2363 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); 2364 struct perf_inject *inject = inj_fc->inject; 2365 int ret; 2366 2367 if (!inject->secs[feat].offset || 2368 !keep_feat(inject, feat)) 2369 return 0; 2370 2371 ret = feat_copy(inject, feat, fw); 2372 if (ret < 0) 2373 return ret; 2374 2375 return 1; /* Feature section copied */ 2376 } 2377 2378 static int copy_kcore_dir(struct perf_inject *inject) 2379 { 2380 char *cmd; 2381 int ret; 2382 2383 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1", 2384 inject->input_name, inject->output.path); 2385 if (ret < 0) 2386 return ret; 2387 pr_debug("%s\n", cmd); 2388 ret = system(cmd); 2389 free(cmd); 2390 return ret; 2391 } 2392 2393 static int guest_session__copy_kcore_dir(struct guest_session *gs) 2394 { 2395 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 2396 char *cmd; 2397 int ret; 2398 2399 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1", 2400 gs->perf_data_file, inject->output.path, gs->machine_pid); 2401 if (ret < 0) 2402 return ret; 2403 pr_debug("%s\n", cmd); 2404 ret = system(cmd); 2405 free(cmd); 2406 return ret; 2407 } 2408 2409 static int output_fd(struct perf_inject *inject) 2410 { 2411 return inject->in_place_update ? -1 : perf_data__fd(&inject->output); 2412 } 2413 2414 static int __cmd_inject(struct perf_inject *inject) 2415 { 2416 int ret = -EINVAL; 2417 struct guest_session *gs = &inject->guest_session; 2418 struct perf_session *session = inject->session; 2419 int fd = output_fd(inject); 2420 u64 output_data_offset = perf_session__data_offset(session->evlist); 2421 /* 2422 * Pipe input hasn't loaded the attributes and will handle them as 2423 * events. So that the attributes don't overlap the data, write the 2424 * attributes after the data. 2425 */ 2426 bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe; 2427 2428 signal(SIGINT, sig_handler); 2429 2430 if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat || 2431 inject->itrace_synth_opts.set) { 2432 inject->tool.mmap = perf_event__repipe_mmap; 2433 inject->tool.mmap2 = perf_event__repipe_mmap2; 2434 inject->tool.fork = perf_event__repipe_fork; 2435 #ifdef HAVE_LIBTRACEEVENT 2436 inject->tool.tracing_data = perf_event__repipe_tracing_data; 2437 #endif 2438 } 2439 2440 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2441 inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 2442 inject->tool.sample = perf_event__inject_buildid; 2443 } else if (inject->sched_stat) { 2444 struct evsel *evsel; 2445 2446 evlist__for_each_entry(session->evlist, evsel) { 2447 const char *name = evsel__name(evsel); 2448 2449 if (!strcmp(name, "sched:sched_switch")) { 2450 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) 2451 return -EINVAL; 2452 2453 evsel->handler = perf_inject__sched_switch; 2454 } else if (!strcmp(name, "sched:sched_process_exit")) 2455 evsel->handler = perf_inject__sched_process_exit; 2456 #ifdef HAVE_LIBTRACEEVENT 2457 else if (!strncmp(name, "sched:sched_stat_", 17)) 2458 evsel->handler = perf_inject__sched_stat; 2459 #endif 2460 } 2461 } else if (inject->itrace_synth_opts.vm_time_correlation) { 2462 session->itrace_synth_opts = &inject->itrace_synth_opts; 2463 memset(&inject->tool, 0, sizeof(inject->tool)); 2464 inject->tool.id_index = perf_event__process_id_index; 2465 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2466 inject->tool.auxtrace = perf_event__process_auxtrace; 2467 inject->tool.auxtrace_error = perf_event__process_auxtrace_error; 2468 inject->tool.ordered_events = true; 2469 inject->tool.ordering_requires_timestamps = true; 2470 } else if (inject->itrace_synth_opts.set) { 2471 session->itrace_synth_opts = &inject->itrace_synth_opts; 2472 inject->itrace_synth_opts.inject = true; 2473 inject->tool.comm = perf_event__repipe_comm; 2474 inject->tool.namespaces = perf_event__repipe_namespaces; 2475 inject->tool.exit = perf_event__repipe_exit; 2476 inject->tool.id_index = perf_event__process_id_index; 2477 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2478 inject->tool.auxtrace = perf_event__process_auxtrace; 2479 inject->tool.aux = perf_event__drop_aux; 2480 inject->tool.itrace_start = perf_event__drop_aux; 2481 inject->tool.aux_output_hw_id = perf_event__drop_aux; 2482 inject->tool.ordered_events = true; 2483 inject->tool.ordering_requires_timestamps = true; 2484 /* Allow space in the header for new attributes */ 2485 output_data_offset = roundup(8192 + session->header.data_offset, 4096); 2486 if (inject->strip) 2487 strip_init(inject); 2488 } else if (gs->perf_data_file) { 2489 char *name = gs->perf_data_file; 2490 2491 /* 2492 * Not strictly necessary, but keep these events in order wrt 2493 * guest events. 2494 */ 2495 inject->tool.mmap = host__repipe; 2496 inject->tool.mmap2 = host__repipe; 2497 inject->tool.comm = host__repipe; 2498 inject->tool.fork = host__repipe; 2499 inject->tool.exit = host__repipe; 2500 inject->tool.lost = host__repipe; 2501 inject->tool.context_switch = host__repipe; 2502 inject->tool.ksymbol = host__repipe; 2503 inject->tool.text_poke = host__repipe; 2504 /* 2505 * Once the host session has initialized, set up sample ID 2506 * mapping and feed in guest attrs, build IDs and initial 2507 * events. 2508 */ 2509 inject->tool.finished_init = host__finished_init; 2510 /* Obey finished round ordering */ 2511 inject->tool.finished_round = host__finished_round; 2512 /* Keep track of which CPU a VCPU is runnng on */ 2513 inject->tool.context_switch = host__context_switch; 2514 /* 2515 * Must order events to be able to obey finished round 2516 * ordering. 2517 */ 2518 inject->tool.ordered_events = true; 2519 inject->tool.ordering_requires_timestamps = true; 2520 /* Set up a separate session to process guest perf.data file */ 2521 ret = guest_session__start(gs, name, session->data->force); 2522 if (ret) { 2523 pr_err("Failed to process %s, error %d\n", name, ret); 2524 return ret; 2525 } 2526 /* Allow space in the header for guest attributes */ 2527 output_data_offset += gs->session->header.data_offset; 2528 output_data_offset = roundup(output_data_offset, 4096); 2529 } else if (inject->convert_callchain) { 2530 inject->tool.sample = perf_event__convert_sample_callchain; 2531 inject->tool.fork = perf_event__repipe_fork; 2532 inject->tool.comm = perf_event__repipe_comm; 2533 inject->tool.exit = perf_event__repipe_exit; 2534 inject->tool.mmap = perf_event__repipe_mmap; 2535 inject->tool.mmap2 = perf_event__repipe_mmap2; 2536 inject->tool.ordered_events = true; 2537 inject->tool.ordering_requires_timestamps = true; 2538 } 2539 2540 if (!inject->itrace_synth_opts.set) 2541 auxtrace_index__free(&session->auxtrace_index); 2542 2543 if (!inject->output.is_pipe && !inject->in_place_update) 2544 lseek(fd, output_data_offset, SEEK_SET); 2545 2546 ret = perf_session__process_events(session); 2547 if (ret) 2548 return ret; 2549 2550 if (gs->session) { 2551 /* 2552 * Remaining guest events have later timestamps. Flush them 2553 * out to file. 2554 */ 2555 ret = guest_session__flush_events(gs); 2556 if (ret) { 2557 pr_err("Failed to flush guest events\n"); 2558 return ret; 2559 } 2560 } 2561 2562 if (!inject->output.is_pipe && !inject->in_place_update) { 2563 struct inject_fc inj_fc = { 2564 .fc.copy = feat_copy_cb, 2565 .inject = inject, 2566 }; 2567 2568 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2569 inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) 2570 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 2571 /* 2572 * Keep all buildids when there is unprocessed AUX data because 2573 * it is not known which ones the AUX trace hits. 2574 */ 2575 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && 2576 inject->have_auxtrace && !inject->itrace_synth_opts.set) 2577 perf_session__dsos_hit_all(session); 2578 /* 2579 * The AUX areas have been removed and replaced with 2580 * synthesized hardware events, so clear the feature flag. 2581 */ 2582 if (inject->itrace_synth_opts.set) { 2583 struct evsel *evsel; 2584 2585 perf_header__clear_feat(&session->header, 2586 HEADER_AUXTRACE); 2587 2588 evlist__for_each_entry(session->evlist, evsel) { 2589 evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX; 2590 } 2591 2592 if (inject->itrace_synth_opts.add_last_branch) { 2593 perf_header__set_feat(&session->header, 2594 HEADER_BRANCH_STACK); 2595 2596 evlist__for_each_entry(session->evlist, evsel) { 2597 evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 2598 if (evsel->core.attr.size < PERF_ATTR_SIZE_VER2) 2599 evsel->core.attr.size = PERF_ATTR_SIZE_VER2; 2600 evsel->core.attr.branch_sample_type |= 2601 PERF_SAMPLE_BRANCH_HW_INDEX; 2602 } 2603 } 2604 } 2605 2606 /* 2607 * The converted data file won't have stack and registers. 2608 * Update the perf_event_attr to remove them before writing. 2609 */ 2610 if (inject->convert_callchain) { 2611 struct evsel *evsel; 2612 2613 evlist__for_each_entry(session->evlist, evsel) { 2614 evsel__reset_sample_bit(evsel, REGS_USER); 2615 evsel__reset_sample_bit(evsel, STACK_USER); 2616 evsel->core.attr.sample_regs_user = 0; 2617 evsel->core.attr.sample_stack_user = 0; 2618 evsel->core.attr.exclude_callchain_user = 0; 2619 } 2620 } 2621 2622 if (inject->aslr) 2623 aslr_tool__strip_evlist(inject->session->tool, session->evlist); 2624 2625 session->header.data_offset = output_data_offset; 2626 session->header.data_size = inject->bytes_written; 2627 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, 2628 write_attrs_after_data); 2629 2630 if (inject->copy_kcore_dir) { 2631 ret = copy_kcore_dir(inject); 2632 if (ret) { 2633 pr_err("Failed to copy kcore\n"); 2634 return ret; 2635 } 2636 } 2637 if (gs->copy_kcore_dir) { 2638 ret = guest_session__copy_kcore_dir(gs); 2639 if (ret) { 2640 pr_err("Failed to copy guest kcore\n"); 2641 return ret; 2642 } 2643 } 2644 } 2645 2646 return ret; 2647 } 2648 2649 static bool evsel__has_dwarf_callchain(struct evsel *evsel) 2650 { 2651 struct perf_event_attr *attr = &evsel->core.attr; 2652 const u64 dwarf_callchain_flags = 2653 PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN; 2654 2655 if (!attr->exclude_callchain_user) 2656 return false; 2657 2658 return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags; 2659 } 2660 2661 int cmd_inject(int argc, const char **argv) 2662 { 2663 struct perf_inject inject = { 2664 .input_name = "-", 2665 .samples = LIST_HEAD_INIT(inject.samples), 2666 .output = { 2667 .path = "-", 2668 .mode = PERF_DATA_MODE_WRITE, 2669 .file.use_stdio = true, 2670 }, 2671 }; 2672 struct perf_data data = { 2673 .mode = PERF_DATA_MODE_READ, 2674 .file.use_stdio = true, 2675 }; 2676 int ret; 2677 const char *known_build_ids = NULL; 2678 bool build_ids = false; 2679 bool build_id_all = false; 2680 bool mmap2_build_ids = false; 2681 bool mmap2_build_id_all = false; 2682 2683 struct option options[] = { 2684 OPT_BOOLEAN('b', "build-ids", &build_ids, 2685 "Inject build-ids into the output stream"), 2686 OPT_BOOLEAN(0, "buildid-all", &build_id_all, 2687 "Inject build-ids of all DSOs into the output stream"), 2688 OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids, 2689 "Drop unused mmap events, make others mmap2 with build IDs"), 2690 OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all, 2691 "Rewrite all mmap events as mmap2 events with build IDs"), 2692 OPT_STRING(0, "known-build-ids", &known_build_ids, 2693 "buildid path [,buildid path...]", 2694 "build-ids to use for given paths"), 2695 OPT_STRING('i', "input", &inject.input_name, "file", 2696 "input file name"), 2697 OPT_STRING('o', "output", &inject.output.path, "file", 2698 "output file name"), 2699 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2700 "Merge sched-stat and sched-switch for getting events " 2701 "where and how long tasks slept"), 2702 #ifdef HAVE_JITDUMP 2703 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2704 #endif 2705 OPT_INCR('v', "verbose", &verbose, 2706 "be more verbose (show build ids, etc)"), 2707 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2708 "file", "vmlinux pathname"), 2709 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2710 "don't load vmlinux even if found"), 2711 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2712 "kallsyms pathname"), 2713 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2714 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2715 NULL, "opts", "Instruction Tracing options\n" 2716 ITRACE_HELP, 2717 itrace_parse_synth_opts), 2718 OPT_BOOLEAN(0, "strip", &inject.strip, 2719 "strip non-synthesized events (use with --itrace)"), 2720 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2721 "correlate time between VM guests and the host", 2722 parse_vm_time_correlation), 2723 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2724 "inject events from a guest perf.data file", 2725 parse_guest_data), 2726 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2727 "guest mount directory under which every guest os" 2728 " instance has a subdir"), 2729 OPT_CALLBACK(0, "unwind-style", NULL, "unwind style", 2730 "unwind styles (libdw,libunwind)", 2731 unwind__option), 2732 OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, 2733 "Generate callchains using DWARF and drop register/stack data"), 2734 OPT_BOOLEAN(0, "aslr", &inject.aslr, 2735 "Remap virtual memory addresses similar to ASLR"), 2736 OPT_END() 2737 }; 2738 const char * const inject_usage[] = { 2739 "perf inject [<options>]", 2740 NULL 2741 }; 2742 bool ordered_events; 2743 struct perf_tool *tool = &inject.tool; 2744 2745 if (!inject.itrace_synth_opts.set) { 2746 /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ 2747 symbol_conf.lazy_load_kernel_maps = true; 2748 } 2749 2750 #ifndef HAVE_JITDUMP 2751 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2752 #endif 2753 #ifndef HAVE_LIBDW_SUPPORT 2754 set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true); 2755 #endif 2756 argc = parse_options(argc, argv, options, inject_usage, 0); 2757 2758 /* 2759 * Any (unrecognized) arguments left? 2760 */ 2761 if (argc) 2762 usage_with_options(inject_usage, options); 2763 2764 if (inject.aslr && inject.convert_callchain) { 2765 pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); 2766 return -EINVAL; 2767 } 2768 2769 if (inject.strip && !inject.itrace_synth_opts.set) { 2770 pr_err("--strip option requires --itrace option\n"); 2771 return -1; 2772 } 2773 2774 if (symbol__validate_sym_arguments()) 2775 return -1; 2776 2777 if (inject.in_place_update) { 2778 if (!strcmp(inject.input_name, "-")) { 2779 pr_err("Input file name required for in-place updating\n"); 2780 return -1; 2781 } 2782 if (strcmp(inject.output.path, "-")) { 2783 pr_err("Output file name must not be specified for in-place updating\n"); 2784 return -1; 2785 } 2786 if (!data.force && !inject.in_place_update_dry_run) { 2787 pr_err("The input file would be updated in place, " 2788 "the --force option is required.\n"); 2789 return -1; 2790 } 2791 if (!inject.in_place_update_dry_run) 2792 data.in_place_update = true; 2793 } else { 2794 if (strcmp(inject.output.path, "-") && !inject.strip && 2795 has_kcore_dir(inject.input_name)) { 2796 inject.output.is_dir = true; 2797 inject.copy_kcore_dir = true; 2798 } 2799 if (perf_data__open(&inject.output)) { 2800 perror("failed to create output file"); 2801 return -1; 2802 } 2803 } 2804 if (mmap2_build_ids) 2805 inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY; 2806 if (mmap2_build_id_all) 2807 inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL; 2808 if (build_ids) 2809 inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY; 2810 if (build_id_all) 2811 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL; 2812 2813 data.path = inject.input_name; 2814 2815 ordered_events = inject.jit_mode || inject.sched_stat || 2816 inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2817 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY; 2818 perf_tool__init(&inject.tool, ordered_events); 2819 inject.tool.sample = perf_event__repipe_sample; 2820 inject.tool.read = perf_event__repipe_sample; 2821 inject.tool.mmap = perf_event__repipe; 2822 inject.tool.mmap2 = perf_event__repipe; 2823 inject.tool.comm = perf_event__repipe; 2824 inject.tool.namespaces = perf_event__repipe; 2825 inject.tool.cgroup = perf_event__repipe; 2826 inject.tool.fork = perf_event__repipe; 2827 inject.tool.exit = perf_event__repipe; 2828 inject.tool.lost = perf_event__repipe; 2829 inject.tool.lost_samples = perf_event__repipe; 2830 inject.tool.aux = perf_event__repipe; 2831 inject.tool.itrace_start = perf_event__repipe; 2832 inject.tool.aux_output_hw_id = perf_event__repipe; 2833 inject.tool.context_switch = perf_event__repipe; 2834 inject.tool.throttle = perf_event__repipe; 2835 inject.tool.unthrottle = perf_event__repipe; 2836 inject.tool.ksymbol = perf_event__repipe; 2837 inject.tool.bpf = perf_event__repipe; 2838 inject.tool.text_poke = perf_event__repipe; 2839 inject.tool.attr = perf_event__repipe_attr; 2840 inject.tool.event_update = perf_event__repipe_event_update; 2841 inject.tool.tracing_data = perf_event__repipe_op2_synth; 2842 inject.tool.finished_round = perf_event__repipe_oe_synth; 2843 inject.tool.build_id = perf_event__repipe_op2_synth; 2844 inject.tool.id_index = perf_event__repipe_op2_synth; 2845 inject.tool.auxtrace_info = perf_event__repipe_op2_synth; 2846 inject.tool.auxtrace_error = perf_event__repipe_op2_synth; 2847 inject.tool.time_conv = perf_event__repipe_op2_synth; 2848 inject.tool.thread_map = perf_event__repipe_op2_synth; 2849 inject.tool.cpu_map = perf_event__repipe_op2_synth; 2850 inject.tool.stat_config = perf_event__repipe_op2_synth; 2851 inject.tool.stat = perf_event__repipe_op2_synth; 2852 inject.tool.stat_round = perf_event__repipe_op2_synth; 2853 inject.tool.feature = perf_event__repipe_op2_synth; 2854 inject.tool.finished_init = perf_event__repipe_op2_synth; 2855 inject.tool.compressed = perf_event__repipe_op4_synth; 2856 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2857 inject.tool.bpf_metadata = perf_event__repipe_op2_synth; 2858 inject.tool.schedstat_cpu = perf_event__repipe_op2_synth; 2859 inject.tool.schedstat_domain = perf_event__repipe_op2_synth; 2860 inject.tool.dont_split_sample_group = true; 2861 inject.tool.merge_deferred_callchains = false; 2862 if (inject.aslr) { 2863 tool = aslr_tool__new(&inject.tool); 2864 if (!tool) { 2865 ret = -ENOMEM; 2866 goto out_close_output; 2867 } 2868 } 2869 inject.session = __perf_session__new(&data, tool, 2870 /*trace_event_repipe=*/inject.output.is_pipe, 2871 /*host_env=*/NULL); 2872 2873 if (IS_ERR(inject.session)) { 2874 ret = PTR_ERR(inject.session); 2875 if (inject.aslr) 2876 aslr_tool__delete(tool); 2877 goto out_close_output; 2878 } 2879 2880 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2881 pr_warning("Decompression initialization failed.\n"); 2882 2883 if (inject.aslr) { 2884 struct evsel *evsel; 2885 2886 evlist__for_each_entry(inject.session->evlist, evsel) { 2887 ret = aslr_tool__cache_orig_attrs(tool, evsel); 2888 if (ret) { 2889 pr_err("Failed to cache original attributes: %d\n", ret); 2890 goto out_delete; 2891 } 2892 } 2893 } 2894 2895 /* Save original section info before feature bits change */ 2896 ret = save_section_info(&inject); 2897 if (ret) 2898 goto out_delete; 2899 2900 if (inject.output.is_pipe) { 2901 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2902 if (ret < 0) { 2903 pr_err("Couldn't write a new pipe header.\n"); 2904 goto out_delete; 2905 } 2906 2907 /* 2908 * If the input is already a pipe then the features and 2909 * attributes don't need synthesizing, they will be present in 2910 * the input. 2911 */ 2912 if (!data.is_pipe) { 2913 if (inject.aslr) 2914 aslr_tool__strip_evlist(tool, inject.session->evlist); 2915 2916 ret = perf_event__synthesize_for_pipe(&inject.tool, 2917 inject.session, 2918 &inject.output, 2919 perf_event__repipe); 2920 2921 if (inject.aslr) 2922 aslr_tool__restore_evlist(tool, inject.session->evlist); 2923 2924 if (ret < 0) 2925 goto out_delete; 2926 } 2927 } 2928 2929 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2930 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 2931 /* 2932 * to make sure the mmap records are ordered correctly 2933 * and so that the correct especially due to jitted code 2934 * mmaps. We cannot generate the buildid hit list and 2935 * inject the jit mmaps at the same time for now. 2936 */ 2937 inject.tool.ordering_requires_timestamps = true; 2938 } 2939 if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) { 2940 inject.known_build_ids = 2941 perf_inject__parse_known_build_ids(known_build_ids); 2942 2943 if (inject.known_build_ids == NULL) { 2944 pr_err("Couldn't parse known build ids.\n"); 2945 goto out_delete; 2946 } 2947 } 2948 2949 if (inject.convert_callchain) { 2950 struct evsel *evsel; 2951 2952 if (inject.output.is_pipe || inject.session->data->is_pipe) { 2953 pr_err("--convert-callchain cannot work with pipe\n"); 2954 goto out_delete; 2955 } 2956 2957 evlist__for_each_entry(inject.session->evlist, evsel) { 2958 if (!evsel__has_dwarf_callchain(evsel) && !evsel__is_dummy_event(evsel)) { 2959 pr_err("--convert-callchain requires DWARF call graph.\n"); 2960 goto out_delete; 2961 } 2962 } 2963 2964 inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64)); 2965 if (inject.raw_callchain == NULL) { 2966 pr_err("callchain allocation failed\n"); 2967 goto out_delete; 2968 } 2969 } 2970 2971 #ifdef HAVE_JITDUMP 2972 if (inject.jit_mode) { 2973 inject.tool.mmap2 = perf_event__repipe_mmap2; 2974 inject.tool.mmap = perf_event__repipe_mmap; 2975 inject.tool.ordering_requires_timestamps = true; 2976 /* 2977 * JIT MMAP injection injects all MMAP events in one go, so it 2978 * does not obey finished_round semantics. 2979 */ 2980 inject.tool.finished_round = perf_event__drop_oe; 2981 } 2982 #endif 2983 ret = symbol__init(perf_session__env(inject.session)); 2984 if (ret < 0) 2985 goto out_delete; 2986 2987 ret = __cmd_inject(&inject); 2988 2989 guest_session__exit(&inject.guest_session); 2990 2991 out_delete: 2992 strlist__delete(inject.known_build_ids); 2993 zstd_fini(&(inject.session->zstd_data)); 2994 perf_session__delete(inject.session); 2995 if (inject.aslr) 2996 aslr_tool__delete(tool); 2997 out_close_output: 2998 if (!inject.in_place_update) 2999 perf_data__close(&inject.output); 3000 free(inject.itrace_synth_opts.vm_tm_corr_args); 3001 free(inject.event_copy); 3002 free(inject.guest_session.ev.event_buf); 3003 free(inject.raw_callchain); 3004 return ret; 3005 } 3006