1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include "util/dso.h" 13 #include "util/vdso.h" 14 #include "util/evlist.h" 15 #include "util/evsel.h" 16 #include "util/map.h" 17 #include "util/session.h" 18 #include "util/tool.h" 19 #include "util/debug.h" 20 #include "util/build-id.h" 21 #include "util/data.h" 22 #include "util/auxtrace.h" 23 #include "util/jit.h" 24 #include "util/string2.h" 25 #include "util/symbol.h" 26 #include "util/synthetic-events.h" 27 #include "util/thread.h" 28 #include "util/namespaces.h" 29 #include "util/util.h" 30 #include "util/tsc.h" 31 32 #include <internal/lib.h> 33 34 #include <linux/err.h> 35 #include <subcmd/parse-options.h> 36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 37 38 #include <linux/list.h> 39 #include <linux/string.h> 40 #include <linux/zalloc.h> 41 #include <linux/hash.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <inttypes.h> 46 47 struct guest_event { 48 struct perf_sample sample; 49 union perf_event *event; 50 char *event_buf; 51 }; 52 53 struct guest_id { 54 /* hlist_node must be first, see free_hlist() */ 55 struct hlist_node node; 56 u64 id; 57 u64 host_id; 58 u32 vcpu; 59 }; 60 61 struct guest_tid { 62 /* hlist_node must be first, see free_hlist() */ 63 struct hlist_node node; 64 /* Thread ID of QEMU thread */ 65 u32 tid; 66 u32 vcpu; 67 }; 68 69 struct guest_vcpu { 70 /* Current host CPU */ 71 u32 cpu; 72 /* Thread ID of QEMU thread */ 73 u32 tid; 74 }; 75 76 struct guest_session { 77 char *perf_data_file; 78 u32 machine_pid; 79 u64 time_offset; 80 double time_scale; 81 struct perf_tool tool; 82 struct perf_data data; 83 struct perf_session *session; 84 char *tmp_file_name; 85 int tmp_fd; 86 struct perf_tsc_conversion host_tc; 87 struct perf_tsc_conversion guest_tc; 88 bool copy_kcore_dir; 89 bool have_tc; 90 bool fetched; 91 bool ready; 92 u16 dflt_id_hdr_size; 93 u64 dflt_id; 94 u64 highest_id; 95 /* Array of guest_vcpu */ 96 struct guest_vcpu *vcpu; 97 size_t vcpu_cnt; 98 /* Hash table for guest_id */ 99 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 100 /* Hash table for guest_tid */ 101 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE]; 102 /* Place to stash next guest event */ 103 struct guest_event ev; 104 }; 105 106 enum build_id_rewrite_style { 107 BID_RWS__NONE = 0, 108 BID_RWS__INJECT_HEADER_LAZY, 109 BID_RWS__INJECT_HEADER_ALL, 110 BID_RWS__MMAP2_BUILDID_ALL, 111 BID_RWS__MMAP2_BUILDID_LAZY, 112 }; 113 114 struct perf_inject { 115 struct perf_tool tool; 116 struct perf_session *session; 117 enum build_id_rewrite_style build_id_style; 118 bool sched_stat; 119 bool have_auxtrace; 120 bool strip; 121 bool jit_mode; 122 bool in_place_update; 123 bool in_place_update_dry_run; 124 bool copy_kcore_dir; 125 const char *input_name; 126 struct perf_data output; 127 u64 bytes_written; 128 u64 aux_id; 129 struct list_head samples; 130 struct itrace_synth_opts itrace_synth_opts; 131 char *event_copy; 132 struct perf_file_section secs[HEADER_FEAT_BITS]; 133 struct guest_session guest_session; 134 struct strlist *known_build_ids; 135 const struct evsel *mmap_evsel; 136 }; 137 138 struct event_entry { 139 struct list_head node; 140 u32 tid; 141 union perf_event event[]; 142 }; 143 144 static int tool__inject_build_id(const struct perf_tool *tool, 145 struct perf_sample *sample, 146 struct machine *machine, 147 const struct evsel *evsel, 148 __u16 misc, 149 const char *filename, 150 struct dso *dso, u32 flags); 151 static int tool__inject_mmap2_build_id(const struct perf_tool *tool, 152 struct perf_sample *sample, 153 struct machine *machine, 154 const struct evsel *evsel, 155 __u16 misc, 156 __u32 pid, __u32 tid, 157 __u64 start, __u64 len, __u64 pgoff, 158 struct dso *dso, 159 __u32 prot, __u32 flags, 160 const char *filename); 161 162 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 163 { 164 ssize_t size; 165 166 size = perf_data__write(&inject->output, buf, sz); 167 if (size < 0) 168 return -errno; 169 170 inject->bytes_written += size; 171 return 0; 172 } 173 174 static int perf_event__repipe_synth(const struct perf_tool *tool, 175 union perf_event *event) 176 177 { 178 struct perf_inject *inject = container_of(tool, struct perf_inject, 179 tool); 180 181 return output_bytes(inject, event, event->header.size); 182 } 183 184 static int perf_event__repipe_oe_synth(const struct perf_tool *tool, 185 union perf_event *event, 186 struct ordered_events *oe __maybe_unused) 187 { 188 return perf_event__repipe_synth(tool, event); 189 } 190 191 #ifdef HAVE_JITDUMP 192 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused, 193 union perf_event *event __maybe_unused, 194 struct ordered_events *oe __maybe_unused) 195 { 196 return 0; 197 } 198 #endif 199 200 static int perf_event__repipe_op2_synth(struct perf_session *session, 201 union perf_event *event) 202 { 203 return perf_event__repipe_synth(session->tool, event); 204 } 205 206 static int perf_event__repipe_op4_synth(struct perf_session *session, 207 union perf_event *event, 208 u64 data __maybe_unused, 209 const char *str __maybe_unused) 210 { 211 return perf_event__repipe_synth(session->tool, event); 212 } 213 214 static int perf_event__repipe_attr(const struct perf_tool *tool, 215 union perf_event *event, 216 struct evlist **pevlist) 217 { 218 struct perf_inject *inject = container_of(tool, struct perf_inject, 219 tool); 220 int ret; 221 222 ret = perf_event__process_attr(tool, event, pevlist); 223 if (ret) 224 return ret; 225 226 /* If the output isn't a pipe then the attributes will be written as part of the header. */ 227 if (!inject->output.is_pipe) 228 return 0; 229 230 return perf_event__repipe_synth(tool, event); 231 } 232 233 static int perf_event__repipe_event_update(const struct perf_tool *tool, 234 union perf_event *event, 235 struct evlist **pevlist __maybe_unused) 236 { 237 return perf_event__repipe_synth(tool, event); 238 } 239 240 #ifdef HAVE_AUXTRACE_SUPPORT 241 242 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 243 { 244 char buf[4096]; 245 ssize_t ssz; 246 int ret; 247 248 while (size > 0) { 249 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 250 if (ssz < 0) 251 return -errno; 252 ret = output_bytes(inject, buf, ssz); 253 if (ret) 254 return ret; 255 size -= ssz; 256 } 257 258 return 0; 259 } 260 261 static s64 perf_event__repipe_auxtrace(struct perf_session *session, 262 union perf_event *event) 263 { 264 const struct perf_tool *tool = session->tool; 265 struct perf_inject *inject = container_of(tool, struct perf_inject, 266 tool); 267 int ret; 268 269 inject->have_auxtrace = true; 270 271 if (!inject->output.is_pipe) { 272 off_t offset; 273 274 offset = lseek(inject->output.file.fd, 0, SEEK_CUR); 275 if (offset == -1) 276 return -errno; 277 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 278 event, offset); 279 if (ret < 0) 280 return ret; 281 } 282 283 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 284 ret = output_bytes(inject, event, event->header.size); 285 if (ret < 0) 286 return ret; 287 ret = copy_bytes(inject, session->data, 288 event->auxtrace.size); 289 } else { 290 ret = output_bytes(inject, event, 291 event->header.size + event->auxtrace.size); 292 } 293 if (ret < 0) 294 return ret; 295 296 return event->auxtrace.size; 297 } 298 299 #else 300 301 static s64 302 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, 303 union perf_event *event __maybe_unused) 304 { 305 pr_err("AUX area tracing not supported\n"); 306 return -EINVAL; 307 } 308 309 #endif 310 311 static int perf_event__repipe(const struct perf_tool *tool, 312 union perf_event *event, 313 struct perf_sample *sample __maybe_unused, 314 struct machine *machine __maybe_unused) 315 { 316 return perf_event__repipe_synth(tool, event); 317 } 318 319 static int perf_event__drop(const struct perf_tool *tool __maybe_unused, 320 union perf_event *event __maybe_unused, 321 struct perf_sample *sample __maybe_unused, 322 struct machine *machine __maybe_unused) 323 { 324 return 0; 325 } 326 327 static int perf_event__drop_aux(const struct perf_tool *tool, 328 union perf_event *event __maybe_unused, 329 struct perf_sample *sample, 330 struct machine *machine __maybe_unused) 331 { 332 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 333 334 if (!inject->aux_id) 335 inject->aux_id = sample->id; 336 337 return 0; 338 } 339 340 static union perf_event * 341 perf_inject__cut_auxtrace_sample(struct perf_inject *inject, 342 union perf_event *event, 343 struct perf_sample *sample) 344 { 345 size_t sz1 = sample->aux_sample.data - (void *)event; 346 size_t sz2 = event->header.size - sample->aux_sample.size - sz1; 347 union perf_event *ev; 348 349 if (inject->event_copy == NULL) { 350 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 351 if (!inject->event_copy) 352 return ERR_PTR(-ENOMEM); 353 } 354 ev = (union perf_event *)inject->event_copy; 355 if (sz1 > event->header.size || sz2 > event->header.size || 356 sz1 + sz2 > event->header.size || 357 sz1 < sizeof(struct perf_event_header) + sizeof(u64)) 358 return event; 359 360 memcpy(ev, event, sz1); 361 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); 362 ev->header.size = sz1 + sz2; 363 ((u64 *)((void *)ev + sz1))[-1] = 0; 364 365 return ev; 366 } 367 368 typedef int (*inject_handler)(const struct perf_tool *tool, 369 union perf_event *event, 370 struct perf_sample *sample, 371 struct evsel *evsel, 372 struct machine *machine); 373 374 static int perf_event__repipe_sample(const struct perf_tool *tool, 375 union perf_event *event, 376 struct perf_sample *sample, 377 struct evsel *evsel, 378 struct machine *machine) 379 { 380 struct perf_inject *inject = container_of(tool, struct perf_inject, 381 tool); 382 383 if (evsel && evsel->handler) { 384 inject_handler f = evsel->handler; 385 return f(tool, event, sample, evsel, machine); 386 } 387 388 build_id__mark_dso_hit(tool, event, sample, evsel, machine); 389 390 if (inject->itrace_synth_opts.set && sample->aux_sample.size) { 391 event = perf_inject__cut_auxtrace_sample(inject, event, sample); 392 if (IS_ERR(event)) 393 return PTR_ERR(event); 394 } 395 396 return perf_event__repipe_synth(tool, event); 397 } 398 399 static struct dso *findnew_dso(int pid, int tid, const char *filename, 400 const struct dso_id *id, struct machine *machine) 401 { 402 struct thread *thread; 403 struct nsinfo *nsi = NULL; 404 struct nsinfo *nnsi; 405 struct dso *dso; 406 bool vdso; 407 408 thread = machine__findnew_thread(machine, pid, tid); 409 if (thread == NULL) { 410 pr_err("cannot find or create a task %d/%d.\n", tid, pid); 411 return NULL; 412 } 413 414 vdso = is_vdso_map(filename); 415 nsi = nsinfo__get(thread__nsinfo(thread)); 416 417 if (vdso) { 418 /* The vdso maps are always on the host and not the 419 * container. Ensure that we don't use setns to look 420 * them up. 421 */ 422 nnsi = nsinfo__copy(nsi); 423 if (nnsi) { 424 nsinfo__put(nsi); 425 nsinfo__clear_need_setns(nnsi); 426 nsi = nnsi; 427 } 428 dso = machine__findnew_vdso(machine, thread); 429 } else { 430 dso = machine__findnew_dso_id(machine, filename, id); 431 } 432 433 if (dso) { 434 mutex_lock(dso__lock(dso)); 435 dso__set_nsinfo(dso, nsi); 436 mutex_unlock(dso__lock(dso)); 437 } else 438 nsinfo__put(nsi); 439 440 thread__put(thread); 441 return dso; 442 } 443 444 /* 445 * The evsel used for the sample ID for mmap events. Typically stashed when 446 * processing mmap events. If not stashed, search the evlist for the first mmap 447 * gathering event. 448 */ 449 static const struct evsel *inject__mmap_evsel(struct perf_inject *inject) 450 { 451 struct evsel *pos; 452 453 if (inject->mmap_evsel) 454 return inject->mmap_evsel; 455 456 evlist__for_each_entry(inject->session->evlist, pos) { 457 if (pos->core.attr.mmap) { 458 inject->mmap_evsel = pos; 459 return pos; 460 } 461 } 462 pr_err("No mmap events found\n"); 463 return NULL; 464 } 465 466 static int perf_event__repipe_common_mmap(const struct perf_tool *tool, 467 union perf_event *event, 468 struct perf_sample *sample, 469 struct machine *machine, 470 __u32 pid, __u32 tid, 471 __u64 start, __u64 len, __u64 pgoff, 472 __u32 flags, __u32 prot, 473 const char *filename, 474 const struct dso_id *dso_id, 475 int (*perf_event_process)(const struct perf_tool *tool, 476 union perf_event *event, 477 struct perf_sample *sample, 478 struct machine *machine)) 479 { 480 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 481 struct dso *dso = NULL; 482 bool dso_sought = false; 483 484 #ifdef HAVE_JITDUMP 485 if (inject->jit_mode) { 486 u64 n = 0; 487 int ret; 488 489 /* If jit marker, then inject jit mmaps and generate ELF images. */ 490 ret = jit_process(inject->session, &inject->output, machine, 491 filename, pid, tid, &n); 492 if (ret < 0) 493 return ret; 494 if (ret) { 495 inject->bytes_written += n; 496 return 0; 497 } 498 } 499 #endif 500 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 501 dso = findnew_dso(pid, tid, filename, dso_id, machine); 502 dso_sought = true; 503 if (dso) { 504 /* mark it not to inject build-id */ 505 dso__set_hit(dso); 506 } 507 } 508 if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) { 509 if (!dso_sought) { 510 dso = findnew_dso(pid, tid, filename, dso_id, machine); 511 dso_sought = true; 512 } 513 514 if (dso && !dso__hit(dso)) { 515 struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event); 516 517 if (evsel) { 518 dso__set_hit(dso); 519 tool__inject_build_id(tool, sample, machine, evsel, 520 /*misc=*/sample->cpumode, 521 filename, dso, flags); 522 } 523 } 524 } else { 525 int err; 526 527 /* 528 * Remember the evsel for lazy build id generation. It is used 529 * for the sample id header type. 530 */ 531 if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 532 inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) && 533 !inject->mmap_evsel) 534 inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event); 535 536 /* Create the thread, map, etc. Not done for the unordered inject all case. */ 537 err = perf_event_process(tool, event, sample, machine); 538 539 if (err) { 540 dso__put(dso); 541 return err; 542 } 543 } 544 if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) && 545 !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { 546 struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event); 547 548 if (evsel && !dso_sought) { 549 dso = findnew_dso(pid, tid, filename, dso_id, machine); 550 dso_sought = true; 551 } 552 if (evsel && dso && 553 !tool__inject_mmap2_build_id(tool, sample, machine, evsel, 554 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID, 555 pid, tid, start, len, pgoff, 556 dso, 557 prot, flags, 558 filename)) { 559 /* Injected mmap2 so no need to repipe. */ 560 dso__put(dso); 561 return 0; 562 } 563 } 564 dso__put(dso); 565 if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) 566 return 0; 567 568 return perf_event__repipe(tool, event, sample, machine); 569 } 570 571 static int perf_event__repipe_mmap(const struct perf_tool *tool, 572 union perf_event *event, 573 struct perf_sample *sample, 574 struct machine *machine) 575 { 576 return perf_event__repipe_common_mmap( 577 tool, event, sample, machine, 578 event->mmap.pid, event->mmap.tid, 579 event->mmap.start, event->mmap.len, event->mmap.pgoff, 580 /*flags=*/0, PROT_EXEC, 581 event->mmap.filename, /*dso_id=*/NULL, 582 perf_event__process_mmap); 583 } 584 585 static int perf_event__repipe_mmap2(const struct perf_tool *tool, 586 union perf_event *event, 587 struct perf_sample *sample, 588 struct machine *machine) 589 { 590 struct dso_id id; 591 struct dso_id *dso_id = NULL; 592 593 if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { 594 id.maj = event->mmap2.maj; 595 id.min = event->mmap2.min; 596 id.ino = event->mmap2.ino; 597 id.ino_generation = event->mmap2.ino_generation; 598 dso_id = &id; 599 } 600 601 return perf_event__repipe_common_mmap( 602 tool, event, sample, machine, 603 event->mmap2.pid, event->mmap2.tid, 604 event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, 605 event->mmap2.flags, event->mmap2.prot, 606 event->mmap2.filename, dso_id, 607 perf_event__process_mmap2); 608 } 609 610 static int perf_event__repipe_fork(const struct perf_tool *tool, 611 union perf_event *event, 612 struct perf_sample *sample, 613 struct machine *machine) 614 { 615 int err; 616 617 err = perf_event__process_fork(tool, event, sample, machine); 618 perf_event__repipe(tool, event, sample, machine); 619 620 return err; 621 } 622 623 static int perf_event__repipe_comm(const struct perf_tool *tool, 624 union perf_event *event, 625 struct perf_sample *sample, 626 struct machine *machine) 627 { 628 int err; 629 630 err = perf_event__process_comm(tool, event, sample, machine); 631 perf_event__repipe(tool, event, sample, machine); 632 633 return err; 634 } 635 636 static int perf_event__repipe_namespaces(const struct perf_tool *tool, 637 union perf_event *event, 638 struct perf_sample *sample, 639 struct machine *machine) 640 { 641 int err = perf_event__process_namespaces(tool, event, sample, machine); 642 643 perf_event__repipe(tool, event, sample, machine); 644 645 return err; 646 } 647 648 static int perf_event__repipe_exit(const struct perf_tool *tool, 649 union perf_event *event, 650 struct perf_sample *sample, 651 struct machine *machine) 652 { 653 int err; 654 655 err = perf_event__process_exit(tool, event, sample, machine); 656 perf_event__repipe(tool, event, sample, machine); 657 658 return err; 659 } 660 661 #ifdef HAVE_LIBTRACEEVENT 662 static int perf_event__repipe_tracing_data(struct perf_session *session, 663 union perf_event *event) 664 { 665 perf_event__repipe_synth(session->tool, event); 666 667 return perf_event__process_tracing_data(session, event); 668 } 669 #endif 670 671 static int dso__read_build_id(struct dso *dso) 672 { 673 struct nscookie nsc; 674 675 if (dso__has_build_id(dso)) 676 return 0; 677 678 mutex_lock(dso__lock(dso)); 679 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 680 if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) 681 dso__set_has_build_id(dso); 682 else if (dso__nsinfo(dso)) { 683 char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); 684 685 if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) 686 dso__set_has_build_id(dso); 687 free(new_name); 688 } 689 nsinfo__mountns_exit(&nsc); 690 mutex_unlock(dso__lock(dso)); 691 692 return dso__has_build_id(dso) ? 0 : -1; 693 } 694 695 static struct strlist *perf_inject__parse_known_build_ids( 696 const char *known_build_ids_string) 697 { 698 struct str_node *pos, *tmp; 699 struct strlist *known_build_ids; 700 int bid_len; 701 702 known_build_ids = strlist__new(known_build_ids_string, NULL); 703 if (known_build_ids == NULL) 704 return NULL; 705 strlist__for_each_entry_safe(pos, tmp, known_build_ids) { 706 const char *build_id, *dso_name; 707 708 build_id = skip_spaces(pos->s); 709 dso_name = strchr(build_id, ' '); 710 if (dso_name == NULL) { 711 strlist__remove(known_build_ids, pos); 712 continue; 713 } 714 bid_len = dso_name - pos->s; 715 dso_name = skip_spaces(dso_name); 716 if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) { 717 strlist__remove(known_build_ids, pos); 718 continue; 719 } 720 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 721 if (!isxdigit(build_id[2 * ix]) || 722 !isxdigit(build_id[2 * ix + 1])) { 723 strlist__remove(known_build_ids, pos); 724 break; 725 } 726 } 727 } 728 return known_build_ids; 729 } 730 731 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, 732 struct dso *dso) 733 { 734 struct str_node *pos; 735 int bid_len; 736 737 strlist__for_each_entry(pos, inject->known_build_ids) { 738 const char *build_id, *dso_name; 739 740 build_id = skip_spaces(pos->s); 741 dso_name = strchr(build_id, ' '); 742 bid_len = dso_name - pos->s; 743 dso_name = skip_spaces(dso_name); 744 if (strcmp(dso__long_name(dso), dso_name)) 745 continue; 746 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 747 dso__bid(dso)->data[ix] = (hex(build_id[2 * ix]) << 4 | 748 hex(build_id[2 * ix + 1])); 749 } 750 dso__bid(dso)->size = bid_len / 2; 751 dso__set_has_build_id(dso); 752 return true; 753 } 754 return false; 755 } 756 757 static int tool__inject_build_id(const struct perf_tool *tool, 758 struct perf_sample *sample, 759 struct machine *machine, 760 const struct evsel *evsel, 761 __u16 misc, 762 const char *filename, 763 struct dso *dso, u32 flags) 764 { 765 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 766 int err; 767 768 if (is_anon_memory(filename) || flags & MAP_HUGETLB) 769 return 0; 770 if (is_no_dso_memory(filename)) 771 return 0; 772 773 if (inject->known_build_ids != NULL && 774 perf_inject__lookup_known_build_id(inject, dso)) 775 return 1; 776 777 if (dso__read_build_id(dso) < 0) { 778 pr_debug("no build_id found for %s\n", filename); 779 return -1; 780 } 781 782 err = perf_event__synthesize_build_id(tool, sample, machine, 783 perf_event__repipe, 784 evsel, misc, dso__bid(dso), 785 filename); 786 if (err) { 787 pr_err("Can't synthesize build_id event for %s\n", filename); 788 return -1; 789 } 790 791 return 0; 792 } 793 794 static int tool__inject_mmap2_build_id(const struct perf_tool *tool, 795 struct perf_sample *sample, 796 struct machine *machine, 797 const struct evsel *evsel, 798 __u16 misc, 799 __u32 pid, __u32 tid, 800 __u64 start, __u64 len, __u64 pgoff, 801 struct dso *dso, 802 __u32 prot, __u32 flags, 803 const char *filename) 804 { 805 int err; 806 807 /* Return to repipe anonymous maps. */ 808 if (is_anon_memory(filename) || flags & MAP_HUGETLB) 809 return 1; 810 if (is_no_dso_memory(filename)) 811 return 1; 812 813 if (dso__read_build_id(dso)) { 814 pr_debug("no build_id found for %s\n", filename); 815 return -1; 816 } 817 818 err = perf_event__synthesize_mmap2_build_id(tool, sample, machine, 819 perf_event__repipe, 820 evsel, 821 misc, pid, tid, 822 start, len, pgoff, 823 dso__bid(dso), 824 prot, flags, 825 filename); 826 if (err) { 827 pr_err("Can't synthesize build_id event for %s\n", filename); 828 return -1; 829 } 830 return 0; 831 } 832 833 static int mark_dso_hit(const struct perf_inject *inject, 834 const struct perf_tool *tool, 835 struct perf_sample *sample, 836 struct machine *machine, 837 const struct evsel *mmap_evsel, 838 struct map *map, bool sample_in_dso) 839 { 840 struct dso *dso; 841 u16 misc = sample->cpumode; 842 843 if (!map) 844 return 0; 845 846 if (!sample_in_dso) { 847 u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL | 848 PERF_RECORD_MISC_GUEST_USER; 849 850 if ((misc & guest_mask) != 0) { 851 misc &= PERF_RECORD_MISC_HYPERVISOR; 852 misc |= __map__is_kernel(map) 853 ? PERF_RECORD_MISC_GUEST_KERNEL 854 : PERF_RECORD_MISC_GUEST_USER; 855 } else { 856 misc &= PERF_RECORD_MISC_HYPERVISOR; 857 misc |= __map__is_kernel(map) 858 ? PERF_RECORD_MISC_KERNEL 859 : PERF_RECORD_MISC_USER; 860 } 861 } 862 dso = map__dso(map); 863 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 864 if (dso && !dso__hit(dso)) { 865 dso__set_hit(dso); 866 tool__inject_build_id(tool, sample, machine, 867 mmap_evsel, misc, dso__long_name(dso), dso, 868 map__flags(map)); 869 } 870 } else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 871 if (!map__hit(map)) { 872 const struct build_id null_bid = { .size = 0 }; 873 const struct build_id *bid = dso ? dso__bid(dso) : &null_bid; 874 const char *filename = dso ? dso__long_name(dso) : ""; 875 876 map__set_hit(map); 877 perf_event__synthesize_mmap2_build_id(tool, sample, machine, 878 perf_event__repipe, 879 mmap_evsel, 880 misc, 881 sample->pid, sample->tid, 882 map__start(map), 883 map__end(map) - map__start(map), 884 map__pgoff(map), 885 bid, 886 map__prot(map), 887 map__flags(map), 888 filename); 889 } 890 } 891 return 0; 892 } 893 894 struct mark_dso_hit_args { 895 const struct perf_inject *inject; 896 const struct perf_tool *tool; 897 struct perf_sample *sample; 898 struct machine *machine; 899 const struct evsel *mmap_evsel; 900 }; 901 902 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data) 903 { 904 struct mark_dso_hit_args *args = data; 905 struct map *map = node->ms.map; 906 907 return mark_dso_hit(args->inject, args->tool, args->sample, args->machine, 908 args->mmap_evsel, map, /*sample_in_dso=*/false); 909 } 910 911 int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, 912 struct perf_sample *sample, 913 struct evsel *evsel __maybe_unused, 914 struct machine *machine) 915 { 916 struct addr_location al; 917 struct thread *thread; 918 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 919 struct mark_dso_hit_args args = { 920 .inject = inject, 921 .tool = tool, 922 /* 923 * Use the parsed sample data of the sample event, which will 924 * have a later timestamp than the mmap event. 925 */ 926 .sample = sample, 927 .machine = machine, 928 .mmap_evsel = inject__mmap_evsel(inject), 929 }; 930 931 addr_location__init(&al); 932 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 933 if (thread == NULL) { 934 pr_err("problem processing %d event, skipping it.\n", 935 event->header.type); 936 goto repipe; 937 } 938 939 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 940 mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map, 941 /*sample_in_dso=*/true); 942 } 943 944 sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, 945 /*symbols=*/false, mark_dso_hit_callback, &args); 946 947 thread__put(thread); 948 repipe: 949 perf_event__repipe(tool, event, sample, machine); 950 addr_location__exit(&al); 951 return 0; 952 } 953 954 static int perf_inject__sched_process_exit(const struct perf_tool *tool, 955 union perf_event *event __maybe_unused, 956 struct perf_sample *sample, 957 struct evsel *evsel __maybe_unused, 958 struct machine *machine __maybe_unused) 959 { 960 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 961 struct event_entry *ent; 962 963 list_for_each_entry(ent, &inject->samples, node) { 964 if (sample->tid == ent->tid) { 965 list_del_init(&ent->node); 966 free(ent); 967 break; 968 } 969 } 970 971 return 0; 972 } 973 974 static int perf_inject__sched_switch(const struct perf_tool *tool, 975 union perf_event *event, 976 struct perf_sample *sample, 977 struct evsel *evsel, 978 struct machine *machine) 979 { 980 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 981 struct event_entry *ent; 982 983 perf_inject__sched_process_exit(tool, event, sample, evsel, machine); 984 985 ent = malloc(event->header.size + sizeof(struct event_entry)); 986 if (ent == NULL) { 987 color_fprintf(stderr, PERF_COLOR_RED, 988 "Not enough memory to process sched switch event!"); 989 return -1; 990 } 991 992 ent->tid = sample->tid; 993 memcpy(&ent->event, event, event->header.size); 994 list_add(&ent->node, &inject->samples); 995 return 0; 996 } 997 998 #ifdef HAVE_LIBTRACEEVENT 999 static int perf_inject__sched_stat(const struct perf_tool *tool, 1000 union perf_event *event __maybe_unused, 1001 struct perf_sample *sample, 1002 struct evsel *evsel, 1003 struct machine *machine) 1004 { 1005 struct event_entry *ent; 1006 union perf_event *event_sw; 1007 struct perf_sample sample_sw; 1008 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1009 u32 pid = evsel__intval(evsel, sample, "pid"); 1010 1011 list_for_each_entry(ent, &inject->samples, node) { 1012 if (pid == ent->tid) 1013 goto found; 1014 } 1015 1016 return 0; 1017 found: 1018 event_sw = &ent->event[0]; 1019 evsel__parse_sample(evsel, event_sw, &sample_sw); 1020 1021 sample_sw.period = sample->period; 1022 sample_sw.time = sample->time; 1023 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type, 1024 evsel->core.attr.read_format, &sample_sw); 1025 build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); 1026 return perf_event__repipe(tool, event_sw, &sample_sw, machine); 1027 } 1028 #endif 1029 1030 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) 1031 { 1032 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL)) 1033 return NULL; 1034 return &gs->vcpu[vcpu]; 1035 } 1036 1037 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) 1038 { 1039 ssize_t ret = writen(gs->tmp_fd, buf, sz); 1040 1041 return ret < 0 ? ret : 0; 1042 } 1043 1044 static int guest_session__repipe(const struct perf_tool *tool, 1045 union perf_event *event, 1046 struct perf_sample *sample __maybe_unused, 1047 struct machine *machine __maybe_unused) 1048 { 1049 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1050 1051 return guest_session__output_bytes(gs, event, event->header.size); 1052 } 1053 1054 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 1055 { 1056 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 1057 int hash; 1058 1059 if (!guest_tid) 1060 return -ENOMEM; 1061 1062 guest_tid->tid = tid; 1063 guest_tid->vcpu = vcpu; 1064 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 1065 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 1066 1067 return 0; 1068 } 1069 1070 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 1071 union perf_event *event, 1072 u64 offset __maybe_unused, void *data) 1073 { 1074 struct guest_session *gs = data; 1075 unsigned int vcpu; 1076 struct guest_vcpu *guest_vcpu; 1077 int ret; 1078 1079 if (event->header.type != PERF_RECORD_COMM || 1080 event->comm.pid != gs->machine_pid) 1081 return 0; 1082 1083 /* 1084 * QEMU option -name debug-threads=on, causes thread names formatted as 1085 * below, although it is not an ABI. Also libvirt seems to use this by 1086 * default. Here we rely on it to tell us which thread is which VCPU. 1087 */ 1088 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu); 1089 if (ret <= 0) 1090 return ret; 1091 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n", 1092 event->comm.tid, event->comm.comm, vcpu); 1093 if (vcpu > INT_MAX) { 1094 pr_err("Invalid VCPU %u\n", vcpu); 1095 return -EINVAL; 1096 } 1097 guest_vcpu = guest_session__vcpu(gs, vcpu); 1098 if (!guest_vcpu) 1099 return -ENOMEM; 1100 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) { 1101 pr_err("Fatal error: Two threads found with the same VCPU\n"); 1102 return -EINVAL; 1103 } 1104 guest_vcpu->tid = event->comm.tid; 1105 1106 return guest_session__map_tid(gs, event->comm.tid, vcpu); 1107 } 1108 1109 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) 1110 { 1111 return perf_session__peek_events(session, session->header.data_offset, 1112 session->header.data_size, 1113 host_peek_vm_comms_cb, gs); 1114 } 1115 1116 static bool evlist__is_id_used(struct evlist *evlist, u64 id) 1117 { 1118 return evlist__id2sid(evlist, id); 1119 } 1120 1121 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) 1122 { 1123 do { 1124 gs->highest_id += 1; 1125 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id)); 1126 1127 return gs->highest_id; 1128 } 1129 1130 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) 1131 { 1132 struct guest_id *guest_id = zalloc(sizeof(*guest_id)); 1133 int hash; 1134 1135 if (!guest_id) 1136 return -ENOMEM; 1137 1138 guest_id->id = id; 1139 guest_id->host_id = host_id; 1140 guest_id->vcpu = vcpu; 1141 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); 1142 hlist_add_head(&guest_id->node, &gs->heads[hash]); 1143 1144 return 0; 1145 } 1146 1147 static u64 evlist__find_highest_id(struct evlist *evlist) 1148 { 1149 struct evsel *evsel; 1150 u64 highest_id = 1; 1151 1152 evlist__for_each_entry(evlist, evsel) { 1153 u32 j; 1154 1155 for (j = 0; j < evsel->core.ids; j++) { 1156 u64 id = evsel->core.id[j]; 1157 1158 if (id > highest_id) 1159 highest_id = id; 1160 } 1161 } 1162 1163 return highest_id; 1164 } 1165 1166 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 1167 { 1168 struct evlist *evlist = gs->session->evlist; 1169 struct evsel *evsel; 1170 int ret; 1171 1172 evlist__for_each_entry(evlist, evsel) { 1173 u32 j; 1174 1175 for (j = 0; j < evsel->core.ids; j++) { 1176 struct perf_sample_id *sid; 1177 u64 host_id; 1178 u64 id; 1179 1180 id = evsel->core.id[j]; 1181 sid = evlist__id2sid(evlist, id); 1182 if (!sid || sid->cpu.cpu == -1) 1183 continue; 1184 host_id = guest_session__allocate_new_id(gs, host_evlist); 1185 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 1186 if (ret) 1187 return ret; 1188 } 1189 } 1190 1191 return 0; 1192 } 1193 1194 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1195 { 1196 struct hlist_head *head; 1197 struct guest_id *guest_id; 1198 int hash; 1199 1200 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1201 head = &gs->heads[hash]; 1202 1203 hlist_for_each_entry(guest_id, head, node) 1204 if (guest_id->id == id) 1205 return guest_id; 1206 1207 return NULL; 1208 } 1209 1210 static int process_attr(const struct perf_tool *tool, union perf_event *event, 1211 struct perf_sample *sample __maybe_unused, 1212 struct machine *machine __maybe_unused) 1213 { 1214 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1215 1216 return perf_event__process_attr(tool, event, &inject->session->evlist); 1217 } 1218 1219 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1220 { 1221 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1222 struct perf_event_attr attr = evsel->core.attr; 1223 u64 *id_array; 1224 u32 *vcpu_array; 1225 int ret = -ENOMEM; 1226 u32 i; 1227 1228 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1229 if (!id_array) 1230 return -ENOMEM; 1231 1232 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1233 if (!vcpu_array) 1234 goto out; 1235 1236 for (i = 0; i < evsel->core.ids; i++) { 1237 u64 id = evsel->core.id[i]; 1238 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1239 1240 if (!guest_id) { 1241 pr_err("Failed to find guest id %"PRIu64"\n", id); 1242 ret = -EINVAL; 1243 goto out; 1244 } 1245 id_array[i] = guest_id->host_id; 1246 vcpu_array[i] = guest_id->vcpu; 1247 } 1248 1249 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1250 attr.exclude_host = 1; 1251 attr.exclude_guest = 0; 1252 1253 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1254 id_array, process_attr); 1255 if (ret) 1256 pr_err("Failed to add guest attr.\n"); 1257 1258 for (i = 0; i < evsel->core.ids; i++) { 1259 struct perf_sample_id *sid; 1260 u32 vcpu = vcpu_array[i]; 1261 1262 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1263 /* Guest event is per-thread from the host point of view */ 1264 sid->cpu.cpu = -1; 1265 sid->tid = gs->vcpu[vcpu].tid; 1266 sid->machine_pid = gs->machine_pid; 1267 sid->vcpu.cpu = vcpu; 1268 } 1269 out: 1270 free(vcpu_array); 1271 free(id_array); 1272 return ret; 1273 } 1274 1275 static int guest_session__add_attrs(struct guest_session *gs) 1276 { 1277 struct evlist *evlist = gs->session->evlist; 1278 struct evsel *evsel; 1279 int ret; 1280 1281 evlist__for_each_entry(evlist, evsel) { 1282 ret = guest_session__add_attr(gs, evsel); 1283 if (ret) 1284 return ret; 1285 } 1286 1287 return 0; 1288 } 1289 1290 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1291 { 1292 struct perf_session *session = inject->session; 1293 struct evlist *evlist = session->evlist; 1294 struct machine *machine = &session->machines.host; 1295 size_t from = evlist->core.nr_entries - new_cnt; 1296 1297 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1298 evlist, machine, from); 1299 } 1300 1301 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) 1302 { 1303 struct hlist_head *head; 1304 struct guest_tid *guest_tid; 1305 int hash; 1306 1307 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); 1308 head = &gs->tids[hash]; 1309 1310 hlist_for_each_entry(guest_tid, head, node) 1311 if (guest_tid->tid == tid) 1312 return guest_tid; 1313 1314 return NULL; 1315 } 1316 1317 static bool dso__is_in_kernel_space(struct dso *dso) 1318 { 1319 if (dso__is_vdso(dso)) 1320 return false; 1321 1322 return dso__is_kcore(dso) || 1323 dso__kernel(dso) || 1324 is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN); 1325 } 1326 1327 static u64 evlist__first_id(struct evlist *evlist) 1328 { 1329 struct evsel *evsel; 1330 1331 evlist__for_each_entry(evlist, evsel) { 1332 if (evsel->core.ids) 1333 return evsel->core.id[0]; 1334 } 1335 return 0; 1336 } 1337 1338 static int process_build_id(const struct perf_tool *tool, 1339 union perf_event *event, 1340 struct perf_sample *sample __maybe_unused, 1341 struct machine *machine __maybe_unused) 1342 { 1343 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1344 1345 return perf_event__process_build_id(inject->session, event); 1346 } 1347 1348 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) 1349 { 1350 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid); 1351 struct perf_sample synth_sample = { 1352 .pid = -1, 1353 .tid = -1, 1354 .time = -1, 1355 .stream_id = -1, 1356 .cpu = -1, 1357 .period = 1, 1358 .cpumode = dso__is_in_kernel_space(dso) 1359 ? PERF_RECORD_MISC_GUEST_KERNEL 1360 : PERF_RECORD_MISC_GUEST_USER, 1361 }; 1362 1363 if (!machine) 1364 return -ENOMEM; 1365 1366 dso__set_hit(dso); 1367 1368 return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine, 1369 process_build_id, inject__mmap_evsel(inject), 1370 /*misc=*/synth_sample.cpumode, 1371 dso__bid(dso), dso__long_name(dso)); 1372 } 1373 1374 static int guest_session__add_build_ids_cb(struct dso *dso, void *data) 1375 { 1376 struct guest_session *gs = data; 1377 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1378 1379 if (!dso__has_build_id(dso)) 1380 return 0; 1381 1382 return synthesize_build_id(inject, dso, gs->machine_pid); 1383 1384 } 1385 1386 static int guest_session__add_build_ids(struct guest_session *gs) 1387 { 1388 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1389 1390 /* Build IDs will be put in the Build ID feature section */ 1391 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID); 1392 1393 return dsos__for_each_dso(&gs->session->machines.host.dsos, 1394 guest_session__add_build_ids_cb, 1395 gs); 1396 } 1397 1398 static int guest_session__ksymbol_event(const struct perf_tool *tool, 1399 union perf_event *event, 1400 struct perf_sample *sample __maybe_unused, 1401 struct machine *machine __maybe_unused) 1402 { 1403 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1404 1405 /* Only support out-of-line i.e. no BPF support */ 1406 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) 1407 return 0; 1408 1409 return guest_session__output_bytes(gs, event, event->header.size); 1410 } 1411 1412 static int guest_session__start(struct guest_session *gs, const char *name, bool force) 1413 { 1414 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX"; 1415 struct perf_session *session; 1416 int ret; 1417 1418 /* Only these events will be injected */ 1419 gs->tool.mmap = guest_session__repipe; 1420 gs->tool.mmap2 = guest_session__repipe; 1421 gs->tool.comm = guest_session__repipe; 1422 gs->tool.fork = guest_session__repipe; 1423 gs->tool.exit = guest_session__repipe; 1424 gs->tool.lost = guest_session__repipe; 1425 gs->tool.context_switch = guest_session__repipe; 1426 gs->tool.ksymbol = guest_session__ksymbol_event; 1427 gs->tool.text_poke = guest_session__repipe; 1428 /* 1429 * Processing a build ID creates a struct dso with that build ID. Later, 1430 * all guest dsos are iterated and the build IDs processed into the host 1431 * session where they will be output to the Build ID feature section 1432 * when the perf.data file header is written. 1433 */ 1434 gs->tool.build_id = perf_event__process_build_id; 1435 /* Process the id index to know what VCPU an ID belongs to */ 1436 gs->tool.id_index = perf_event__process_id_index; 1437 1438 gs->tool.ordered_events = true; 1439 gs->tool.ordering_requires_timestamps = true; 1440 1441 gs->data.path = name; 1442 gs->data.force = force; 1443 gs->data.mode = PERF_DATA_MODE_READ; 1444 1445 session = perf_session__new(&gs->data, &gs->tool); 1446 if (IS_ERR(session)) 1447 return PTR_ERR(session); 1448 gs->session = session; 1449 1450 /* 1451 * Initial events have zero'd ID samples. Get default ID sample size 1452 * used for removing them. 1453 */ 1454 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size; 1455 /* And default ID for adding back a host-compatible ID sample */ 1456 gs->dflt_id = evlist__first_id(session->evlist); 1457 if (!gs->dflt_id) { 1458 pr_err("Guest data has no sample IDs"); 1459 return -EINVAL; 1460 } 1461 1462 /* Temporary file for guest events */ 1463 gs->tmp_file_name = strdup(tmp_file_name); 1464 if (!gs->tmp_file_name) 1465 return -ENOMEM; 1466 gs->tmp_fd = mkstemp(gs->tmp_file_name); 1467 if (gs->tmp_fd < 0) 1468 return -errno; 1469 1470 if (zstd_init(&gs->session->zstd_data, 0) < 0) 1471 pr_warning("Guest session decompression initialization failed.\n"); 1472 1473 /* 1474 * perf does not support processing 2 sessions simultaneously, so output 1475 * guest events to a temporary file. 1476 */ 1477 ret = perf_session__process_events(gs->session); 1478 if (ret) 1479 return ret; 1480 1481 if (lseek(gs->tmp_fd, 0, SEEK_SET)) 1482 return -errno; 1483 1484 return 0; 1485 } 1486 1487 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ 1488 static void free_hlist(struct hlist_head *heads, size_t hlist_sz) 1489 { 1490 struct hlist_node *pos, *n; 1491 size_t i; 1492 1493 for (i = 0; i < hlist_sz; ++i) { 1494 hlist_for_each_safe(pos, n, &heads[i]) { 1495 hlist_del(pos); 1496 free(pos); 1497 } 1498 } 1499 } 1500 1501 static void guest_session__exit(struct guest_session *gs) 1502 { 1503 if (gs->session) { 1504 perf_session__delete(gs->session); 1505 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE); 1506 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE); 1507 } 1508 if (gs->tmp_file_name) { 1509 if (gs->tmp_fd >= 0) 1510 close(gs->tmp_fd); 1511 unlink(gs->tmp_file_name); 1512 zfree(&gs->tmp_file_name); 1513 } 1514 zfree(&gs->vcpu); 1515 zfree(&gs->perf_data_file); 1516 } 1517 1518 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv) 1519 { 1520 tc->time_shift = time_conv->time_shift; 1521 tc->time_mult = time_conv->time_mult; 1522 tc->time_zero = time_conv->time_zero; 1523 tc->time_cycles = time_conv->time_cycles; 1524 tc->time_mask = time_conv->time_mask; 1525 tc->cap_user_time_zero = time_conv->cap_user_time_zero; 1526 tc->cap_user_time_short = time_conv->cap_user_time_short; 1527 } 1528 1529 static void guest_session__get_tc(struct guest_session *gs) 1530 { 1531 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1532 1533 get_tsc_conv(&gs->host_tc, &inject->session->time_conv); 1534 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv); 1535 } 1536 1537 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) 1538 { 1539 u64 tsc; 1540 1541 if (!guest_time) { 1542 *host_time = 0; 1543 return; 1544 } 1545 1546 if (gs->guest_tc.cap_user_time_zero) 1547 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc); 1548 else 1549 tsc = guest_time; 1550 1551 /* 1552 * This is the correct order of operations for x86 if the TSC Offset and 1553 * Multiplier values are used. 1554 */ 1555 tsc -= gs->time_offset; 1556 tsc /= gs->time_scale; 1557 1558 if (gs->host_tc.cap_user_time_zero) 1559 *host_time = tsc_to_perf_time(tsc, &gs->host_tc); 1560 else 1561 *host_time = tsc; 1562 } 1563 1564 static int guest_session__fetch(struct guest_session *gs) 1565 { 1566 void *buf; 1567 struct perf_event_header *hdr; 1568 size_t hdr_sz = sizeof(*hdr); 1569 ssize_t ret; 1570 1571 buf = gs->ev.event_buf; 1572 if (!buf) { 1573 buf = malloc(PERF_SAMPLE_MAX_SIZE); 1574 if (!buf) 1575 return -ENOMEM; 1576 gs->ev.event_buf = buf; 1577 } 1578 hdr = buf; 1579 ret = readn(gs->tmp_fd, buf, hdr_sz); 1580 if (ret < 0) 1581 return ret; 1582 1583 if (!ret) { 1584 /* Zero size means EOF */ 1585 hdr->size = 0; 1586 return 0; 1587 } 1588 1589 buf += hdr_sz; 1590 1591 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); 1592 if (ret < 0) 1593 return ret; 1594 1595 gs->ev.event = (union perf_event *)gs->ev.event_buf; 1596 gs->ev.sample.time = 0; 1597 1598 if (hdr->type >= PERF_RECORD_USER_TYPE_START) { 1599 pr_err("Unexpected type fetching guest event"); 1600 return 0; 1601 } 1602 1603 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample); 1604 if (ret) { 1605 pr_err("Parse failed fetching guest event"); 1606 return ret; 1607 } 1608 1609 if (!gs->have_tc) { 1610 guest_session__get_tc(gs); 1611 gs->have_tc = true; 1612 } 1613 1614 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time); 1615 1616 return 0; 1617 } 1618 1619 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, 1620 const struct perf_sample *sample) 1621 { 1622 struct evsel *evsel; 1623 void *array; 1624 int ret; 1625 1626 evsel = evlist__id2evsel(evlist, sample->id); 1627 array = ev; 1628 1629 if (!evsel) { 1630 pr_err("No evsel for id %"PRIu64"\n", sample->id); 1631 return -EINVAL; 1632 } 1633 1634 array += ev->header.size; 1635 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); 1636 if (ret < 0) 1637 return ret; 1638 1639 if (ret & 7) { 1640 pr_err("Bad id sample size %d\n", ret); 1641 return -EINVAL; 1642 } 1643 1644 ev->header.size += ret; 1645 1646 return 0; 1647 } 1648 1649 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp) 1650 { 1651 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1652 int ret; 1653 1654 if (!gs->ready) 1655 return 0; 1656 1657 while (1) { 1658 struct perf_sample *sample; 1659 struct guest_id *guest_id; 1660 union perf_event *ev; 1661 u16 id_hdr_size; 1662 u8 cpumode; 1663 u64 id; 1664 1665 if (!gs->fetched) { 1666 ret = guest_session__fetch(gs); 1667 if (ret) 1668 return ret; 1669 gs->fetched = true; 1670 } 1671 1672 ev = gs->ev.event; 1673 sample = &gs->ev.sample; 1674 1675 if (!ev->header.size) 1676 return 0; /* EOF */ 1677 1678 if (sample->time > timestamp) 1679 return 0; 1680 1681 /* Change cpumode to guest */ 1682 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1683 if (cpumode & PERF_RECORD_MISC_USER) 1684 cpumode = PERF_RECORD_MISC_GUEST_USER; 1685 else 1686 cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 1687 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK; 1688 ev->header.misc |= cpumode; 1689 1690 id = sample->id; 1691 if (!id) { 1692 id = gs->dflt_id; 1693 id_hdr_size = gs->dflt_id_hdr_size; 1694 } else { 1695 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id); 1696 1697 id_hdr_size = evsel__id_hdr_size(evsel); 1698 } 1699 1700 if (id_hdr_size & 7) { 1701 pr_err("Bad id_hdr_size %u\n", id_hdr_size); 1702 return -EINVAL; 1703 } 1704 1705 if (ev->header.size & 7) { 1706 pr_err("Bad event size %u\n", ev->header.size); 1707 return -EINVAL; 1708 } 1709 1710 /* Remove guest id sample */ 1711 ev->header.size -= id_hdr_size; 1712 1713 if (ev->header.size & 7) { 1714 pr_err("Bad raw event size %u\n", ev->header.size); 1715 return -EINVAL; 1716 } 1717 1718 guest_id = guest_session__lookup_id(gs, id); 1719 if (!guest_id) { 1720 pr_err("Guest event with unknown id %llu\n", 1721 (unsigned long long)id); 1722 return -EINVAL; 1723 } 1724 1725 /* Change to host ID to avoid conflicting ID values */ 1726 sample->id = guest_id->host_id; 1727 sample->stream_id = guest_id->host_id; 1728 1729 if (sample->cpu != (u32)-1) { 1730 if (sample->cpu >= gs->vcpu_cnt) { 1731 pr_err("Guest event with unknown VCPU %u\n", 1732 sample->cpu); 1733 return -EINVAL; 1734 } 1735 /* Change to host CPU instead of guest VCPU */ 1736 sample->cpu = gs->vcpu[sample->cpu].cpu; 1737 } 1738 1739 /* New id sample with new ID and CPU */ 1740 ret = evlist__append_id_sample(inject->session->evlist, ev, sample); 1741 if (ret) 1742 return ret; 1743 1744 if (ev->header.size & 7) { 1745 pr_err("Bad new event size %u\n", ev->header.size); 1746 return -EINVAL; 1747 } 1748 1749 gs->fetched = false; 1750 1751 ret = output_bytes(inject, ev, ev->header.size); 1752 if (ret) 1753 return ret; 1754 } 1755 } 1756 1757 static int guest_session__flush_events(struct guest_session *gs) 1758 { 1759 return guest_session__inject_events(gs, -1); 1760 } 1761 1762 static int host__repipe(const struct perf_tool *tool, 1763 union perf_event *event, 1764 struct perf_sample *sample, 1765 struct machine *machine) 1766 { 1767 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1768 int ret; 1769 1770 ret = guest_session__inject_events(&inject->guest_session, sample->time); 1771 if (ret) 1772 return ret; 1773 1774 return perf_event__repipe(tool, event, sample, machine); 1775 } 1776 1777 static int host__finished_init(struct perf_session *session, union perf_event *event) 1778 { 1779 struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool); 1780 struct guest_session *gs = &inject->guest_session; 1781 int ret; 1782 1783 /* 1784 * Peek through host COMM events to find QEMU threads and the VCPU they 1785 * are running. 1786 */ 1787 ret = host_peek_vm_comms(session, gs); 1788 if (ret) 1789 return ret; 1790 1791 if (!gs->vcpu_cnt) { 1792 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid); 1793 return -EINVAL; 1794 } 1795 1796 /* 1797 * Allocate new (unused) host sample IDs and map them to the guest IDs. 1798 */ 1799 gs->highest_id = evlist__find_highest_id(session->evlist); 1800 ret = guest_session__map_ids(gs, session->evlist); 1801 if (ret) 1802 return ret; 1803 1804 ret = guest_session__add_attrs(gs); 1805 if (ret) 1806 return ret; 1807 1808 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries); 1809 if (ret) { 1810 pr_err("Failed to synthesize id_index\n"); 1811 return ret; 1812 } 1813 1814 ret = guest_session__add_build_ids(gs); 1815 if (ret) { 1816 pr_err("Failed to add guest build IDs\n"); 1817 return ret; 1818 } 1819 1820 gs->ready = true; 1821 1822 ret = guest_session__inject_events(gs, 0); 1823 if (ret) 1824 return ret; 1825 1826 return perf_event__repipe_op2_synth(session, event); 1827 } 1828 1829 /* 1830 * Obey finished-round ordering. The FINISHED_ROUND event is first processed 1831 * which flushes host events to file up until the last flush time. Then inject 1832 * guest events up to the same time. Finally write out the FINISHED_ROUND event 1833 * itself. 1834 */ 1835 static int host__finished_round(const struct perf_tool *tool, 1836 union perf_event *event, 1837 struct ordered_events *oe) 1838 { 1839 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1840 int ret = perf_event__process_finished_round(tool, event, oe); 1841 u64 timestamp = ordered_events__last_flush_time(oe); 1842 1843 if (ret) 1844 return ret; 1845 1846 ret = guest_session__inject_events(&inject->guest_session, timestamp); 1847 if (ret) 1848 return ret; 1849 1850 return perf_event__repipe_oe_synth(tool, event, oe); 1851 } 1852 1853 static int host__context_switch(const struct perf_tool *tool, 1854 union perf_event *event, 1855 struct perf_sample *sample, 1856 struct machine *machine) 1857 { 1858 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1859 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1860 struct guest_session *gs = &inject->guest_session; 1861 u32 pid = event->context_switch.next_prev_pid; 1862 u32 tid = event->context_switch.next_prev_tid; 1863 struct guest_tid *guest_tid; 1864 u32 vcpu; 1865 1866 if (out || pid != gs->machine_pid) 1867 goto out; 1868 1869 guest_tid = guest_session__lookup_tid(gs, tid); 1870 if (!guest_tid) 1871 goto out; 1872 1873 if (sample->cpu == (u32)-1) { 1874 pr_err("Switch event does not have CPU\n"); 1875 return -EINVAL; 1876 } 1877 1878 vcpu = guest_tid->vcpu; 1879 if (vcpu >= gs->vcpu_cnt) 1880 return -EINVAL; 1881 1882 /* Guest is switching in, record which CPU the VCPU is now running on */ 1883 gs->vcpu[vcpu].cpu = sample->cpu; 1884 out: 1885 return host__repipe(tool, event, sample, machine); 1886 } 1887 1888 static void sig_handler(int sig __maybe_unused) 1889 { 1890 session_done = 1; 1891 } 1892 1893 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) 1894 { 1895 struct perf_event_attr *attr = &evsel->core.attr; 1896 const char *name = evsel__name(evsel); 1897 1898 if (!(attr->sample_type & sample_type)) { 1899 pr_err("Samples for %s event do not have %s attribute set.", 1900 name, sample_msg); 1901 return -EINVAL; 1902 } 1903 1904 return 0; 1905 } 1906 1907 static int drop_sample(const struct perf_tool *tool __maybe_unused, 1908 union perf_event *event __maybe_unused, 1909 struct perf_sample *sample __maybe_unused, 1910 struct evsel *evsel __maybe_unused, 1911 struct machine *machine __maybe_unused) 1912 { 1913 return 0; 1914 } 1915 1916 static void strip_init(struct perf_inject *inject) 1917 { 1918 struct evlist *evlist = inject->session->evlist; 1919 struct evsel *evsel; 1920 1921 inject->tool.context_switch = perf_event__drop; 1922 1923 evlist__for_each_entry(evlist, evsel) 1924 evsel->handler = drop_sample; 1925 } 1926 1927 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) 1928 { 1929 struct perf_inject *inject = opt->value; 1930 const char *args; 1931 char *dry_run; 1932 1933 if (unset) 1934 return 0; 1935 1936 inject->itrace_synth_opts.set = true; 1937 inject->itrace_synth_opts.vm_time_correlation = true; 1938 inject->in_place_update = true; 1939 1940 if (!str) 1941 return 0; 1942 1943 dry_run = skip_spaces(str); 1944 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { 1945 inject->itrace_synth_opts.vm_tm_corr_dry_run = true; 1946 inject->in_place_update_dry_run = true; 1947 args = dry_run + strlen("dry-run"); 1948 } else { 1949 args = str; 1950 } 1951 1952 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); 1953 1954 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; 1955 } 1956 1957 static int parse_guest_data(const struct option *opt, const char *str, int unset) 1958 { 1959 struct perf_inject *inject = opt->value; 1960 struct guest_session *gs = &inject->guest_session; 1961 char *tok; 1962 char *s; 1963 1964 if (unset) 1965 return 0; 1966 1967 if (!str) 1968 goto bad_args; 1969 1970 s = strdup(str); 1971 if (!s) 1972 return -ENOMEM; 1973 1974 gs->perf_data_file = strsep(&s, ","); 1975 if (!gs->perf_data_file) 1976 goto bad_args; 1977 1978 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file); 1979 if (gs->copy_kcore_dir) 1980 inject->output.is_dir = true; 1981 1982 tok = strsep(&s, ","); 1983 if (!tok) 1984 goto bad_args; 1985 gs->machine_pid = strtoul(tok, NULL, 0); 1986 if (!inject->guest_session.machine_pid) 1987 goto bad_args; 1988 1989 gs->time_scale = 1; 1990 1991 tok = strsep(&s, ","); 1992 if (!tok) 1993 goto out; 1994 gs->time_offset = strtoull(tok, NULL, 0); 1995 1996 tok = strsep(&s, ","); 1997 if (!tok) 1998 goto out; 1999 gs->time_scale = strtod(tok, NULL); 2000 if (!gs->time_scale) 2001 goto bad_args; 2002 out: 2003 return 0; 2004 2005 bad_args: 2006 pr_err("--guest-data option requires guest perf.data file name, " 2007 "guest machine PID, and optionally guest timestamp offset, " 2008 "and guest timestamp scale factor, separated by commas.\n"); 2009 return -1; 2010 } 2011 2012 static int save_section_info_cb(struct perf_file_section *section, 2013 struct perf_header *ph __maybe_unused, 2014 int feat, int fd __maybe_unused, void *data) 2015 { 2016 struct perf_inject *inject = data; 2017 2018 inject->secs[feat] = *section; 2019 return 0; 2020 } 2021 2022 static int save_section_info(struct perf_inject *inject) 2023 { 2024 struct perf_header *header = &inject->session->header; 2025 int fd = perf_data__fd(inject->session->data); 2026 2027 return perf_header__process_sections(header, fd, inject, save_section_info_cb); 2028 } 2029 2030 static bool keep_feat(int feat) 2031 { 2032 switch (feat) { 2033 /* Keep original information that describes the machine or software */ 2034 case HEADER_TRACING_DATA: 2035 case HEADER_HOSTNAME: 2036 case HEADER_OSRELEASE: 2037 case HEADER_VERSION: 2038 case HEADER_ARCH: 2039 case HEADER_NRCPUS: 2040 case HEADER_CPUDESC: 2041 case HEADER_CPUID: 2042 case HEADER_TOTAL_MEM: 2043 case HEADER_CPU_TOPOLOGY: 2044 case HEADER_NUMA_TOPOLOGY: 2045 case HEADER_PMU_MAPPINGS: 2046 case HEADER_CACHE: 2047 case HEADER_MEM_TOPOLOGY: 2048 case HEADER_CLOCKID: 2049 case HEADER_BPF_PROG_INFO: 2050 case HEADER_BPF_BTF: 2051 case HEADER_CPU_PMU_CAPS: 2052 case HEADER_CLOCK_DATA: 2053 case HEADER_HYBRID_TOPOLOGY: 2054 case HEADER_PMU_CAPS: 2055 return true; 2056 /* Information that can be updated */ 2057 case HEADER_BUILD_ID: 2058 case HEADER_CMDLINE: 2059 case HEADER_EVENT_DESC: 2060 case HEADER_BRANCH_STACK: 2061 case HEADER_GROUP_DESC: 2062 case HEADER_AUXTRACE: 2063 case HEADER_STAT: 2064 case HEADER_SAMPLE_TIME: 2065 case HEADER_DIR_FORMAT: 2066 case HEADER_COMPRESSED: 2067 default: 2068 return false; 2069 }; 2070 } 2071 2072 static int read_file(int fd, u64 offs, void *buf, size_t sz) 2073 { 2074 ssize_t ret = preadn(fd, buf, sz, offs); 2075 2076 if (ret < 0) 2077 return -errno; 2078 if ((size_t)ret != sz) 2079 return -EINVAL; 2080 return 0; 2081 } 2082 2083 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) 2084 { 2085 int fd = perf_data__fd(inject->session->data); 2086 u64 offs = inject->secs[feat].offset; 2087 size_t sz = inject->secs[feat].size; 2088 void *buf = malloc(sz); 2089 int ret; 2090 2091 if (!buf) 2092 return -ENOMEM; 2093 2094 ret = read_file(fd, offs, buf, sz); 2095 if (ret) 2096 goto out_free; 2097 2098 ret = fw->write(fw, buf, sz); 2099 out_free: 2100 free(buf); 2101 return ret; 2102 } 2103 2104 struct inject_fc { 2105 struct feat_copier fc; 2106 struct perf_inject *inject; 2107 }; 2108 2109 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) 2110 { 2111 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); 2112 struct perf_inject *inject = inj_fc->inject; 2113 int ret; 2114 2115 if (!inject->secs[feat].offset || 2116 !keep_feat(feat)) 2117 return 0; 2118 2119 ret = feat_copy(inject, feat, fw); 2120 if (ret < 0) 2121 return ret; 2122 2123 return 1; /* Feature section copied */ 2124 } 2125 2126 static int copy_kcore_dir(struct perf_inject *inject) 2127 { 2128 char *cmd; 2129 int ret; 2130 2131 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1", 2132 inject->input_name, inject->output.path); 2133 if (ret < 0) 2134 return ret; 2135 pr_debug("%s\n", cmd); 2136 ret = system(cmd); 2137 free(cmd); 2138 return ret; 2139 } 2140 2141 static int guest_session__copy_kcore_dir(struct guest_session *gs) 2142 { 2143 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 2144 char *cmd; 2145 int ret; 2146 2147 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1", 2148 gs->perf_data_file, inject->output.path, gs->machine_pid); 2149 if (ret < 0) 2150 return ret; 2151 pr_debug("%s\n", cmd); 2152 ret = system(cmd); 2153 free(cmd); 2154 return ret; 2155 } 2156 2157 static int output_fd(struct perf_inject *inject) 2158 { 2159 return inject->in_place_update ? -1 : perf_data__fd(&inject->output); 2160 } 2161 2162 static int __cmd_inject(struct perf_inject *inject) 2163 { 2164 int ret = -EINVAL; 2165 struct guest_session *gs = &inject->guest_session; 2166 struct perf_session *session = inject->session; 2167 int fd = output_fd(inject); 2168 u64 output_data_offset = perf_session__data_offset(session->evlist); 2169 /* 2170 * Pipe input hasn't loaded the attributes and will handle them as 2171 * events. So that the attributes don't overlap the data, write the 2172 * attributes after the data. 2173 */ 2174 bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe; 2175 2176 signal(SIGINT, sig_handler); 2177 2178 if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat || 2179 inject->itrace_synth_opts.set) { 2180 inject->tool.mmap = perf_event__repipe_mmap; 2181 inject->tool.mmap2 = perf_event__repipe_mmap2; 2182 inject->tool.fork = perf_event__repipe_fork; 2183 #ifdef HAVE_LIBTRACEEVENT 2184 inject->tool.tracing_data = perf_event__repipe_tracing_data; 2185 #endif 2186 } 2187 2188 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2189 inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 2190 inject->tool.sample = perf_event__inject_buildid; 2191 } else if (inject->sched_stat) { 2192 struct evsel *evsel; 2193 2194 evlist__for_each_entry(session->evlist, evsel) { 2195 const char *name = evsel__name(evsel); 2196 2197 if (!strcmp(name, "sched:sched_switch")) { 2198 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) 2199 return -EINVAL; 2200 2201 evsel->handler = perf_inject__sched_switch; 2202 } else if (!strcmp(name, "sched:sched_process_exit")) 2203 evsel->handler = perf_inject__sched_process_exit; 2204 #ifdef HAVE_LIBTRACEEVENT 2205 else if (!strncmp(name, "sched:sched_stat_", 17)) 2206 evsel->handler = perf_inject__sched_stat; 2207 #endif 2208 } 2209 } else if (inject->itrace_synth_opts.vm_time_correlation) { 2210 session->itrace_synth_opts = &inject->itrace_synth_opts; 2211 memset(&inject->tool, 0, sizeof(inject->tool)); 2212 inject->tool.id_index = perf_event__process_id_index; 2213 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2214 inject->tool.auxtrace = perf_event__process_auxtrace; 2215 inject->tool.auxtrace_error = perf_event__process_auxtrace_error; 2216 inject->tool.ordered_events = true; 2217 inject->tool.ordering_requires_timestamps = true; 2218 } else if (inject->itrace_synth_opts.set) { 2219 session->itrace_synth_opts = &inject->itrace_synth_opts; 2220 inject->itrace_synth_opts.inject = true; 2221 inject->tool.comm = perf_event__repipe_comm; 2222 inject->tool.namespaces = perf_event__repipe_namespaces; 2223 inject->tool.exit = perf_event__repipe_exit; 2224 inject->tool.id_index = perf_event__process_id_index; 2225 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2226 inject->tool.auxtrace = perf_event__process_auxtrace; 2227 inject->tool.aux = perf_event__drop_aux; 2228 inject->tool.itrace_start = perf_event__drop_aux; 2229 inject->tool.aux_output_hw_id = perf_event__drop_aux; 2230 inject->tool.ordered_events = true; 2231 inject->tool.ordering_requires_timestamps = true; 2232 /* Allow space in the header for new attributes */ 2233 output_data_offset = roundup(8192 + session->header.data_offset, 4096); 2234 if (inject->strip) 2235 strip_init(inject); 2236 } else if (gs->perf_data_file) { 2237 char *name = gs->perf_data_file; 2238 2239 /* 2240 * Not strictly necessary, but keep these events in order wrt 2241 * guest events. 2242 */ 2243 inject->tool.mmap = host__repipe; 2244 inject->tool.mmap2 = host__repipe; 2245 inject->tool.comm = host__repipe; 2246 inject->tool.fork = host__repipe; 2247 inject->tool.exit = host__repipe; 2248 inject->tool.lost = host__repipe; 2249 inject->tool.context_switch = host__repipe; 2250 inject->tool.ksymbol = host__repipe; 2251 inject->tool.text_poke = host__repipe; 2252 /* 2253 * Once the host session has initialized, set up sample ID 2254 * mapping and feed in guest attrs, build IDs and initial 2255 * events. 2256 */ 2257 inject->tool.finished_init = host__finished_init; 2258 /* Obey finished round ordering */ 2259 inject->tool.finished_round = host__finished_round; 2260 /* Keep track of which CPU a VCPU is runnng on */ 2261 inject->tool.context_switch = host__context_switch; 2262 /* 2263 * Must order events to be able to obey finished round 2264 * ordering. 2265 */ 2266 inject->tool.ordered_events = true; 2267 inject->tool.ordering_requires_timestamps = true; 2268 /* Set up a separate session to process guest perf.data file */ 2269 ret = guest_session__start(gs, name, session->data->force); 2270 if (ret) { 2271 pr_err("Failed to process %s, error %d\n", name, ret); 2272 return ret; 2273 } 2274 /* Allow space in the header for guest attributes */ 2275 output_data_offset += gs->session->header.data_offset; 2276 output_data_offset = roundup(output_data_offset, 4096); 2277 } 2278 2279 if (!inject->itrace_synth_opts.set) 2280 auxtrace_index__free(&session->auxtrace_index); 2281 2282 if (!inject->output.is_pipe && !inject->in_place_update) 2283 lseek(fd, output_data_offset, SEEK_SET); 2284 2285 ret = perf_session__process_events(session); 2286 if (ret) 2287 return ret; 2288 2289 if (gs->session) { 2290 /* 2291 * Remaining guest events have later timestamps. Flush them 2292 * out to file. 2293 */ 2294 ret = guest_session__flush_events(gs); 2295 if (ret) { 2296 pr_err("Failed to flush guest events\n"); 2297 return ret; 2298 } 2299 } 2300 2301 if (!inject->output.is_pipe && !inject->in_place_update) { 2302 struct inject_fc inj_fc = { 2303 .fc.copy = feat_copy_cb, 2304 .inject = inject, 2305 }; 2306 2307 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2308 inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) 2309 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 2310 /* 2311 * Keep all buildids when there is unprocessed AUX data because 2312 * it is not known which ones the AUX trace hits. 2313 */ 2314 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && 2315 inject->have_auxtrace && !inject->itrace_synth_opts.set) 2316 perf_session__dsos_hit_all(session); 2317 /* 2318 * The AUX areas have been removed and replaced with 2319 * synthesized hardware events, so clear the feature flag. 2320 */ 2321 if (inject->itrace_synth_opts.set) { 2322 perf_header__clear_feat(&session->header, 2323 HEADER_AUXTRACE); 2324 if (inject->itrace_synth_opts.last_branch || 2325 inject->itrace_synth_opts.add_last_branch) 2326 perf_header__set_feat(&session->header, 2327 HEADER_BRANCH_STACK); 2328 } 2329 session->header.data_offset = output_data_offset; 2330 session->header.data_size = inject->bytes_written; 2331 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, 2332 write_attrs_after_data); 2333 2334 if (inject->copy_kcore_dir) { 2335 ret = copy_kcore_dir(inject); 2336 if (ret) { 2337 pr_err("Failed to copy kcore\n"); 2338 return ret; 2339 } 2340 } 2341 if (gs->copy_kcore_dir) { 2342 ret = guest_session__copy_kcore_dir(gs); 2343 if (ret) { 2344 pr_err("Failed to copy guest kcore\n"); 2345 return ret; 2346 } 2347 } 2348 } 2349 2350 return ret; 2351 } 2352 2353 int cmd_inject(int argc, const char **argv) 2354 { 2355 struct perf_inject inject = { 2356 .input_name = "-", 2357 .samples = LIST_HEAD_INIT(inject.samples), 2358 .output = { 2359 .path = "-", 2360 .mode = PERF_DATA_MODE_WRITE, 2361 .use_stdio = true, 2362 }, 2363 }; 2364 struct perf_data data = { 2365 .mode = PERF_DATA_MODE_READ, 2366 .use_stdio = true, 2367 }; 2368 int ret; 2369 const char *known_build_ids = NULL; 2370 bool build_ids; 2371 bool build_id_all; 2372 bool mmap2_build_ids; 2373 bool mmap2_build_id_all; 2374 2375 struct option options[] = { 2376 OPT_BOOLEAN('b', "build-ids", &build_ids, 2377 "Inject build-ids into the output stream"), 2378 OPT_BOOLEAN(0, "buildid-all", &build_id_all, 2379 "Inject build-ids of all DSOs into the output stream"), 2380 OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids, 2381 "Drop unused mmap events, make others mmap2 with build IDs"), 2382 OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all, 2383 "Rewrite all mmap events as mmap2 events with build IDs"), 2384 OPT_STRING(0, "known-build-ids", &known_build_ids, 2385 "buildid path [,buildid path...]", 2386 "build-ids to use for given paths"), 2387 OPT_STRING('i', "input", &inject.input_name, "file", 2388 "input file name"), 2389 OPT_STRING('o', "output", &inject.output.path, "file", 2390 "output file name"), 2391 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2392 "Merge sched-stat and sched-switch for getting events " 2393 "where and how long tasks slept"), 2394 #ifdef HAVE_JITDUMP 2395 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2396 #endif 2397 OPT_INCR('v', "verbose", &verbose, 2398 "be more verbose (show build ids, etc)"), 2399 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2400 "file", "vmlinux pathname"), 2401 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2402 "don't load vmlinux even if found"), 2403 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2404 "kallsyms pathname"), 2405 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2406 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2407 NULL, "opts", "Instruction Tracing options\n" 2408 ITRACE_HELP, 2409 itrace_parse_synth_opts), 2410 OPT_BOOLEAN(0, "strip", &inject.strip, 2411 "strip non-synthesized events (use with --itrace)"), 2412 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2413 "correlate time between VM guests and the host", 2414 parse_vm_time_correlation), 2415 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2416 "inject events from a guest perf.data file", 2417 parse_guest_data), 2418 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2419 "guest mount directory under which every guest os" 2420 " instance has a subdir"), 2421 OPT_END() 2422 }; 2423 const char * const inject_usage[] = { 2424 "perf inject [<options>]", 2425 NULL 2426 }; 2427 bool ordered_events; 2428 2429 if (!inject.itrace_synth_opts.set) { 2430 /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ 2431 symbol_conf.lazy_load_kernel_maps = true; 2432 } 2433 2434 #ifndef HAVE_JITDUMP 2435 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2436 #endif 2437 argc = parse_options(argc, argv, options, inject_usage, 0); 2438 2439 /* 2440 * Any (unrecognized) arguments left? 2441 */ 2442 if (argc) 2443 usage_with_options(inject_usage, options); 2444 2445 if (inject.strip && !inject.itrace_synth_opts.set) { 2446 pr_err("--strip option requires --itrace option\n"); 2447 return -1; 2448 } 2449 2450 if (symbol__validate_sym_arguments()) 2451 return -1; 2452 2453 if (inject.in_place_update) { 2454 if (!strcmp(inject.input_name, "-")) { 2455 pr_err("Input file name required for in-place updating\n"); 2456 return -1; 2457 } 2458 if (strcmp(inject.output.path, "-")) { 2459 pr_err("Output file name must not be specified for in-place updating\n"); 2460 return -1; 2461 } 2462 if (!data.force && !inject.in_place_update_dry_run) { 2463 pr_err("The input file would be updated in place, " 2464 "the --force option is required.\n"); 2465 return -1; 2466 } 2467 if (!inject.in_place_update_dry_run) 2468 data.in_place_update = true; 2469 } else { 2470 if (strcmp(inject.output.path, "-") && !inject.strip && 2471 has_kcore_dir(inject.input_name)) { 2472 inject.output.is_dir = true; 2473 inject.copy_kcore_dir = true; 2474 } 2475 if (perf_data__open(&inject.output)) { 2476 perror("failed to create output file"); 2477 return -1; 2478 } 2479 } 2480 if (mmap2_build_ids) 2481 inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY; 2482 if (mmap2_build_id_all) 2483 inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL; 2484 if (build_ids) 2485 inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY; 2486 if (build_id_all) 2487 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL; 2488 2489 data.path = inject.input_name; 2490 2491 ordered_events = inject.jit_mode || inject.sched_stat || 2492 inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2493 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY; 2494 perf_tool__init(&inject.tool, ordered_events); 2495 inject.tool.sample = perf_event__repipe_sample; 2496 inject.tool.read = perf_event__repipe_sample; 2497 inject.tool.mmap = perf_event__repipe; 2498 inject.tool.mmap2 = perf_event__repipe; 2499 inject.tool.comm = perf_event__repipe; 2500 inject.tool.namespaces = perf_event__repipe; 2501 inject.tool.cgroup = perf_event__repipe; 2502 inject.tool.fork = perf_event__repipe; 2503 inject.tool.exit = perf_event__repipe; 2504 inject.tool.lost = perf_event__repipe; 2505 inject.tool.lost_samples = perf_event__repipe; 2506 inject.tool.aux = perf_event__repipe; 2507 inject.tool.itrace_start = perf_event__repipe; 2508 inject.tool.aux_output_hw_id = perf_event__repipe; 2509 inject.tool.context_switch = perf_event__repipe; 2510 inject.tool.throttle = perf_event__repipe; 2511 inject.tool.unthrottle = perf_event__repipe; 2512 inject.tool.ksymbol = perf_event__repipe; 2513 inject.tool.bpf = perf_event__repipe; 2514 inject.tool.text_poke = perf_event__repipe; 2515 inject.tool.attr = perf_event__repipe_attr; 2516 inject.tool.event_update = perf_event__repipe_event_update; 2517 inject.tool.tracing_data = perf_event__repipe_op2_synth; 2518 inject.tool.finished_round = perf_event__repipe_oe_synth; 2519 inject.tool.build_id = perf_event__repipe_op2_synth; 2520 inject.tool.id_index = perf_event__repipe_op2_synth; 2521 inject.tool.auxtrace_info = perf_event__repipe_op2_synth; 2522 inject.tool.auxtrace_error = perf_event__repipe_op2_synth; 2523 inject.tool.time_conv = perf_event__repipe_op2_synth; 2524 inject.tool.thread_map = perf_event__repipe_op2_synth; 2525 inject.tool.cpu_map = perf_event__repipe_op2_synth; 2526 inject.tool.stat_config = perf_event__repipe_op2_synth; 2527 inject.tool.stat = perf_event__repipe_op2_synth; 2528 inject.tool.stat_round = perf_event__repipe_op2_synth; 2529 inject.tool.feature = perf_event__repipe_op2_synth; 2530 inject.tool.finished_init = perf_event__repipe_op2_synth; 2531 inject.tool.compressed = perf_event__repipe_op4_synth; 2532 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2533 inject.tool.dont_split_sample_group = true; 2534 inject.session = __perf_session__new(&data, &inject.tool, 2535 /*trace_event_repipe=*/inject.output.is_pipe); 2536 2537 if (IS_ERR(inject.session)) { 2538 ret = PTR_ERR(inject.session); 2539 goto out_close_output; 2540 } 2541 2542 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2543 pr_warning("Decompression initialization failed.\n"); 2544 2545 /* Save original section info before feature bits change */ 2546 ret = save_section_info(&inject); 2547 if (ret) 2548 goto out_delete; 2549 2550 if (inject.output.is_pipe) { 2551 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2552 if (ret < 0) { 2553 pr_err("Couldn't write a new pipe header.\n"); 2554 goto out_delete; 2555 } 2556 2557 /* 2558 * If the input is already a pipe then the features and 2559 * attributes don't need synthesizing, they will be present in 2560 * the input. 2561 */ 2562 if (!data.is_pipe) { 2563 ret = perf_event__synthesize_for_pipe(&inject.tool, 2564 inject.session, 2565 &inject.output, 2566 perf_event__repipe); 2567 if (ret < 0) 2568 goto out_delete; 2569 } 2570 } 2571 2572 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2573 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 2574 /* 2575 * to make sure the mmap records are ordered correctly 2576 * and so that the correct especially due to jitted code 2577 * mmaps. We cannot generate the buildid hit list and 2578 * inject the jit mmaps at the same time for now. 2579 */ 2580 inject.tool.ordering_requires_timestamps = true; 2581 } 2582 if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) { 2583 inject.known_build_ids = 2584 perf_inject__parse_known_build_ids(known_build_ids); 2585 2586 if (inject.known_build_ids == NULL) { 2587 pr_err("Couldn't parse known build ids.\n"); 2588 goto out_delete; 2589 } 2590 } 2591 2592 #ifdef HAVE_JITDUMP 2593 if (inject.jit_mode) { 2594 inject.tool.mmap2 = perf_event__repipe_mmap2; 2595 inject.tool.mmap = perf_event__repipe_mmap; 2596 inject.tool.ordering_requires_timestamps = true; 2597 /* 2598 * JIT MMAP injection injects all MMAP events in one go, so it 2599 * does not obey finished_round semantics. 2600 */ 2601 inject.tool.finished_round = perf_event__drop_oe; 2602 } 2603 #endif 2604 ret = symbol__init(&inject.session->header.env); 2605 if (ret < 0) 2606 goto out_delete; 2607 2608 ret = __cmd_inject(&inject); 2609 2610 guest_session__exit(&inject.guest_session); 2611 2612 out_delete: 2613 strlist__delete(inject.known_build_ids); 2614 zstd_fini(&(inject.session->zstd_data)); 2615 perf_session__delete(inject.session); 2616 out_close_output: 2617 if (!inject.in_place_update) 2618 perf_data__close(&inject.output); 2619 free(inject.itrace_synth_opts.vm_tm_corr_args); 2620 free(inject.event_copy); 2621 free(inject.guest_session.ev.event_buf); 2622 return ret; 2623 } 2624