// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-inject.c
 *
 * Builtin inject command: Examine the live mode (stdin) event stream
 * and repipe it to stdout while optionally injecting additional
 * events into it.
 */
#include "builtin.h"

#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/jit.h"
#include "util/string2.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
#include "util/tsc.h"

#include <internal/lib.h>

#include <linux/err.h>
#include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */

#include <linux/list.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/hash.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>

struct guest_event {
	struct perf_sample sample;
	union perf_event *event;
	char *event_buf;
};

struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	u64 id;
	u64 host_id;
	u32 vcpu;
};

struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	/* Thread ID of QEMU thread */
	u32 tid;
	u32 vcpu;
};

struct guest_vcpu {
	/* Current host CPU */
	u32 cpu;
	/* Thread ID of QEMU thread */
	u32 tid;
};

struct guest_session {
	char *perf_data_file;
	u32 machine_pid;
	u64 time_offset;
	double time_scale;
	struct perf_tool tool;
	struct perf_data data;
	struct perf_session *session;
	char *tmp_file_name;
	int tmp_fd;
	struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool copy_kcore_dir;
	bool have_tc;
	bool fetched;
	bool ready;
	u16 dflt_id_hdr_size;
	u64 dflt_id;
	u64 highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu *vcpu;
	size_t vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event ev;
};

enum build_id_rewrite_style {
	BID_RWS__NONE = 0,
	BID_RWS__INJECT_HEADER_LAZY,
	BID_RWS__INJECT_HEADER_ALL,
	BID_RWS__MMAP2_BUILDID_ALL,
	BID_RWS__MMAP2_BUILDID_LAZY,
};

struct perf_inject {
	struct perf_tool tool;
	struct perf_session *session;
	enum build_id_rewrite_style build_id_style;
	bool sched_stat;
	bool have_auxtrace;
	bool strip;
	bool jit_mode;
	bool in_place_update;
	bool in_place_update_dry_run;
	bool copy_kcore_dir;
	const char *input_name;
	struct perf_data output;
	u64 bytes_written;
	u64 aux_id;
	struct list_head samples;
	struct itrace_synth_opts itrace_synth_opts;
	char *event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session guest_session;
	struct strlist *known_build_ids;
	const struct evsel *mmap_evsel;
};

struct event_entry {
	struct list_head node;
	u32 tid;
	union perf_event event[];
};

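/*
 * Forward declarations: depending on the requested rewrite style, build ID
 * information is injected either as a synthesized build_id event or as a
 * replacement PERF_RECORD_MMAP2 event that carries the build ID itself.
 */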
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags);
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename);

static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
	ssize_t size;

	size = perf_data__write(&inject->output, buf, sz);
	if (size < 0)
		return -errno;

	inject->bytes_written += size;
	return 0;
}

static int perf_event__repipe_synth(const struct perf_tool *tool,
				    union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	return output_bytes(inject, event, event->header.size);
}

static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
				       union perf_event *event,
				       struct ordered_events *oe __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_JITDUMP
static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif

static int perf_event__repipe_op2_synth(struct perf_session *session,
					union perf_event *event)
{
	return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_op4_synth(struct perf_session *session,
					union perf_event *event,
					u64 data __maybe_unused,
					const char *str __maybe_unused)
{
	return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_attr(const struct perf_tool *tool,
				   union perf_event *event,
				   struct evlist **pevlist)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	ret = perf_event__process_attr(tool, event, pevlist);
	if (ret)
		return ret;

	/*
	 * If the output isn't a pipe then the attributes will be written as
	 * part of the header.
	 */
	if (!inject->output.is_pipe)
		return 0;

	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_event_update(const struct perf_tool *tool,
					   union perf_event *event,
					   struct evlist **pevlist __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}

static s64 perf_event__repipe_auxtrace(struct perf_session *session,
				       union perf_event *event)
{
	const struct perf_tool *tool = session->tool;
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}

#else

static s64
perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
			    union perf_event *event __maybe_unused)
{
	pr_err("AUX area tracing not supported\n");
	return -EINVAL;
}

#endif

static int perf_event__repipe(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int perf_event__drop_aux(const struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}

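/*
 * Strip the AUX area data out of a sample event: copy the bytes before and
 * after sample->aux_sample into event_copy, then zero the u64 size field
 * that immediately precedes where the AUX data began.
 */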
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}

typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);

static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}

static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/*
		 * The vdso maps are always on the host and not the container.
		 * Ensure that we don't use setns to look them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}

/*
 * The evsel used for the sample ID for mmap events. Typically stashed when
 * processing mmap events. If not stashed, search the evlist for the first
 * mmap gathering event.
 */
static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
{
	struct evsel *pos;

	if (inject->mmap_evsel)
		return inject->mmap_evsel;

	evlist__for_each_entry(inject->session->evlist, pos) {
		if (pos->core.attr.mmap) {
			inject->mmap_evsel = pos;
			return pos;
		}
	}
	pr_err("No mmap events found\n");
	return NULL;
}

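/*
 * Common handling for MMAP and MMAP2 events: convert jitdump mmaps if
 * requested, inject build ID information according to build_id_style, and
 * repipe the original event unless it has been replaced.
 */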
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

			if (evsel) {
				dso__set_hit(dso);
				tool__inject_build_id(tool, sample, machine, evsel,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

		if (evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			dso__put(dso);
			return 0;
		}
	}
	dso__put(dso);
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}

static int perf_event__repipe_mmap(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap.pid, event->mmap.tid,
		event->mmap.start, event->mmap.len, event->mmap.pgoff,
		/*flags=*/0, PROT_EXEC,
		event->mmap.filename, /*dso_id=*/NULL,
		perf_event__process_mmap);
}

static int perf_event__repipe_mmap2(const struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct dso_id id = dso_id_empty;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
	} else {
		id.maj = event->mmap2.maj;
		id.min = event->mmap2.min;
		id.ino = event->mmap2.ino;
		id.ino_generation = event->mmap2.ino_generation;
		id.mmap2_valid = true;
		id.mmap2_ino_generation_valid = true;
	}

	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap2.pid, event->mmap2.tid,
		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
		event->mmap2.flags, event->mmap2.prot,
		event->mmap2.filename, &id,
		perf_event__process_mmap2);
}

static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_fork(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_comm(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_exit(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_event__repipe_tracing_data(struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(session->tool, event);

	return perf_event__process_tracing_data(session, event);
}
#endif

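/*
 * Read the build ID from the file backing the dso, retrying relative to the
 * dso's mount namespace root if the first attempt fails.
 */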
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;
	struct build_id bid = { .size = 0, };

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}

static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}

static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}

static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}

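/*
 * Returns 0 if a replacement mmap2 event carrying the build ID was
 * synthesized, 1 if the caller should repipe the original event instead,
 * and -1 on error.
 */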
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps. */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 1;
	if (is_no_dso_memory(filename))
		return 1;

	if (dso__read_build_id(dso)) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
						    perf_event__repipe,
						    evsel,
						    misc, pid, tid,
						    start, len, pgoff,
						    dso__bid(dso),
						    prot, flags,
						    filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}
	return 0;
}

static int mark_dso_hit(const struct perf_inject *inject,
			const struct perf_tool *tool,
			struct perf_sample *sample,
			struct machine *machine,
			const struct evsel *mmap_evsel,
			struct map *map, bool sample_in_dso)
{
	struct dso *dso;
	u16 misc = sample->cpumode;

	if (!map)
		return 0;

	if (!sample_in_dso) {
		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
				 PERF_RECORD_MISC_GUEST_USER;

		if ((misc & guest_mask) != 0) {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_GUEST_KERNEL
				: PERF_RECORD_MISC_GUEST_USER;
		} else {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_KERNEL
				: PERF_RECORD_MISC_USER;
		}
	}
	dso = map__dso(map);
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
		if (dso && !dso__hit(dso)) {
			dso__set_hit(dso);
			tool__inject_build_id(tool, sample, machine,
					      mmap_evsel, misc, dso__long_name(dso), dso,
					      map__flags(map));
		}
	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		if (!map__hit(map)) {
			const struct build_id null_bid = { .size = 0 };
			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
			const char *filename = dso ? dso__long_name(dso) : "";

			map__set_hit(map);
			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
							      perf_event__repipe,
							      mmap_evsel,
							      misc,
							      sample->pid, sample->tid,
							      map__start(map),
							      map__end(map) - map__start(map),
							      map__pgoff(map),
							      bid,
							      map__prot(map),
							      map__flags(map),
							      filename);
		}
	}
	return 0;
}

struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	struct perf_sample *sample;
	struct machine *machine;
	const struct evsel *mmap_evsel;
};

static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
{
	struct mark_dso_hit_args *args = data;
	struct map *map = node->ms.map;

	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
			    args->mmap_evsel, map, /*sample_in_dso=*/false);
}

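/*
 * Handle a sample by marking the dso of the sampled IP, and the dso of every
 * map in the sample's callchain, so that build ID information is injected
 * for them, then repipe the sample itself.
 */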
int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct mark_dso_hit_args args = {
		.inject = inject,
		.tool = tool,
		/*
		 * Use the parsed sample data of the sample event, which will
		 * have a later timestamp than the mmap event.
		 */
		.sample = sample,
		.machine = machine,
		.mmap_evsel = inject__mmap_evsel(inject),
	};

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
			     /*sample_in_dso=*/true);
	}

	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
					/*symbols=*/false, mark_dso_hit_callback, &args);

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}

static int perf_inject__sched_process_exit(const struct perf_tool *tool,
					   union perf_event *event __maybe_unused,
					   struct perf_sample *sample,
					   struct evsel *evsel __maybe_unused,
					   struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	list_for_each_entry(ent, &inject->samples, node) {
		if (sample->tid == ent->tid) {
			list_del_init(&ent->node);
			free(ent);
			break;
		}
	}

	return 0;
}

static int perf_inject__sched_switch(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);

	ent = malloc(event->header.size + sizeof(struct event_entry));
	if (ent == NULL) {
		color_fprintf(stderr, PERF_COLOR_RED,
			      "Not enough memory to process sched switch event!\n");
		return -1;
	}

	ent->tid = sample->tid;
	memcpy(&ent->event, event, event->header.size);
	list_add(&ent->node, &inject->samples);
	return 0;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}

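/*
 * Guest events are staged in a temporary file (see guest_session__start())
 * before being interleaved into the host output stream.
 */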
static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t ret = writen(gs->tmp_fd, buf, sz);

	return ret < 0 ? ret : 0;
}

static int guest_session__repipe(const struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
	int hash;

	if (!guest_tid)
		return -ENOMEM;

	guest_tid->tid = tid;
	guest_tid->vcpu = vcpu;
	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_tid->node, &gs->tids[hash]);

	return 0;
}

static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * The QEMU option '-name debug-threads=on' causes thread names to be
	 * formatted as below, although it is not an ABI. libvirt also seems
	 * to use this by default. Here we rely on it to tell us which thread
	 * is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
	return perf_session__peek_events(session, session->header.data_offset,
					 session->header.data_size,
					 host_peek_vm_comms_cb, gs);
}

static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id);
}

static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
	do {
		gs->highest_id += 1;
	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

	return gs->highest_id;
}

static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
	int hash;

	if (!guest_id)
		return -ENOMEM;

	guest_id->id = id;
	guest_id->host_id = host_id;
	guest_id->vcpu = vcpu;
	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_id->node, &gs->heads[hash]);

	return 0;
}

static u64 evlist__find_highest_id(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 highest_id = 1;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			u64 id = evsel->core.id[j];

			if (id > highest_id)
				highest_id = id;
		}
	}

	return highest_id;
}

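/*
 * Map each guest sample ID to a freshly allocated host sample ID so that
 * guest and host events can share the host evlist without ID collisions.
 * The CPU of the guest sample ID is recorded as the VCPU.
 */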
static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
	struct hlist_head *head;
	struct guest_id *guest_id;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &gs->heads[hash];

	hlist_for_each_entry(guest_id, head, node)
		if (guest_id->id == id)
			return guest_id;

	return NULL;
}

static int process_attr(const struct perf_tool *tool, union perf_event *event,
			struct perf_sample *sample __maybe_unused,
			struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_attr(tool, event, &inject->session->evlist);
}

static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	struct perf_event_attr attr = evsel->core.attr;
	u64 *id_array;
	u32 *vcpu_array;
	int ret = -ENOMEM;
	u32 i;

	id_array = calloc(evsel->core.ids, sizeof(*id_array));
	if (!id_array)
		return -ENOMEM;

	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
	if (!vcpu_array)
		goto out;

	for (i = 0; i < evsel->core.ids; i++) {
		u64 id = evsel->core.id[i];
		struct guest_id *guest_id = guest_session__lookup_id(gs, id);

		if (!guest_id) {
			pr_err("Failed to find guest id %"PRIu64"\n", id);
			ret = -EINVAL;
			goto out;
		}
		id_array[i] = guest_id->host_id;
		vcpu_array[i] = guest_id->vcpu;
	}

	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
	attr.exclude_host = 1;
	attr.exclude_guest = 0;

	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
					  id_array, process_attr);
	if (ret)
		pr_err("Failed to add guest attr.\n");

	for (i = 0; i < evsel->core.ids; i++) {
		struct perf_sample_id *sid;
		u32 vcpu = vcpu_array[i];

		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
		/* Guest event is per-thread from the host point of view */
		sid->cpu.cpu = -1;
		sid->tid = gs->vcpu[vcpu].tid;
		sid->machine_pid = gs->machine_pid;
		sid->vcpu.cpu = vcpu;
	}
out:
	free(vcpu_array);
	free(id_array);
	return ret;
}

static int guest_session__add_attrs(struct guest_session *gs)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		ret = guest_session__add_attr(gs, evsel);
		if (ret)
			return ret;
	}

	return 0;
}

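/* Synthesize an id_index covering only the newly added guest attributes. */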
static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
	struct perf_session *session = inject->session;
	struct evlist *evlist = session->evlist;
	struct machine *machine = &session->machines.host;
	size_t from = evlist->core.nr_entries - new_cnt;

	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
						 evlist, machine, from);
}

static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct hlist_head *head;
	struct guest_tid *guest_tid;
	int hash;

	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	head = &gs->tids[hash];

	hlist_for_each_entry(guest_tid, head, node)
		if (guest_tid->tid == tid)
			return guest_tid;

	return NULL;
}

static bool dso__is_in_kernel_space(struct dso *dso)
{
	if (dso__is_vdso(dso))
		return false;

	return dso__is_kcore(dso) ||
	       dso__kernel(dso) ||
	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

static u64 evlist__first_id(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.ids)
			return evsel->core.id[0];
	}
	return 0;
}

static int process_build_id(const struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_build_id(inject->session, event);
}

static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	struct perf_sample synth_sample = {
		.pid = -1,
		.tid = -1,
		.time = -1,
		.stream_id = -1,
		.cpu = -1,
		.period = 1,
		.cpumode = dso__is_in_kernel_space(dso)
			   ? PERF_RECORD_MISC_GUEST_KERNEL
			   : PERF_RECORD_MISC_GUEST_USER,
	};

	if (!machine)
		return -ENOMEM;

	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
					       process_build_id, inject__mmap_evsel(inject),
					       /*misc=*/synth_sample.cpumode,
					       dso__bid(dso), dso__long_name(dso));
}

static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
{
	struct guest_session *gs = data;
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	if (!dso__has_build_id(dso))
		return 0;

	return synthesize_build_id(inject, dso, gs->machine_pid);
}

static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	return dsos__for_each_dso(&gs->session->machines.host.dsos,
				  guest_session__add_build_ids_cb,
				  gs);
}

static int guest_session__ksymbol_event(const struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line, i.e. no BPF support */
	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
		return 0;

	return guest_session__output_bytes(gs, event, event->header.size);
}

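/*
 * Open the guest perf.data file and process it into the temporary file from
 * which guest events are later interleaved into the host output.
 */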
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap = guest_session__repipe;
	gs->tool.mmap2 = guest_session__repipe;
	gs->tool.comm = guest_session__repipe;
	gs->tool.fork = guest_session__repipe;
	gs->tool.exit = guest_session__repipe;
	gs->tool.lost = guest_session__repipe;
	gs->tool.context_switch = guest_session__repipe;
	gs->tool.ksymbol = guest_session__ksymbol_event;
	gs->tool.text_poke = guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id = perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index = perf_event__process_id_index;

	gs->tool.ordered_events = true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path = name;
	gs->data.force = force;
	gs->data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs\n");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
	struct hlist_node *pos, *n;
	size_t i;

	for (i = 0; i < hlist_sz; ++i) {
		hlist_for_each_safe(pos, n, &heads[i]) {
			hlist_del(pos);
			free(pos);
		}
	}
}

static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}

static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift = time_conv->time_shift;
	tc->time_mult = time_conv->time_mult;
	tc->time_zero = time_conv->time_zero;
	tc->time_cycles = time_conv->time_cycles;
	tc->time_mask = time_conv->time_mask;
	tc->cap_user_time_zero = time_conv->cap_user_time_zero;
	tc->cap_user_time_short = time_conv->cap_user_time_short;
}

static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}

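/*
 * Convert a guest perf time to host perf time. The full chain is:
 *
 *   guest time -> guest TSC -> (tsc - time_offset) / time_scale
 *              -> host TSC  -> host time
 *
 * with either TSC conversion step skipped if the corresponding
 * cap_user_time_zero capability is absent.
 */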
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset
	 * and Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}

static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event\n");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event\n");
		return ret;
	}

	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}

static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
				    const struct perf_sample *sample)
{
	struct evsel *evsel;
	void *array;
	int ret;

	evsel = evlist__id2evsel(evlist, sample->id);
	array = ev;

	if (!evsel) {
		pr_err("No evsel for id %"PRIu64"\n", sample->id);
		return -EINVAL;
	}

	array += ev->header.size;
	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
	if (ret < 0)
		return ret;

	if (ret & 7) {
		pr_err("Bad id sample size %d\n", ret);
		return -EINVAL;
	}

	ev->header.size += ret;

	return 0;
}

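/*
 * Inject guest events with timestamps up to @timestamp into the host output,
 * rewriting the cpumode to the guest variants and the sample ID and CPU to
 * host values.
 */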
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}

static int guest_session__flush_events(struct guest_session *gs)
{
	return guest_session__inject_events(gs, -1);
}

static int host__repipe(const struct perf_tool *tool,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	ret = guest_session__inject_events(&inject->guest_session, sample->time);
	if (ret)
		return ret;

	return perf_event__repipe(tool, event, sample, machine);
}

static int host__finished_init(struct perf_session *session, union perf_event *event)
{
	struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(session, event);
}

/*
 * Obey finished-round ordering. The FINISHED_ROUND event is processed first,
 * which flushes host events to file up until the last flush time. Then inject
 * guest events up to the same time. Finally write out the FINISHED_ROUND
 * event itself.
 */
static int host__finished_round(const struct perf_tool *tool,
				union perf_event *event,
				struct ordered_events *oe)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret = perf_event__process_finished_round(tool, event, oe);
	u64 timestamp = ordered_events__last_flush_time(oe);

	if (ret)
		return ret;

	ret = guest_session__inject_events(&inject->guest_session, timestamp);
	if (ret)
		return ret;

	return perf_event__repipe_oe_synth(tool, event, oe);
}

static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}

static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}

static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		pr_err("Samples for %s event do not have %s attribute set.\n",
		       name, sample_msg);
		return -EINVAL;
	}

	return 0;
}

static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}

static void strip_init(struct perf_inject *inject)
{
	struct evlist *evlist = inject->session->evlist;
	struct evsel *evsel;

	inject->tool.context_switch = perf_event__drop;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = drop_sample;
}

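/*
 * Parse --vm-time-correlation: an optional leading "dry-run" selects dry-run
 * mode; the remaining text is passed through to the AUX area decoder as
 * vm_tm_corr_args.
 */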
static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}

static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}

static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}

static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	};
}

static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}

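/*
 * Copy a feature section verbatim from the input file, using the offset and
 * size saved earlier by save_section_info().
 */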

static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}

static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	}
}

static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}

static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
	int fd = perf_data__fd(inject->session->data);
	u64 offs = inject->secs[feat].offset;
	size_t sz = inject->secs[feat].size;
	void *buf = malloc(sz);
	int ret;

	if (!buf)
		return -ENOMEM;

	ret = read_file(fd, offs, buf, sz);
	if (ret)
		goto out_free;

	ret = fw->write(fw, buf, sz);
out_free:
	free(buf);
	return ret;
}

struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};

static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (!inject->secs[feat].offset ||
	    !keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);
	if (ret < 0)
		return ret;

	return 1; /* Feature section copied */
}

static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

static int guest_session__copy_kcore_dir(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
		       gs->perf_data_file, inject->output.path, gs->machine_pid);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

static int output_fd(struct perf_inject *inject)
{
	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
}
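
/*
 * __cmd_inject() selects one processing mode from the options: build-id
 * rewriting, sched_stat/sched_switch merging, VM time correlation, AUX
 * trace (--itrace) synthesis, or guest event injection (--guest-data).
 * Event types a mode does not override keep the default repipe handlers
 * set up in cmd_inject() below.
 */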

static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
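		/*
		 * Sketch of the guest-data flow: host events are repiped
		 * as-is, while guest events from the separate session set up
		 * below are interleaved in timestamp order at FINISHED_ROUND
		 * boundaries (see host__finished_round() above).
		 */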
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round = host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}
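
	/*
	 * For file output, rewrite the header: feature sections saved by
	 * save_section_info() are copied via feat_copy_cb(), and the data
	 * offset/size are updated to cover the injected event stream.
	 */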

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}
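
/*
 * Typical invocations (illustrative, not exhaustive):
 *
 *	perf inject -b -i perf.data -o perf.data.new       # add build-id events
 *	perf inject -s -i perf.data -o perf.data.new       # merge sched_stat
 *	perf inject --itrace -i perf.data -o perf.data.new # synthesize from AUX
 */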

int cmd_inject(int argc, const char **argv)
{
	struct perf_inject inject = {
		.input_name = "-",
		.samples = LIST_HEAD_INIT(inject.samples),
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;
	const char *known_build_ids = NULL;
	bool build_ids = false;
	bool build_id_all = false;
	bool mmap2_build_ids = false;
	bool mmap2_build_id_all = false;

	struct option options[] = {
		OPT_BOOLEAN('b', "build-ids", &build_ids,
			    "Inject build-ids into the output stream"),
		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
			    "Inject build-ids of all DSOs into the output stream"),
		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
			    "Drop unused mmap events, make others mmap2 with build IDs"),
		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
			    "Rewrite all mmap events as mmap2 events with build IDs"),
		OPT_STRING(0, "known-build-ids", &known_build_ids,
			   "buildid path [,buildid path...]",
			   "build-ids to use for given paths"),
		OPT_STRING('i', "input", &inject.input_name, "file",
			   "input file name"),
		OPT_STRING('o', "output", &inject.output.path, "file",
			   "output file name"),
		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
			    "Merge sched-stat and sched-switch to determine "
			    "where and how long tasks slept"),
#ifdef HAVE_JITDUMP
		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
		OPT_INCR('v', "verbose", &verbose,
			 "be more verbose (show build ids, etc)"),
		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
			    "don't load vmlinux even if found"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
			   "kallsyms pathname"),
		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
				    NULL, "opts", "Instruction Tracing options\n"
				    ITRACE_HELP,
				    itrace_parse_synth_opts),
		OPT_BOOLEAN(0, "strip", &inject.strip,
			    "strip non-synthesized events (use with --itrace)"),
		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
				    "correlate time between VM guests and the host",
				    parse_vm_time_correlation),
		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
				    "inject events from a guest perf.data file",
				    parse_guest_data),
		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
			   "guest mount directory under which every guest OS"
			   " instance has a subdir"),
		OPT_END()
	};
	const char * const inject_usage[] = {
		"perf inject [<options>]",
		NULL
	};
	bool ordered_events;

	if (!inject.itrace_synth_opts.set) {
		/* Disable eager loading of kernel symbols, which adds overhead to perf inject. */
		symbol_conf.lazy_load_kernel_maps = true;
	}

#ifndef HAVE_JITDUMP
	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
#endif
	argc = parse_options(argc, argv, options, inject_usage, 0);

	/*
	 * Any (unrecognized) arguments left?
	 */
	if (argc)
		usage_with_options(inject_usage, options);

	if (inject.strip && !inject.itrace_synth_opts.set) {
		pr_err("--strip option requires --itrace option\n");
		return -1;
	}

	if (symbol__validate_sym_arguments())
		return -1;

	if (inject.in_place_update) {
		if (!strcmp(inject.input_name, "-")) {
			pr_err("Input file name required for in-place updating\n");
			return -1;
		}
		if (strcmp(inject.output.path, "-")) {
			pr_err("Output file name must not be specified for in-place updating\n");
			return -1;
		}
		if (!data.force && !inject.in_place_update_dry_run) {
			pr_err("The input file would be updated in place, "
			       "the --force option is required.\n");
			return -1;
		}
		if (!inject.in_place_update_dry_run)
			data.in_place_update = true;
	} else {
		if (strcmp(inject.output.path, "-") && !inject.strip &&
		    has_kcore_dir(inject.input_name)) {
			inject.output.is_dir = true;
			inject.copy_kcore_dir = true;
		}
		if (perf_data__open(&inject.output)) {
			perror("failed to create output file");
			return -1;
		}
	}
	if (mmap2_build_ids)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
	if (mmap2_build_id_all)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
	if (build_ids)
		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
	if (build_id_all)
		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;

	data.path = inject.input_name;

	ordered_events = inject.jit_mode || inject.sched_stat ||
		inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
	perf_tool__init(&inject.tool, ordered_events);
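	/*
	 * Default to repiping every event type unchanged; the mode-specific
	 * setup in __cmd_inject() overrides a subset of these handlers.
	 */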
	inject.tool.sample = perf_event__repipe_sample;
	inject.tool.read = perf_event__repipe_sample;
	inject.tool.mmap = perf_event__repipe;
	inject.tool.mmap2 = perf_event__repipe;
	inject.tool.comm = perf_event__repipe;
	inject.tool.namespaces = perf_event__repipe;
	inject.tool.cgroup = perf_event__repipe;
	inject.tool.fork = perf_event__repipe;
	inject.tool.exit = perf_event__repipe;
	inject.tool.lost = perf_event__repipe;
	inject.tool.lost_samples = perf_event__repipe;
	inject.tool.aux = perf_event__repipe;
	inject.tool.itrace_start = perf_event__repipe;
	inject.tool.aux_output_hw_id = perf_event__repipe;
	inject.tool.context_switch = perf_event__repipe;
	inject.tool.throttle = perf_event__repipe;
	inject.tool.unthrottle = perf_event__repipe;
	inject.tool.ksymbol = perf_event__repipe;
	inject.tool.bpf = perf_event__repipe;
	inject.tool.text_poke = perf_event__repipe;
	inject.tool.attr = perf_event__repipe_attr;
	inject.tool.event_update = perf_event__repipe_event_update;
	inject.tool.tracing_data = perf_event__repipe_op2_synth;
	inject.tool.finished_round = perf_event__repipe_oe_synth;
	inject.tool.build_id = perf_event__repipe_op2_synth;
	inject.tool.id_index = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_info = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_error = perf_event__repipe_op2_synth;
	inject.tool.time_conv = perf_event__repipe_op2_synth;
	inject.tool.thread_map = perf_event__repipe_op2_synth;
	inject.tool.cpu_map = perf_event__repipe_op2_synth;
	inject.tool.stat_config = perf_event__repipe_op2_synth;
	inject.tool.stat = perf_event__repipe_op2_synth;
	inject.tool.stat_round = perf_event__repipe_op2_synth;
	inject.tool.feature = perf_event__repipe_op2_synth;
	inject.tool.finished_init = perf_event__repipe_op2_synth;
	inject.tool.compressed = perf_event__repipe_op4_synth;
	inject.tool.auxtrace = perf_event__repipe_auxtrace;
	inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
	inject.tool.dont_split_sample_group = true;
	inject.session = __perf_session__new(&data, &inject.tool,
					     /*trace_event_repipe=*/inject.output.is_pipe,
					     /*host_env=*/NULL);

	if (IS_ERR(inject.session)) {
		ret = PTR_ERR(inject.session);
		goto out_close_output;
	}

	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
		pr_warning("Decompression initialization failed.\n");

	/* Save original section info before feature bits change */
	ret = save_section_info(&inject);
	if (ret)
		goto out_delete;

	if (inject.output.is_pipe) {
		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
		if (ret < 0) {
			pr_err("Couldn't write a new pipe header.\n");
			goto out_delete;
		}

		/*
		 * If the input is already a pipe then the features and
		 * attributes don't need synthesizing, they will be present in
		 * the input.
		 */
		if (!data.is_pipe) {
			ret = perf_event__synthesize_for_pipe(&inject.tool,
							      inject.session,
							      &inject.output,
							      perf_event__repipe);
			if (ret < 0)
				goto out_delete;
		}
	}

	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		/*
		 * Use timestamp ordering to make sure the mmap records are
		 * ordered correctly, especially wrt jitted code mmaps. We
		 * cannot generate the buildid hit list and inject the jit
		 * mmaps at the same time for now.
		 */
		inject.tool.ordering_requires_timestamps = true;
	}
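
	/*
	 * Example (hypothetical build-id): --known-build-ids takes comma
	 * separated "buildid path" pairs, e.g.
	 *
	 *	--known-build-ids="89d4e9ed9a7d20e5f09b9e73872cda571ca57a67 /usr/bin/foo"
	 *
	 * overriding build-id lookup for the named paths.
	 */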
	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
		inject.known_build_ids =
			perf_inject__parse_known_build_ids(known_build_ids);

		if (inject.known_build_ids == NULL) {
			pr_err("Couldn't parse known build ids.\n");
			goto out_delete;
		}
	}

#ifdef HAVE_JITDUMP
	if (inject.jit_mode) {
		inject.tool.mmap2 = perf_event__repipe_mmap2;
		inject.tool.mmap = perf_event__repipe_mmap;
		inject.tool.ordering_requires_timestamps = true;
		/*
		 * JIT MMAP injection injects all MMAP events in one go, so it
		 * does not obey finished_round semantics.
		 */
		inject.tool.finished_round = perf_event__drop_oe;
	}
#endif
	ret = symbol__init(perf_session__env(inject.session));
	if (ret < 0)
		goto out_delete;

	ret = __cmd_inject(&inject);

	guest_session__exit(&inject.guest_session);

out_delete:
	strlist__delete(inject.known_build_ids);
	zstd_fini(&(inject.session->zstd_data));
	perf_session__delete(inject.session);
out_close_output:
	if (!inject.in_place_update)
		perf_data__close(&inject.output);
	free(inject.itrace_synth_opts.vm_tm_corr_args);
	free(inject.event_copy);
	free(inject.guest_session.ev.event_buf);
	return ret;
}