1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include "util/dso.h" 13 #include "util/vdso.h" 14 #include "util/evlist.h" 15 #include "util/evsel.h" 16 #include "util/map.h" 17 #include "util/session.h" 18 #include "util/tool.h" 19 #include "util/debug.h" 20 #include "util/build-id.h" 21 #include "util/data.h" 22 #include "util/auxtrace.h" 23 #include "util/jit.h" 24 #include "util/string2.h" 25 #include "util/symbol.h" 26 #include "util/synthetic-events.h" 27 #include "util/thread.h" 28 #include "util/namespaces.h" 29 #include "util/util.h" 30 #include "util/tsc.h" 31 32 #include <internal/lib.h> 33 34 #include <linux/err.h> 35 #include <subcmd/parse-options.h> 36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 37 38 #include <linux/list.h> 39 #include <linux/string.h> 40 #include <linux/zalloc.h> 41 #include <linux/hash.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <inttypes.h> 46 47 struct guest_event { 48 struct perf_sample sample; 49 union perf_event *event; 50 char *event_buf; 51 }; 52 53 struct guest_id { 54 /* hlist_node must be first, see free_hlist() */ 55 struct hlist_node node; 56 u64 id; 57 u64 host_id; 58 u32 vcpu; 59 }; 60 61 struct guest_tid { 62 /* hlist_node must be first, see free_hlist() */ 63 struct hlist_node node; 64 /* Thread ID of QEMU thread */ 65 u32 tid; 66 u32 vcpu; 67 }; 68 69 struct guest_vcpu { 70 /* Current host CPU */ 71 u32 cpu; 72 /* Thread ID of QEMU thread */ 73 u32 tid; 74 }; 75 76 struct guest_session { 77 char *perf_data_file; 78 u32 machine_pid; 79 u64 time_offset; 80 double time_scale; 81 struct perf_tool tool; 82 struct perf_data data; 83 struct perf_session *session; 84 char *tmp_file_name; 85 int tmp_fd; 
struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool copy_kcore_dir;
	bool have_tc;
	bool fetched;
	bool ready;
	u16 dflt_id_hdr_size;
	u64 dflt_id;
	/* Highest sample ID seen so far; used when allocating new host IDs */
	u64 highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu *vcpu;
	size_t vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event ev;
};

/*
 * How (and whether) build IDs are injected into the output:
 * either as separate header-style build-id events (lazily on first DSO
 * hit, or for all mmaps), or by rewriting mmap2 events to carry the
 * build ID inline.
 */
enum build_id_rewrite_style {
	BID_RWS__NONE = 0,
	BID_RWS__INJECT_HEADER_LAZY,
	BID_RWS__INJECT_HEADER_ALL,
	BID_RWS__MMAP2_BUILDID_ALL,
	BID_RWS__MMAP2_BUILDID_LAZY,
};

/* State for one 'perf inject' run; embeds the tool callbacks. */
struct perf_inject {
	struct perf_tool tool;
	struct perf_session *session;
	enum build_id_rewrite_style build_id_style;
	bool sched_stat;
	bool have_auxtrace;
	bool strip;
	bool jit_mode;
	bool in_place_update;
	bool in_place_update_dry_run;
	bool copy_kcore_dir;
	const char *input_name;
	struct perf_data output;
	/* Running count of bytes written to the output */
	u64 bytes_written;
	u64 aux_id;
	/* List of struct event_entry, used by the sched_stat machinery */
	struct list_head samples;
	struct itrace_synth_opts itrace_synth_opts;
	/* Scratch buffer for perf_inject__cut_auxtrace_sample() */
	char *event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session guest_session;
	/* User-supplied "buildid dso-name" pairs, see --known-build-ids */
	struct strlist *known_build_ids;
	/* Cached evsel for mmap events, see inject__mmap_evsel() */
	const struct evsel *mmap_evsel;
};

/* A stashed copy of an event, keyed by thread ID (flexible array tail). */
struct event_entry {
	struct list_head node;
	u32 tid;
	union perf_event event[];
};

static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags);
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64
pgoff, 158 struct dso *dso, 159 __u32 prot, __u32 flags, 160 const char *filename); 161 162 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 163 { 164 ssize_t size; 165 166 size = perf_data__write(&inject->output, buf, sz); 167 if (size < 0) 168 return -errno; 169 170 inject->bytes_written += size; 171 return 0; 172 } 173 174 static int perf_event__repipe_synth(const struct perf_tool *tool, 175 union perf_event *event) 176 177 { 178 struct perf_inject *inject = container_of(tool, struct perf_inject, 179 tool); 180 181 return output_bytes(inject, event, event->header.size); 182 } 183 184 static int perf_event__repipe_oe_synth(const struct perf_tool *tool, 185 union perf_event *event, 186 struct ordered_events *oe __maybe_unused) 187 { 188 return perf_event__repipe_synth(tool, event); 189 } 190 191 #ifdef HAVE_JITDUMP 192 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused, 193 union perf_event *event __maybe_unused, 194 struct ordered_events *oe __maybe_unused) 195 { 196 return 0; 197 } 198 #endif 199 200 static int perf_event__repipe_op2_synth(const struct perf_tool *tool, 201 struct perf_session *session __maybe_unused, 202 union perf_event *event) 203 { 204 return perf_event__repipe_synth(tool, event); 205 } 206 207 static int perf_event__repipe_op4_synth(const struct perf_tool *tool, 208 struct perf_session *session __maybe_unused, 209 union perf_event *event, 210 u64 data __maybe_unused, 211 const char *str __maybe_unused) 212 { 213 return perf_event__repipe_synth(tool, event); 214 } 215 216 static int perf_event__repipe_attr(const struct perf_tool *tool, 217 union perf_event *event, 218 struct evlist **pevlist) 219 { 220 struct perf_inject *inject = container_of(tool, struct perf_inject, 221 tool); 222 int ret; 223 224 ret = perf_event__process_attr(tool, event, pevlist); 225 if (ret) 226 return ret; 227 228 /* If the output isn't a pipe then the attributes will be written as part of the header. 
*/ 229 if (!inject->output.is_pipe) 230 return 0; 231 232 return perf_event__repipe_synth(tool, event); 233 } 234 235 static int perf_event__repipe_event_update(const struct perf_tool *tool, 236 union perf_event *event, 237 struct evlist **pevlist __maybe_unused) 238 { 239 return perf_event__repipe_synth(tool, event); 240 } 241 242 #ifdef HAVE_AUXTRACE_SUPPORT 243 244 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 245 { 246 char buf[4096]; 247 ssize_t ssz; 248 int ret; 249 250 while (size > 0) { 251 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 252 if (ssz < 0) 253 return -errno; 254 ret = output_bytes(inject, buf, ssz); 255 if (ret) 256 return ret; 257 size -= ssz; 258 } 259 260 return 0; 261 } 262 263 static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool, 264 struct perf_session *session, 265 union perf_event *event) 266 { 267 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 268 int ret; 269 270 inject->have_auxtrace = true; 271 272 if (!inject->output.is_pipe) { 273 off_t offset; 274 275 offset = lseek(inject->output.file.fd, 0, SEEK_CUR); 276 if (offset == -1) 277 return -errno; 278 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 279 event, offset); 280 if (ret < 0) 281 return ret; 282 } 283 284 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 285 ret = output_bytes(inject, event, event->header.size); 286 if (ret < 0) 287 return ret; 288 ret = copy_bytes(inject, session->data, 289 event->auxtrace.size); 290 } else { 291 ret = output_bytes(inject, event, 292 event->header.size + event->auxtrace.size); 293 } 294 if (ret < 0) 295 return ret; 296 297 return event->auxtrace.size; 298 } 299 300 #else 301 302 static s64 303 perf_event__repipe_auxtrace(const struct perf_tool *tool __maybe_unused, 304 struct perf_session *session __maybe_unused, 305 union perf_event *event __maybe_unused) 306 { 307 pr_err("AUX area tracing not supported\n"); 308 
return -EINVAL;
}

#endif

/* Pass an event through to the output unchanged. */
static int perf_event__repipe(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

/* Discard an event: write nothing to the output. */
static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

/*
 * Discard an AUX event but remember the first sample ID seen, so that
 * related events can be identified later via inject->aux_id.
 */
static int perf_event__drop_aux(const struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}

/*
 * Copy a sample event into inject->event_copy with its embedded
 * PERF_SAMPLE_AUX data removed: keep the bytes before the aux payload
 * (sz1, which includes the aux size word) and after it (sz2), shrink
 * the header size accordingly, and zero the trailing u64 of the first
 * part (the aux_sample size field).  On inconsistent sizes the original
 * event is returned untouched; on OOM an ERR_PTR is returned.
 */
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	/* Sanity-check the split before copying anything */
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	/* Clear the aux_sample size word that precedes the removed data */
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}

/* Per-evsel sample handler, stashed in evsel->handler. */
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);
/*
 * Repipe a sample event.  If the evsel has a specific handler installed
 * (see inject_handler), delegate to it; otherwise mark the DSO as hit
 * for lazy build-id injection and, when itrace synthesis is active,
 * strip any embedded aux_sample data before repiping.
 */
static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}

/*
 * Find or create the DSO for @filename in the context of task @pid/@tid,
 * attaching the thread's namespace info so later build-id reads can
 * enter the right mount namespace.  Returns a reference the caller must
 * dso__put(), or NULL if the thread cannot be created.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container.  Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}

/*
 * The evsel used for the sample ID for mmap events. Typically stashed when
 * processing mmap events.
If not stashed, search the evlist for the first mmap 449 * gathering event. 450 */ 451 static const struct evsel *inject__mmap_evsel(struct perf_inject *inject) 452 { 453 struct evsel *pos; 454 455 if (inject->mmap_evsel) 456 return inject->mmap_evsel; 457 458 evlist__for_each_entry(inject->session->evlist, pos) { 459 if (pos->core.attr.mmap) { 460 inject->mmap_evsel = pos; 461 return pos; 462 } 463 } 464 pr_err("No mmap events found\n"); 465 return NULL; 466 } 467 468 static int perf_event__repipe_common_mmap(const struct perf_tool *tool, 469 union perf_event *event, 470 struct perf_sample *sample, 471 struct machine *machine, 472 __u32 pid, __u32 tid, 473 __u64 start, __u64 len, __u64 pgoff, 474 __u32 flags, __u32 prot, 475 const char *filename, 476 const struct dso_id *dso_id, 477 int (*perf_event_process)(const struct perf_tool *tool, 478 union perf_event *event, 479 struct perf_sample *sample, 480 struct machine *machine)) 481 { 482 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 483 struct dso *dso = NULL; 484 bool dso_sought = false; 485 486 #ifdef HAVE_JITDUMP 487 if (inject->jit_mode) { 488 u64 n = 0; 489 int ret; 490 491 /* If jit marker, then inject jit mmaps and generate ELF images. 
*/ 492 ret = jit_process(inject->session, &inject->output, machine, 493 filename, pid, tid, &n); 494 if (ret < 0) 495 return ret; 496 if (ret) { 497 inject->bytes_written += n; 498 return 0; 499 } 500 } 501 #endif 502 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 503 dso = findnew_dso(pid, tid, filename, dso_id, machine); 504 dso_sought = true; 505 if (dso) { 506 /* mark it not to inject build-id */ 507 dso__set_hit(dso); 508 } 509 } 510 if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) { 511 if (!dso_sought) { 512 dso = findnew_dso(pid, tid, filename, dso_id, machine); 513 dso_sought = true; 514 } 515 516 if (dso && !dso__hit(dso)) { 517 struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event); 518 519 if (evsel) { 520 dso__set_hit(dso); 521 tool__inject_build_id(tool, sample, machine, evsel, 522 /*misc=*/sample->cpumode, 523 filename, dso, flags); 524 } 525 } 526 } else { 527 int err; 528 529 /* 530 * Remember the evsel for lazy build id generation. It is used 531 * for the sample id header type. 532 */ 533 if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 534 inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) && 535 !inject->mmap_evsel) 536 inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event); 537 538 /* Create the thread, map, etc. Not done for the unordered inject all case. 
*/ 539 err = perf_event_process(tool, event, sample, machine); 540 541 if (err) { 542 dso__put(dso); 543 return err; 544 } 545 } 546 if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) && 547 !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { 548 struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event); 549 550 if (evsel && !dso_sought) { 551 dso = findnew_dso(pid, tid, filename, dso_id, machine); 552 dso_sought = true; 553 } 554 if (evsel && dso && 555 !tool__inject_mmap2_build_id(tool, sample, machine, evsel, 556 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID, 557 pid, tid, start, len, pgoff, 558 dso, 559 prot, flags, 560 filename)) { 561 /* Injected mmap2 so no need to repipe. */ 562 dso__put(dso); 563 return 0; 564 } 565 } 566 dso__put(dso); 567 if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) 568 return 0; 569 570 return perf_event__repipe(tool, event, sample, machine); 571 } 572 573 static int perf_event__repipe_mmap(const struct perf_tool *tool, 574 union perf_event *event, 575 struct perf_sample *sample, 576 struct machine *machine) 577 { 578 return perf_event__repipe_common_mmap( 579 tool, event, sample, machine, 580 event->mmap.pid, event->mmap.tid, 581 event->mmap.start, event->mmap.len, event->mmap.pgoff, 582 /*flags=*/0, PROT_EXEC, 583 event->mmap.filename, /*dso_id=*/NULL, 584 perf_event__process_mmap); 585 } 586 587 static int perf_event__repipe_mmap2(const struct perf_tool *tool, 588 union perf_event *event, 589 struct perf_sample *sample, 590 struct machine *machine) 591 { 592 struct dso_id id = dso_id_empty; 593 594 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 595 build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); 596 } else { 597 id.maj = event->mmap2.maj; 598 id.min = event->mmap2.min; 599 id.ino = event->mmap2.ino; 600 id.ino_generation = event->mmap2.ino_generation; 601 id.mmap2_valid = true; 602 id.mmap2_ino_generation_valid = true; 603 } 604 605 return 
perf_event__repipe_common_mmap( 606 tool, event, sample, machine, 607 event->mmap2.pid, event->mmap2.tid, 608 event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, 609 event->mmap2.flags, event->mmap2.prot, 610 event->mmap2.filename, &id, 611 perf_event__process_mmap2); 612 } 613 614 static int perf_event__repipe_fork(const struct perf_tool *tool, 615 union perf_event *event, 616 struct perf_sample *sample, 617 struct machine *machine) 618 { 619 int err; 620 621 err = perf_event__process_fork(tool, event, sample, machine); 622 perf_event__repipe(tool, event, sample, machine); 623 624 return err; 625 } 626 627 static int perf_event__repipe_comm(const struct perf_tool *tool, 628 union perf_event *event, 629 struct perf_sample *sample, 630 struct machine *machine) 631 { 632 int err; 633 634 err = perf_event__process_comm(tool, event, sample, machine); 635 perf_event__repipe(tool, event, sample, machine); 636 637 return err; 638 } 639 640 static int perf_event__repipe_namespaces(const struct perf_tool *tool, 641 union perf_event *event, 642 struct perf_sample *sample, 643 struct machine *machine) 644 { 645 int err = perf_event__process_namespaces(tool, event, sample, machine); 646 647 perf_event__repipe(tool, event, sample, machine); 648 649 return err; 650 } 651 652 static int perf_event__repipe_exit(const struct perf_tool *tool, 653 union perf_event *event, 654 struct perf_sample *sample, 655 struct machine *machine) 656 { 657 int err; 658 659 err = perf_event__process_exit(tool, event, sample, machine); 660 perf_event__repipe(tool, event, sample, machine); 661 662 return err; 663 } 664 665 #ifdef HAVE_LIBTRACEEVENT 666 static int perf_event__repipe_tracing_data(const struct perf_tool *tool, 667 struct perf_session *session, 668 union perf_event *event) 669 { 670 perf_event__repipe_synth(tool, event); 671 672 return perf_event__process_tracing_data(tool, session, event); 673 } 674 #endif 675 676 static int dso__read_build_id(struct dso *dso) 677 { 678 struct 
nscookie nsc;
	struct build_id bid = { .size = 0, };

	/* Already known - nothing to do */
	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		/* Retry with the path rebased into the DSO's mount namespace */
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}

/*
 * Parse the user-supplied --known-build-ids string into a strlist of
 * "buildid dso-name" entries.  Entries with no separating space, an
 * odd-length or oversized build-id, or non-hex build-id characters are
 * dropped.  Returns NULL on allocation failure.
 */
static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}

/*
 * If @dso matches an entry in inject->known_build_ids, decode the hex
 * build-id from that entry into the DSO and return true.  Entries were
 * pre-validated by perf_inject__parse_known_build_ids().
 */
static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id =
skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		/* Clamp to the binary build-id capacity */
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		/* Decode hex pairs into raw build-id bytes */
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}

/*
 * Synthesize and repipe a build-id event for @dso.  Anonymous, hugetlb
 * and no-DSO mappings are skipped (returns 0).  Returns 1 when the
 * build-id came from --known-build-ids (nothing synthesized here),
 * -1 when no build-id could be read or synthesis failed, 0 on success.
 */
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}

/*
 * Synthesize and repipe an mmap2 event carrying @dso's build-id inline.
 * Returns 1 when the caller should repipe the original event instead
 * (anonymous/hugetlb/no-DSO mappings), -1 on failure, 0 on success.
 */
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps.
*/ 817 if (is_anon_memory(filename) || flags & MAP_HUGETLB) 818 return 1; 819 if (is_no_dso_memory(filename)) 820 return 1; 821 822 if (dso__read_build_id(dso)) { 823 pr_debug("no build_id found for %s\n", filename); 824 return -1; 825 } 826 827 err = perf_event__synthesize_mmap2_build_id(tool, sample, machine, 828 perf_event__repipe, 829 evsel, 830 misc, pid, tid, 831 start, len, pgoff, 832 dso__bid(dso), 833 prot, flags, 834 filename); 835 if (err) { 836 pr_err("Can't synthesize build_id event for %s\n", filename); 837 return -1; 838 } 839 return 0; 840 } 841 842 static int mark_dso_hit(const struct perf_inject *inject, 843 const struct perf_tool *tool, 844 struct perf_sample *sample, 845 struct machine *machine, 846 const struct evsel *mmap_evsel, 847 struct map *map, bool sample_in_dso) 848 { 849 struct dso *dso; 850 u16 misc = sample->cpumode; 851 852 if (!map) 853 return 0; 854 855 if (!sample_in_dso) { 856 u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL | 857 PERF_RECORD_MISC_GUEST_USER; 858 859 if ((misc & guest_mask) != 0) { 860 misc &= PERF_RECORD_MISC_HYPERVISOR; 861 misc |= __map__is_kernel(map) 862 ? PERF_RECORD_MISC_GUEST_KERNEL 863 : PERF_RECORD_MISC_GUEST_USER; 864 } else { 865 misc &= PERF_RECORD_MISC_HYPERVISOR; 866 misc |= __map__is_kernel(map) 867 ? PERF_RECORD_MISC_KERNEL 868 : PERF_RECORD_MISC_USER; 869 } 870 } 871 dso = map__dso(map); 872 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 873 if (dso && !dso__hit(dso)) { 874 dso__set_hit(dso); 875 tool__inject_build_id(tool, sample, machine, 876 mmap_evsel, misc, dso__long_name(dso), dso, 877 map__flags(map)); 878 } 879 } else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 880 if (!map__hit(map)) { 881 const struct build_id null_bid = { .size = 0 }; 882 const struct build_id *bid = dso ? dso__bid(dso) : &null_bid; 883 const char *filename = dso ? 
dso__long_name(dso) : ""; 884 885 map__set_hit(map); 886 perf_event__synthesize_mmap2_build_id(tool, sample, machine, 887 perf_event__repipe, 888 mmap_evsel, 889 misc, 890 sample->pid, sample->tid, 891 map__start(map), 892 map__end(map) - map__start(map), 893 map__pgoff(map), 894 bid, 895 map__prot(map), 896 map__flags(map), 897 filename); 898 } 899 } 900 return 0; 901 } 902 903 struct mark_dso_hit_args { 904 const struct perf_inject *inject; 905 const struct perf_tool *tool; 906 struct perf_sample *sample; 907 struct machine *machine; 908 const struct evsel *mmap_evsel; 909 }; 910 911 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data) 912 { 913 struct mark_dso_hit_args *args = data; 914 struct map *map = node->ms.map; 915 916 return mark_dso_hit(args->inject, args->tool, args->sample, args->machine, 917 args->mmap_evsel, map, /*sample_in_dso=*/false); 918 } 919 920 int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, 921 struct perf_sample *sample, 922 struct evsel *evsel __maybe_unused, 923 struct machine *machine) 924 { 925 struct addr_location al; 926 struct thread *thread; 927 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 928 struct mark_dso_hit_args args = { 929 .inject = inject, 930 .tool = tool, 931 /* 932 * Use the parsed sample data of the sample event, which will 933 * have a later timestamp than the mmap event. 
934 */ 935 .sample = sample, 936 .machine = machine, 937 .mmap_evsel = inject__mmap_evsel(inject), 938 }; 939 940 addr_location__init(&al); 941 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 942 if (thread == NULL) { 943 pr_err("problem processing %d event, skipping it.\n", 944 event->header.type); 945 goto repipe; 946 } 947 948 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 949 mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map, 950 /*sample_in_dso=*/true); 951 } 952 953 sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, 954 /*symbols=*/false, mark_dso_hit_callback, &args); 955 956 thread__put(thread); 957 repipe: 958 perf_event__repipe(tool, event, sample, machine); 959 addr_location__exit(&al); 960 return 0; 961 } 962 963 static int perf_inject__sched_process_exit(const struct perf_tool *tool, 964 union perf_event *event __maybe_unused, 965 struct perf_sample *sample, 966 struct evsel *evsel __maybe_unused, 967 struct machine *machine __maybe_unused) 968 { 969 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 970 struct event_entry *ent; 971 972 list_for_each_entry(ent, &inject->samples, node) { 973 if (sample->tid == ent->tid) { 974 list_del_init(&ent->node); 975 free(ent); 976 break; 977 } 978 } 979 980 return 0; 981 } 982 983 static int perf_inject__sched_switch(const struct perf_tool *tool, 984 union perf_event *event, 985 struct perf_sample *sample, 986 struct evsel *evsel, 987 struct machine *machine) 988 { 989 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 990 struct event_entry *ent; 991 992 perf_inject__sched_process_exit(tool, event, sample, evsel, machine); 993 994 ent = malloc(event->header.size + sizeof(struct event_entry)); 995 if (ent == NULL) { 996 color_fprintf(stderr, PERF_COLOR_RED, 997 "Not enough memory to process sched switch event!"); 998 return -1; 999 } 1000 1001 ent->tid = sample->tid; 1002 
memcpy(&ent->event, event, event->header.size);
	list_add(&ent->node, &inject->samples);
	return 0;
}

#ifdef HAVE_LIBTRACEEVENT
/*
 * Handle a sched_stat sample: find the stashed sched_switch event for
 * the same pid, graft this sample's period and time onto it, and repipe
 * the rewritten switch event in place of the stat event.
 */
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	/* No stashed switch event for this pid - drop the stat sample */
	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

/*
 * Return the guest_vcpu slot for @vcpu, growing the array as needed.
 * Returns NULL on allocation failure.
 */
static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}

/* Append raw bytes to the guest session's temporary file. */
static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t ret = writen(gs->tmp_fd, buf, sz);

	return ret < 0 ?
ret : 0; 1051 } 1052 1053 static int guest_session__repipe(const struct perf_tool *tool, 1054 union perf_event *event, 1055 struct perf_sample *sample __maybe_unused, 1056 struct machine *machine __maybe_unused) 1057 { 1058 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1059 1060 return guest_session__output_bytes(gs, event, event->header.size); 1061 } 1062 1063 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 1064 { 1065 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 1066 int hash; 1067 1068 if (!guest_tid) 1069 return -ENOMEM; 1070 1071 guest_tid->tid = tid; 1072 guest_tid->vcpu = vcpu; 1073 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 1074 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 1075 1076 return 0; 1077 } 1078 1079 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 1080 union perf_event *event, 1081 u64 offset __maybe_unused, void *data) 1082 { 1083 struct guest_session *gs = data; 1084 unsigned int vcpu; 1085 struct guest_vcpu *guest_vcpu; 1086 int ret; 1087 1088 if (event->header.type != PERF_RECORD_COMM || 1089 event->comm.pid != gs->machine_pid) 1090 return 0; 1091 1092 /* 1093 * QEMU option -name debug-threads=on, causes thread names formatted as 1094 * below, although it is not an ABI. Also libvirt seems to use this by 1095 * default. Here we rely on it to tell us which thread is which VCPU. 
*/
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	/* vcpu is used as an array index and must fit in an int */
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

/*
 * Scan the host session's data for QEMU VCPU thread comm events and
 * record the tid -> vcpu mapping on the guest session.
 */
static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
	return perf_session__peek_events(session, session->header.data_offset,
					 session->header.data_size,
					 host_peek_vm_comms_cb, gs);
}

/* True if @id already identifies a sample ID in @evlist. */
static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id);
}

/*
 * Allocate a new sample ID that is non-zero and not already used by the
 * host evlist, by advancing gs->highest_id.
 */
static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
	do {
		gs->highest_id += 1;
	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

	return gs->highest_id;
}

/*
 * Record a guest-id -> host-id (+vcpu) mapping in the gs->heads hash
 * table, keyed by the guest id.  Returns -ENOMEM on allocation failure.
 */
static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
	int hash;

	if (!guest_id)
		return -ENOMEM;

	guest_id->id = id;
	guest_id->host_id = host_id;
	guest_id->vcpu = vcpu;
	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_id->node, &gs->heads[hash]);

	return 0;
}

/* Return the largest sample ID in @evlist (at least 1). */
static u64 evlist__find_highest_id(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 highest_id = 1;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j <
evsel->core.ids; j++) { 1165 u64 id = evsel->core.id[j]; 1166 1167 if (id > highest_id) 1168 highest_id = id; 1169 } 1170 } 1171 1172 return highest_id; 1173 } 1174 1175 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 1176 { 1177 struct evlist *evlist = gs->session->evlist; 1178 struct evsel *evsel; 1179 int ret; 1180 1181 evlist__for_each_entry(evlist, evsel) { 1182 u32 j; 1183 1184 for (j = 0; j < evsel->core.ids; j++) { 1185 struct perf_sample_id *sid; 1186 u64 host_id; 1187 u64 id; 1188 1189 id = evsel->core.id[j]; 1190 sid = evlist__id2sid(evlist, id); 1191 if (!sid || sid->cpu.cpu == -1) 1192 continue; 1193 host_id = guest_session__allocate_new_id(gs, host_evlist); 1194 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 1195 if (ret) 1196 return ret; 1197 } 1198 } 1199 1200 return 0; 1201 } 1202 1203 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1204 { 1205 struct hlist_head *head; 1206 struct guest_id *guest_id; 1207 int hash; 1208 1209 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1210 head = &gs->heads[hash]; 1211 1212 hlist_for_each_entry(guest_id, head, node) 1213 if (guest_id->id == id) 1214 return guest_id; 1215 1216 return NULL; 1217 } 1218 1219 static int process_attr(const struct perf_tool *tool, union perf_event *event, 1220 struct perf_sample *sample __maybe_unused, 1221 struct machine *machine __maybe_unused) 1222 { 1223 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1224 1225 return perf_event__process_attr(tool, event, &inject->session->evlist); 1226 } 1227 1228 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1229 { 1230 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1231 struct perf_event_attr attr = evsel->core.attr; 1232 u64 *id_array; 1233 u32 *vcpu_array; 1234 int ret = -ENOMEM; 1235 u32 i; 1236 1237 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1238 
if (!id_array) 1239 return -ENOMEM; 1240 1241 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1242 if (!vcpu_array) 1243 goto out; 1244 1245 for (i = 0; i < evsel->core.ids; i++) { 1246 u64 id = evsel->core.id[i]; 1247 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1248 1249 if (!guest_id) { 1250 pr_err("Failed to find guest id %"PRIu64"\n", id); 1251 ret = -EINVAL; 1252 goto out; 1253 } 1254 id_array[i] = guest_id->host_id; 1255 vcpu_array[i] = guest_id->vcpu; 1256 } 1257 1258 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1259 attr.exclude_host = 1; 1260 attr.exclude_guest = 0; 1261 1262 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1263 id_array, process_attr); 1264 if (ret) 1265 pr_err("Failed to add guest attr.\n"); 1266 1267 for (i = 0; i < evsel->core.ids; i++) { 1268 struct perf_sample_id *sid; 1269 u32 vcpu = vcpu_array[i]; 1270 1271 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1272 /* Guest event is per-thread from the host point of view */ 1273 sid->cpu.cpu = -1; 1274 sid->tid = gs->vcpu[vcpu].tid; 1275 sid->machine_pid = gs->machine_pid; 1276 sid->vcpu.cpu = vcpu; 1277 } 1278 out: 1279 free(vcpu_array); 1280 free(id_array); 1281 return ret; 1282 } 1283 1284 static int guest_session__add_attrs(struct guest_session *gs) 1285 { 1286 struct evlist *evlist = gs->session->evlist; 1287 struct evsel *evsel; 1288 int ret; 1289 1290 evlist__for_each_entry(evlist, evsel) { 1291 ret = guest_session__add_attr(gs, evsel); 1292 if (ret) 1293 return ret; 1294 } 1295 1296 return 0; 1297 } 1298 1299 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1300 { 1301 struct perf_session *session = inject->session; 1302 struct evlist *evlist = session->evlist; 1303 struct machine *machine = &session->machines.host; 1304 size_t from = evlist->core.nr_entries - new_cnt; 1305 1306 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1307 evlist, machine, from); 1308 } 
/* Look up the TID -> VCPU mapping made by guest_session__map_tid(). */
static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct hlist_head *head;
	struct guest_tid *guest_tid;
	int hash;

	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	head = &gs->tids[hash];

	hlist_for_each_entry(guest_tid, head, node)
		if (guest_tid->tid == tid)
			return guest_tid;

	return NULL;
}

/*
 * Decide whether a DSO lives in kernel space, for choosing the cpumode of
 * synthesized build ID events.  The vdso maps into user space even though it
 * is kernel code.
 */
static bool dso__is_in_kernel_space(struct dso *dso)
{
	if (dso__is_vdso(dso))
		return false;

	return dso__is_kcore(dso) ||
	       dso__kernel(dso) ||
	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

/* First sample ID in @evlist, or 0 if there are none. */
static u64 evlist__first_id(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.ids)
			return evsel->core.id[0];
	}
	return 0;
}

/* Feed a synthesized build ID event into the host (output) session. */
static int process_build_id(const struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_build_id(tool, inject->session, event);
}

/*
 * Synthesize a build ID event for @dso on the (guest) machine @machine_pid
 * and process it into the host session.  The sample is mostly -1 (unknown);
 * cpumode distinguishes guest kernel from guest user space.
 */
static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	struct perf_sample synth_sample = {
		.pid	   = -1,
		.tid	   = -1,
		.time	   = -1,
		.stream_id = -1,
		.cpu	   = -1,
		.period	   = 1,
		.cpumode   = dso__is_in_kernel_space(dso)
			? PERF_RECORD_MISC_GUEST_KERNEL
			: PERF_RECORD_MISC_GUEST_USER,
	};

	if (!machine)
		return -ENOMEM;

	/* Mark hit so the build ID is kept in the feature section */
	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
					       process_build_id, inject__mmap_evsel(inject),
					       /*misc=*/synth_sample.cpumode,
					       dso__bid(dso), dso__long_name(dso));
}

/* dsos__for_each_dso() callback: synthesize build IDs for guest DSOs. */
static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
{
	struct guest_session *gs = data;
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	if (!dso__has_build_id(dso))
		return 0;

	return synthesize_build_id(inject, dso, gs->machine_pid);

}

/*
 * Copy all guest build IDs into the host session so they end up in the
 * output file's Build ID feature section.
 */
static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	return dsos__for_each_dso(&gs->session->machines.host.dsos,
				  guest_session__add_build_ids_cb,
				  gs);
}

/*
 * Repipe guest KSYMBOL events, but only out-of-line symbols; BPF ksymbols
 * are not supported.
 */
static int guest_session__ksymbol_event(const struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line i.e. no BPF support */
	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
		return 0;

	return guest_session__output_bytes(gs, event, event->header.size);
}

/*
 * Open and fully process the guest perf.data file, spooling the selected
 * guest events into a temporary file (perf cannot process two sessions at
 * once), then rewind the temporary file ready for guest_session__fetch().
 * Returns 0 on success or a negative error code.
 */
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap = guest_session__repipe;
	gs->tool.mmap2 = guest_session__repipe;
	gs->tool.comm = guest_session__repipe;
	gs->tool.fork = guest_session__repipe;
	gs->tool.exit = guest_session__repipe;
	gs->tool.lost = guest_session__repipe;
	gs->tool.context_switch = guest_session__repipe;
	gs->tool.ksymbol = guest_session__ksymbol_event;
	gs->tool.text_poke = guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id = perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index = perf_event__process_id_index;

	gs->tool.ordered_events = true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path = name;
	gs->data.force = force;
	gs->data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	/* Rewind so injection can read the spooled events from the start */
	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
	struct hlist_node *pos, *n;
	size_t i;

	for (i = 0; i < hlist_sz; ++i) {
		hlist_for_each_safe(pos, n, &heads[i]) {
			hlist_del(pos);
			free(pos);
		}
	}
}

/* Tear down the guest session: session, ID/TID maps, temp file, buffers. */
static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}

/* Copy TSC conversion parameters out of a PERF_RECORD_TIME_CONV event. */
static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift = time_conv->time_shift;
	tc->time_mult = time_conv->time_mult;
	tc->time_zero = time_conv->time_zero;
	tc->time_cycles = time_conv->time_cycles;
	tc->time_mask = time_conv->time_mask;
	tc->cap_user_time_zero = time_conv->cap_user_time_zero;
	tc->cap_user_time_short = time_conv->cap_user_time_short;
}

/* Cache host and guest TSC conversion parameters on the guest session. */
static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}

/*
 * Convert a guest perf timestamp to a host perf timestamp:
 * guest time -> guest TSC -> (offset/scale) -> host TSC -> host time.
 * A zero guest time is passed through unchanged (means "no timestamp").
 */
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}

/*
 * Read the next spooled guest event from the temporary file into
 * gs->ev.event_buf (allocated lazily, PERF_SAMPLE_MAX_SIZE), parse its
 * sample, and convert its timestamp to host time.  On EOF, hdr->size is set
 * to 0 and 0 is returned.
 * NOTE(review): trusts hdr->size from the spooled file — assumes the temp
 * file only ever contains events this tool wrote itself.
 */
static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event");
		return ret;
	}

	/* TSC conversion parameters are read once, on first fetch */
	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}

/*
 * Append an ID sample (per the evsel's sample_type) to the end of @ev and
 * grow its header size accordingly.  The new size must stay 8-byte aligned.
 */
static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
				    const struct perf_sample *sample)
{
	struct evsel *evsel;
	void *array;
	int ret;

	evsel = evlist__id2evsel(evlist, sample->id);
	array = ev;

	if (!evsel) {
		pr_err("No evsel for id %"PRIu64"\n", sample->id);
		return -EINVAL;
	}

	array += ev->header.size;
	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
	if (ret < 0)
		return ret;

	if (ret & 7) {
		pr_err("Bad id sample size %d\n", ret);
		return -EINVAL;
	}

	ev->header.size += ret;

	return 0;
}

/*
 * Inject spooled guest events with timestamps up to @timestamp into the
 * output.  For each event: rewrite cpumode to the guest equivalent, strip
 * the guest ID sample, remap sample ID and VCPU to their host values, and
 * append a fresh host-style ID sample.  gs->fetched holds one look-ahead
 * event whose timestamp exceeded a previous flush point.
 */
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			/* Zero'd ID sample: use the session default */
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}

/* Inject all remaining guest events, regardless of timestamp. */
static int guest_session__flush_events(struct guest_session *gs)
{
	return guest_session__inject_events(gs, (u64)-1);
}

/*
 * Repipe a host event, first injecting any pending guest events that are
 * older than it, to keep output roughly time ordered.
 */
static int host__repipe(const struct perf_tool *tool,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	ret = guest_session__inject_events(&inject->guest_session, sample->time);
	if (ret)
		return ret;

	return perf_event__repipe(tool, event, sample, machine);
}

/*
 * FINISHED_INIT handler: the host session header has been processed, so now
 * discover VCPU threads, allocate and map host sample IDs, synthesize guest
 * attrs / id_index / build IDs, and inject initial (zero-timestamp) guest
 * events, before repiping the FINISHED_INIT event itself.
 */
static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
			       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(tool, session, event);
}

/*
 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
 * which flushes host events to file up until the last flush time. Then inject
 * guest events up to the same time. Finally write out the FINISHED_ROUND event
 * itself.
 */
static int host__finished_round(const struct perf_tool *tool,
				union perf_event *event,
				struct ordered_events *oe)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret = perf_event__process_finished_round(tool, event, oe);
	u64 timestamp = ordered_events__last_flush_time(oe);

	if (ret)
		return ret;

	ret = guest_session__inject_events(&inject->guest_session, timestamp);
	if (ret)
		return ret;

	return perf_event__repipe_oe_synth(tool, event, oe);
}

/*
 * Context-switch handler: when a QEMU VCPU thread switches in, record which
 * host CPU that VCPU now runs on, then repipe the event as usual.
 */
static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}

/* SIGINT handler: request a clean stop of session processing. */
static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}

/* Verify @evsel samples carry @sample_type; report with @sample_msg if not. */
static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		pr_err("Samples for %s event do not have %s attribute set.",
		       name, sample_msg);
		return -EINVAL;
	}

	return 0;
}

/* Sample handler that discards the sample (used by --strip). */
static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}

/* Set up --strip mode: drop context switches and all samples. */
static void strip_init(struct perf_inject *inject)
{
	struct evlist *evlist = inject->session->evlist;
	struct evsel *evsel;

	inject->tool.context_switch = perf_event__drop;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = drop_sample;
}

/*
 * Option parser for --vm-time-correlation.  Enables in-place update mode;
 * an optional leading "dry-run" token makes it a no-write trial; the rest of
 * the string is passed through to the itrace code.
 */
static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}

/*
 * Option parser for --guest-data=<file>,<machine_pid>[,<offset>[,<scale>]].
 * Fills in the guest_session; time offset defaults to 0 and scale to 1.
 * NOTE(review): the strdup'd string is intentionally kept alive — strsep()
 * hands out pointers into it (gs->perf_data_file).
 */
static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}

/* Header-section iterator callback: stash each feature section's location. */
static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

/* Record the offset/size of every feature section in the input file. */
static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}

/*
 * Whether a header feature section can be copied verbatim from the input
 * file: machine/software description features are immutable, whereas
 * features that injection may invalidate must be regenerated.
 */
static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	};
}

/* Read exactly @sz bytes at @offs from @fd; 0 on success, negative errno. */
static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}

/* Copy feature section @feat verbatim from the input file via @fw. */
static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
	int fd = perf_data__fd(inject->session->data);
	u64 offs = inject->secs[feat].offset;
	size_t sz = inject->secs[feat].size;
	void *buf = malloc(sz);
	int ret;

	if (!buf)
		return -ENOMEM;

	ret = read_file(fd, offs, buf, sz);
	if (ret)
		goto out_free;

	ret = fw->write(fw, buf, sz);
out_free:
	free(buf);
	return ret;
}

/* Adapter pairing the generic feat_copier callback with a perf_inject. */
struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};

/*
 * feat_copier callback used while writing the output header: copy a feature
 * section verbatim when it exists in the input and is immutable.
 * Returns 1 if copied, 0 to let the header code regenerate it, <0 on error.
 */
static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (!inject->secs[feat].offset ||
	    !keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);
	if (ret < 0)
		return ret;

	return 1; /* Feature section copied */
}

/* Copy host kcore_dir* subdirectories from input to output directory. */
static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

/* Copy the guest kcore_dir, suffixed with the guest machine PID. */
static int guest_session__copy_kcore_dir(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
		       gs->perf_data_file, inject->output.path, gs->machine_pid);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

/* Output fd, or -1 when updating the input file in place (no output file). */
static int output_fd(struct perf_inject *inject)
{
	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
}

/*
 * Main injection driver: configure tool callbacks for the selected mode
 * (build IDs, sched_stat, VM time correlation, itrace synthesis, or guest
 * data merging), process all host events, flush remaining guest events, and
 * finally rewrite the output header/feature sections.
 */
static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		/* Only AUX area data is updated (in place), so drop all other handlers */
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round = host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}

/* Entry point for "perf inject": parse options and run __cmd_inject(). */
int cmd_inject(int argc, const char **argv)
{
	struct perf_inject inject = {
		.input_name = "-",
		.samples = LIST_HEAD_INIT(inject.samples),
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;
	const char *known_build_ids = NULL;
	bool build_ids = false;
	bool build_id_all = false;
	bool mmap2_build_ids = false;
	bool mmap2_build_id_all = false;

	struct option options[] = {
		OPT_BOOLEAN('b', "build-ids", &build_ids,
			    "Inject build-ids into the output stream"),
		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
			    "Inject build-ids of all DSOs into the output stream"),
		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
			    "Drop unused mmap events, make others mmap2 with build IDs"),
		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
			    "Rewrite all mmap events as mmap2 events with build IDs"),
		OPT_STRING(0, "known-build-ids", &known_build_ids,
"buildid path [,buildid path...]", 2396 "build-ids to use for given paths"), 2397 OPT_STRING('i', "input", &inject.input_name, "file", 2398 "input file name"), 2399 OPT_STRING('o', "output", &inject.output.path, "file", 2400 "output file name"), 2401 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2402 "Merge sched-stat and sched-switch for getting events " 2403 "where and how long tasks slept"), 2404 #ifdef HAVE_JITDUMP 2405 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2406 #endif 2407 OPT_INCR('v', "verbose", &verbose, 2408 "be more verbose (show build ids, etc)"), 2409 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2410 "file", "vmlinux pathname"), 2411 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2412 "don't load vmlinux even if found"), 2413 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2414 "kallsyms pathname"), 2415 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2416 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2417 NULL, "opts", "Instruction Tracing options\n" 2418 ITRACE_HELP, 2419 itrace_parse_synth_opts), 2420 OPT_BOOLEAN(0, "strip", &inject.strip, 2421 "strip non-synthesized events (use with --itrace)"), 2422 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2423 "correlate time between VM guests and the host", 2424 parse_vm_time_correlation), 2425 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2426 "inject events from a guest perf.data file", 2427 parse_guest_data), 2428 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2429 "guest mount directory under which every guest os" 2430 " instance has a subdir"), 2431 OPT_END() 2432 }; 2433 const char * const inject_usage[] = { 2434 "perf inject [<options>]", 2435 NULL 2436 }; 2437 bool ordered_events; 2438 2439 if (!inject.itrace_synth_opts.set) { 2440 /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ 2441 symbol_conf.lazy_load_kernel_maps = true; 2442 } 2443 2444 #ifndef HAVE_JITDUMP 2445 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2446 #endif 2447 argc = parse_options(argc, argv, options, inject_usage, 0); 2448 2449 /* 2450 * Any (unrecognized) arguments left? 2451 */ 2452 if (argc) 2453 usage_with_options(inject_usage, options); 2454 2455 if (inject.strip && !inject.itrace_synth_opts.set) { 2456 pr_err("--strip option requires --itrace option\n"); 2457 return -1; 2458 } 2459 2460 if (symbol__validate_sym_arguments()) 2461 return -1; 2462 2463 if (inject.in_place_update) { 2464 if (!strcmp(inject.input_name, "-")) { 2465 pr_err("Input file name required for in-place updating\n"); 2466 return -1; 2467 } 2468 if (strcmp(inject.output.path, "-")) { 2469 pr_err("Output file name must not be specified for in-place updating\n"); 2470 return -1; 2471 } 2472 if (!data.force && !inject.in_place_update_dry_run) { 2473 pr_err("The input file would be updated in place, " 2474 "the --force option is required.\n"); 2475 return -1; 2476 } 2477 if (!inject.in_place_update_dry_run) 2478 data.in_place_update = true; 2479 } else { 2480 if (strcmp(inject.output.path, "-") && !inject.strip && 2481 has_kcore_dir(inject.input_name)) { 2482 inject.output.is_dir = true; 2483 inject.copy_kcore_dir = true; 2484 } 2485 if (perf_data__open(&inject.output)) { 2486 perror("failed to create output file"); 2487 return -1; 2488 } 2489 } 2490 if (mmap2_build_ids) 2491 inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY; 2492 if (mmap2_build_id_all) 2493 inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL; 2494 if (build_ids) 2495 inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY; 2496 if (build_id_all) 2497 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL; 2498 2499 data.path = inject.input_name; 2500 2501 ordered_events = inject.jit_mode || inject.sched_stat || 2502 inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2503 inject.build_id_style == 
BID_RWS__MMAP2_BUILDID_LAZY; 2504 perf_tool__init(&inject.tool, ordered_events); 2505 inject.tool.sample = perf_event__repipe_sample; 2506 inject.tool.read = perf_event__repipe_sample; 2507 inject.tool.mmap = perf_event__repipe; 2508 inject.tool.mmap2 = perf_event__repipe; 2509 inject.tool.comm = perf_event__repipe; 2510 inject.tool.namespaces = perf_event__repipe; 2511 inject.tool.cgroup = perf_event__repipe; 2512 inject.tool.fork = perf_event__repipe; 2513 inject.tool.exit = perf_event__repipe; 2514 inject.tool.lost = perf_event__repipe; 2515 inject.tool.lost_samples = perf_event__repipe; 2516 inject.tool.aux = perf_event__repipe; 2517 inject.tool.itrace_start = perf_event__repipe; 2518 inject.tool.aux_output_hw_id = perf_event__repipe; 2519 inject.tool.context_switch = perf_event__repipe; 2520 inject.tool.throttle = perf_event__repipe; 2521 inject.tool.unthrottle = perf_event__repipe; 2522 inject.tool.ksymbol = perf_event__repipe; 2523 inject.tool.bpf = perf_event__repipe; 2524 inject.tool.text_poke = perf_event__repipe; 2525 inject.tool.attr = perf_event__repipe_attr; 2526 inject.tool.event_update = perf_event__repipe_event_update; 2527 inject.tool.tracing_data = perf_event__repipe_op2_synth; 2528 inject.tool.finished_round = perf_event__repipe_oe_synth; 2529 inject.tool.build_id = perf_event__repipe_op2_synth; 2530 inject.tool.id_index = perf_event__repipe_op2_synth; 2531 inject.tool.auxtrace_info = perf_event__repipe_op2_synth; 2532 inject.tool.auxtrace_error = perf_event__repipe_op2_synth; 2533 inject.tool.time_conv = perf_event__repipe_op2_synth; 2534 inject.tool.thread_map = perf_event__repipe_op2_synth; 2535 inject.tool.cpu_map = perf_event__repipe_op2_synth; 2536 inject.tool.stat_config = perf_event__repipe_op2_synth; 2537 inject.tool.stat = perf_event__repipe_op2_synth; 2538 inject.tool.stat_round = perf_event__repipe_op2_synth; 2539 inject.tool.feature = perf_event__repipe_op2_synth; 2540 inject.tool.finished_init = perf_event__repipe_op2_synth; 2541 
inject.tool.compressed = perf_event__repipe_op4_synth; 2542 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2543 inject.tool.bpf_metadata = perf_event__repipe_op2_synth; 2544 inject.tool.dont_split_sample_group = true; 2545 inject.session = __perf_session__new(&data, &inject.tool, 2546 /*trace_event_repipe=*/inject.output.is_pipe, 2547 /*host_env=*/NULL); 2548 2549 if (IS_ERR(inject.session)) { 2550 ret = PTR_ERR(inject.session); 2551 goto out_close_output; 2552 } 2553 2554 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2555 pr_warning("Decompression initialization failed.\n"); 2556 2557 /* Save original section info before feature bits change */ 2558 ret = save_section_info(&inject); 2559 if (ret) 2560 goto out_delete; 2561 2562 if (inject.output.is_pipe) { 2563 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2564 if (ret < 0) { 2565 pr_err("Couldn't write a new pipe header.\n"); 2566 goto out_delete; 2567 } 2568 2569 /* 2570 * If the input is already a pipe then the features and 2571 * attributes don't need synthesizing, they will be present in 2572 * the input. 2573 */ 2574 if (!data.is_pipe) { 2575 ret = perf_event__synthesize_for_pipe(&inject.tool, 2576 inject.session, 2577 &inject.output, 2578 perf_event__repipe); 2579 if (ret < 0) 2580 goto out_delete; 2581 } 2582 } 2583 2584 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2585 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) { 2586 /* 2587 * to make sure the mmap records are ordered correctly 2588 * and so that the correct especially due to jitted code 2589 * mmaps. We cannot generate the buildid hit list and 2590 * inject the jit mmaps at the same time for now. 
2591 */ 2592 inject.tool.ordering_requires_timestamps = true; 2593 } 2594 if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) { 2595 inject.known_build_ids = 2596 perf_inject__parse_known_build_ids(known_build_ids); 2597 2598 if (inject.known_build_ids == NULL) { 2599 pr_err("Couldn't parse known build ids.\n"); 2600 goto out_delete; 2601 } 2602 } 2603 2604 #ifdef HAVE_JITDUMP 2605 if (inject.jit_mode) { 2606 inject.tool.mmap2 = perf_event__repipe_mmap2; 2607 inject.tool.mmap = perf_event__repipe_mmap; 2608 inject.tool.ordering_requires_timestamps = true; 2609 /* 2610 * JIT MMAP injection injects all MMAP events in one go, so it 2611 * does not obey finished_round semantics. 2612 */ 2613 inject.tool.finished_round = perf_event__drop_oe; 2614 } 2615 #endif 2616 ret = symbol__init(perf_session__env(inject.session)); 2617 if (ret < 0) 2618 goto out_delete; 2619 2620 ret = __cmd_inject(&inject); 2621 2622 guest_session__exit(&inject.guest_session); 2623 2624 out_delete: 2625 strlist__delete(inject.known_build_ids); 2626 zstd_fini(&(inject.session->zstd_data)); 2627 perf_session__delete(inject.session); 2628 out_close_output: 2629 if (!inject.in_place_update) 2630 perf_data__close(&inject.output); 2631 free(inject.itrace_synth_opts.vm_tm_corr_args); 2632 free(inject.event_copy); 2633 free(inject.guest_session.ev.event_buf); 2634 return ret; 2635 } 2636