1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include "util/dso.h" 13 #include "util/vdso.h" 14 #include "util/evlist.h" 15 #include "util/evsel.h" 16 #include "util/map.h" 17 #include "util/session.h" 18 #include "util/tool.h" 19 #include "util/debug.h" 20 #include "util/build-id.h" 21 #include "util/data.h" 22 #include "util/auxtrace.h" 23 #include "util/jit.h" 24 #include "util/string2.h" 25 #include "util/symbol.h" 26 #include "util/synthetic-events.h" 27 #include "util/thread.h" 28 #include "util/namespaces.h" 29 #include "util/util.h" 30 #include "util/tsc.h" 31 32 #include <internal/lib.h> 33 34 #include <linux/err.h> 35 #include <subcmd/parse-options.h> 36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 37 38 #include <linux/list.h> 39 #include <linux/string.h> 40 #include <linux/zalloc.h> 41 #include <linux/hash.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <inttypes.h> 46 47 struct guest_event { 48 struct perf_sample sample; 49 union perf_event *event; 50 char event_buf[PERF_SAMPLE_MAX_SIZE]; 51 }; 52 53 struct guest_id { 54 /* hlist_node must be first, see free_hlist() */ 55 struct hlist_node node; 56 u64 id; 57 u64 host_id; 58 u32 vcpu; 59 }; 60 61 struct guest_tid { 62 /* hlist_node must be first, see free_hlist() */ 63 struct hlist_node node; 64 /* Thread ID of QEMU thread */ 65 u32 tid; 66 u32 vcpu; 67 }; 68 69 struct guest_vcpu { 70 /* Current host CPU */ 71 u32 cpu; 72 /* Thread ID of QEMU thread */ 73 u32 tid; 74 }; 75 76 struct guest_session { 77 char *perf_data_file; 78 u32 machine_pid; 79 u64 time_offset; 80 double time_scale; 81 struct perf_tool tool; 82 struct perf_data data; 83 struct perf_session *session; 84 char *tmp_file_name; 85 int tmp_fd; 86 struct perf_tsc_conversion host_tc; 87 struct perf_tsc_conversion guest_tc; 88 bool copy_kcore_dir; 89 bool have_tc; 90 bool fetched; 91 bool ready; 92 u16 dflt_id_hdr_size; 93 u64 dflt_id; 94 u64 highest_id; 95 /* Array of guest_vcpu */ 96 struct guest_vcpu *vcpu; 97 size_t vcpu_cnt; 98 /* Hash table for guest_id */ 99 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 100 /* Hash table for guest_tid */ 101 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE]; 102 /* Place to stash next guest event */ 103 struct guest_event ev; 104 }; 105 106 struct perf_inject { 107 struct perf_tool tool; 108 struct perf_session *session; 109 bool build_ids; 110 bool build_id_all; 111 bool sched_stat; 112 bool have_auxtrace; 113 bool strip; 114 bool jit_mode; 115 bool in_place_update; 116 bool in_place_update_dry_run; 117 bool is_pipe; 118 bool copy_kcore_dir; 119 const char *input_name; 120 struct perf_data output; 121 u64 bytes_written; 122 u64 aux_id; 123 struct list_head samples; 124 struct itrace_synth_opts itrace_synth_opts; 125 char event_copy[PERF_SAMPLE_MAX_SIZE]; 126 struct perf_file_section secs[HEADER_FEAT_BITS]; 127 struct guest_session guest_session; 128 struct strlist *known_build_ids; 129 }; 130 131 struct event_entry { 132 struct list_head node; 133 u32 tid; 134 union perf_event event[]; 135 }; 136 137 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, 138 struct machine *machine, u8 cpumode, u32 flags); 139 140 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 141 { 142 ssize_t size; 143 144 size = perf_data__write(&inject->output, buf, sz); 145 if (size < 0) 146 return -errno; 147 148 inject->bytes_written += size; 149 return 0; 150 } 151 152 static int perf_event__repipe_synth(struct perf_tool *tool, 153 union perf_event *event) 154 { 155 struct perf_inject *inject = container_of(tool, struct perf_inject, 156 tool); 157 158 return output_bytes(inject, event, event->header.size); 159 } 160 161 static int perf_event__repipe_oe_synth(struct perf_tool *tool, 162 union perf_event *event, 163 struct ordered_events *oe __maybe_unused) 164 { 165 return perf_event__repipe_synth(tool, event); 166 } 167 168 #ifdef HAVE_JITDUMP 169 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, 170 union perf_event *event __maybe_unused, 171 struct ordered_events *oe __maybe_unused) 172 { 173 return 0; 174 } 175 #endif 176 177 static int perf_event__repipe_op2_synth(struct perf_session *session, 178 union perf_event *event) 179 { 180 return perf_event__repipe_synth(session->tool, event); 181 } 182 183 static int perf_event__repipe_op4_synth(struct perf_session *session, 184 union perf_event *event, 185 u64 data __maybe_unused, 186 const char *str __maybe_unused) 187 { 188 return perf_event__repipe_synth(session->tool, event); 189 } 190 191 static int perf_event__repipe_attr(struct perf_tool *tool, 192 union perf_event *event, 193 struct evlist **pevlist) 194 { 195 struct perf_inject *inject = container_of(tool, struct perf_inject, 196 tool); 197 int ret; 198 199 ret = perf_event__process_attr(tool, event, pevlist); 200 if (ret) 201 return ret; 202 203 if (!inject->is_pipe) 204 return 0; 205 206 return perf_event__repipe_synth(tool, event); 207 } 208 209 static int perf_event__repipe_event_update(struct perf_tool *tool, 210 union perf_event *event, 211 struct evlist **pevlist __maybe_unused) 212 { 213 return perf_event__repipe_synth(tool, event); 214 } 215 216 #ifdef HAVE_AUXTRACE_SUPPORT 217 218 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 219 { 220 char buf[4096]; 221 ssize_t ssz; 222 int ret; 223 224 while (size > 0) { 225 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 226 if (ssz < 0) 227 return -errno; 228 ret = output_bytes(inject, buf, ssz); 229 if (ret) 230 return ret; 231 size -= ssz; 232 } 233 234 return 0; 235 } 236 237 static s64 perf_event__repipe_auxtrace(struct perf_session *session, 238 union perf_event *event) 239 { 240 struct perf_tool *tool = session->tool; 241 struct perf_inject *inject = container_of(tool, struct perf_inject, 242 tool); 243 int ret; 244 245 inject->have_auxtrace = true; 246 247 if (!inject->output.is_pipe) { 248 off_t offset; 249 250 offset = lseek(inject->output.file.fd, 0, SEEK_CUR); 251 if (offset == -1) 252 return -errno; 253 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 254 event, offset); 255 if (ret < 0) 256 return ret; 257 } 258 259 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 260 ret = output_bytes(inject, event, event->header.size); 261 if (ret < 0) 262 return ret; 263 ret = copy_bytes(inject, session->data, 264 event->auxtrace.size); 265 } else { 266 ret = output_bytes(inject, event, 267 event->header.size + event->auxtrace.size); 268 } 269 if (ret < 0) 270 return ret; 271 272 return event->auxtrace.size; 273 } 274 275 #else 276 277 static s64 278 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, 279 union perf_event *event __maybe_unused) 280 { 281 pr_err("AUX area tracing not supported\n"); 282 return -EINVAL; 283 } 284 285 #endif 286 287 static int perf_event__repipe(struct perf_tool *tool, 288 union perf_event *event, 289 struct perf_sample *sample __maybe_unused, 290 struct machine *machine __maybe_unused) 291 { 292 return perf_event__repipe_synth(tool, event); 293 } 294 295 static int perf_event__drop(struct perf_tool *tool __maybe_unused, 296 union perf_event *event __maybe_unused, 297 struct perf_sample *sample __maybe_unused, 298 struct machine *machine __maybe_unused) 299 { 300 return 0; 301 } 302 303 static int perf_event__drop_aux(struct perf_tool *tool, 304 union perf_event *event __maybe_unused, 305 struct perf_sample *sample, 306 struct machine *machine __maybe_unused) 307 { 308 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 309 310 if (!inject->aux_id) 311 inject->aux_id = sample->id; 312 313 return 0; 314 } 315 316 static union perf_event * 317 perf_inject__cut_auxtrace_sample(struct perf_inject *inject, 318 union perf_event *event, 319 struct perf_sample *sample) 320 { 321 size_t sz1 = sample->aux_sample.data - (void *)event; 322 size_t sz2 = event->header.size - sample->aux_sample.size - sz1; 323 union perf_event *ev = (union perf_event *)inject->event_copy; 324 325 if (sz1 > event->header.size || sz2 > event->header.size || 326 sz1 + sz2 > event->header.size || 327 sz1 < sizeof(struct perf_event_header) + sizeof(u64)) 328 return event; 329 330 memcpy(ev, event, sz1); 331 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); 332 ev->header.size = sz1 + sz2; 333 ((u64 *)((void *)ev + sz1))[-1] = 0; 334 335 return ev; 336 } 337 338 typedef int (*inject_handler)(struct perf_tool *tool, 339 union perf_event *event, 340 struct perf_sample *sample, 341 struct evsel *evsel, 342 struct machine *machine); 343 344 static int perf_event__repipe_sample(struct perf_tool *tool, 345 union perf_event *event, 346 struct perf_sample *sample, 347 struct evsel *evsel, 348 struct machine *machine) 349 { 350 struct perf_inject *inject = container_of(tool, struct perf_inject, 351 tool); 352 353 if (evsel && evsel->handler) { 354 inject_handler f = evsel->handler; 355 return f(tool, event, sample, evsel, machine); 356 } 357 358 build_id__mark_dso_hit(tool, event, sample, evsel, machine); 359 360 if (inject->itrace_synth_opts.set && sample->aux_sample.size) 361 event = perf_inject__cut_auxtrace_sample(inject, event, sample); 362 363 return perf_event__repipe_synth(tool, event); 364 } 365 366 static int perf_event__repipe_mmap(struct perf_tool *tool, 367 union perf_event *event, 368 struct perf_sample *sample, 369 struct machine *machine) 370 { 371 int err; 372 373 err = perf_event__process_mmap(tool, event, sample, machine); 374 perf_event__repipe(tool, event, sample, machine); 375 376 return err; 377 } 378 379 #ifdef HAVE_JITDUMP 380 static int perf_event__jit_repipe_mmap(struct perf_tool *tool, 381 union perf_event *event, 382 struct perf_sample *sample, 383 struct machine *machine) 384 { 385 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 386 u64 n = 0; 387 int ret; 388 389 /* 390 * if jit marker, then inject jit mmaps and generate ELF images 391 */ 392 ret = jit_process(inject->session, &inject->output, machine, 393 event->mmap.filename, event->mmap.pid, event->mmap.tid, &n); 394 if (ret < 0) 395 return ret; 396 if (ret) { 397 inject->bytes_written += n; 398 return 0; 399 } 400 return perf_event__repipe_mmap(tool, event, sample, machine); 401 } 402 #endif 403 404 static struct dso *findnew_dso(int pid, int tid, const char *filename, 405 struct dso_id *id, struct machine *machine) 406 { 407 struct thread *thread; 408 struct nsinfo *nsi = NULL; 409 struct nsinfo *nnsi; 410 struct dso *dso; 411 bool vdso; 412 413 thread = machine__findnew_thread(machine, pid, tid); 414 if (thread == NULL) { 415 pr_err("cannot find or create a task %d/%d.\n", tid, pid); 416 return NULL; 417 } 418 419 vdso = is_vdso_map(filename); 420 nsi = nsinfo__get(thread__nsinfo(thread)); 421 422 if (vdso) { 423 /* The vdso maps are always on the host and not the 424 * container. Ensure that we don't use setns to look 425 * them up. 426 */ 427 nnsi = nsinfo__copy(nsi); 428 if (nnsi) { 429 nsinfo__put(nsi); 430 nsinfo__clear_need_setns(nnsi); 431 nsi = nnsi; 432 } 433 dso = machine__findnew_vdso(machine, thread); 434 } else { 435 dso = machine__findnew_dso_id(machine, filename, id); 436 } 437 438 if (dso) { 439 mutex_lock(&dso->lock); 440 nsinfo__put(dso->nsinfo); 441 dso->nsinfo = nsi; 442 mutex_unlock(&dso->lock); 443 } else 444 nsinfo__put(nsi); 445 446 thread__put(thread); 447 return dso; 448 } 449 450 static int perf_event__repipe_buildid_mmap(struct perf_tool *tool, 451 union perf_event *event, 452 struct perf_sample *sample, 453 struct machine *machine) 454 { 455 struct dso *dso; 456 457 dso = findnew_dso(event->mmap.pid, event->mmap.tid, 458 event->mmap.filename, NULL, machine); 459 460 if (dso && !dso->hit) { 461 dso->hit = 1; 462 dso__inject_build_id(dso, tool, machine, sample->cpumode, 0); 463 } 464 dso__put(dso); 465 466 return perf_event__repipe(tool, event, sample, machine); 467 } 468 469 static int perf_event__repipe_mmap2(struct perf_tool *tool, 470 union perf_event *event, 471 struct perf_sample *sample, 472 struct machine *machine) 473 { 474 int err; 475 476 err = perf_event__process_mmap2(tool, event, sample, machine); 477 perf_event__repipe(tool, event, sample, machine); 478 479 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 480 struct dso *dso; 481 482 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 483 event->mmap2.filename, NULL, machine); 484 if (dso) { 485 /* mark it not to inject build-id */ 486 dso->hit = 1; 487 } 488 dso__put(dso); 489 } 490 491 return err; 492 } 493 494 #ifdef HAVE_JITDUMP 495 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, 496 union perf_event *event, 497 struct perf_sample *sample, 498 struct machine *machine) 499 { 500 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 501 u64 n = 0; 502 int ret; 503 504 /* 505 * if jit marker, then inject jit mmaps and generate ELF images 506 */ 507 ret = jit_process(inject->session, &inject->output, machine, 508 event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n); 509 if (ret < 0) 510 return ret; 511 if (ret) { 512 inject->bytes_written += n; 513 return 0; 514 } 515 return perf_event__repipe_mmap2(tool, event, sample, machine); 516 } 517 #endif 518 519 static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, 520 union perf_event *event, 521 struct perf_sample *sample, 522 struct machine *machine) 523 { 524 struct dso_id dso_id = { 525 .maj = event->mmap2.maj, 526 .min = event->mmap2.min, 527 .ino = event->mmap2.ino, 528 .ino_generation = event->mmap2.ino_generation, 529 }; 530 struct dso *dso; 531 532 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 533 /* cannot use dso_id since it'd have invalid info */ 534 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 535 event->mmap2.filename, NULL, machine); 536 if (dso) { 537 /* mark it not to inject build-id */ 538 dso->hit = 1; 539 } 540 dso__put(dso); 541 perf_event__repipe(tool, event, sample, machine); 542 return 0; 543 } 544 545 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 546 event->mmap2.filename, &dso_id, machine); 547 548 if (dso && !dso->hit) { 549 dso->hit = 1; 550 dso__inject_build_id(dso, tool, machine, sample->cpumode, 551 event->mmap2.flags); 552 } 553 dso__put(dso); 554 555 perf_event__repipe(tool, event, sample, machine); 556 557 return 0; 558 } 559 560 static int perf_event__repipe_fork(struct perf_tool *tool, 561 union perf_event *event, 562 struct perf_sample *sample, 563 struct machine *machine) 564 { 565 int err; 566 567 err = perf_event__process_fork(tool, event, sample, machine); 568 perf_event__repipe(tool, event, sample, machine); 569 570 return err; 571 } 572 573 static int perf_event__repipe_comm(struct perf_tool *tool, 574 union perf_event *event, 575 struct perf_sample *sample, 576 struct machine *machine) 577 { 578 int err; 579 580 err = perf_event__process_comm(tool, event, sample, machine); 581 perf_event__repipe(tool, event, sample, machine); 582 583 return err; 584 } 585 586 static int perf_event__repipe_namespaces(struct perf_tool *tool, 587 union perf_event *event, 588 struct perf_sample *sample, 589 struct machine *machine) 590 { 591 int err = perf_event__process_namespaces(tool, event, sample, machine); 592 593 perf_event__repipe(tool, event, sample, machine); 594 595 return err; 596 } 597 598 static int perf_event__repipe_exit(struct perf_tool *tool, 599 union perf_event *event, 600 struct perf_sample *sample, 601 struct machine *machine) 602 { 603 int err; 604 605 err = perf_event__process_exit(tool, event, sample, machine); 606 perf_event__repipe(tool, event, sample, machine); 607 608 return err; 609 } 610 611 #ifdef HAVE_LIBTRACEEVENT 612 static int perf_event__repipe_tracing_data(struct perf_session *session, 613 union perf_event *event) 614 { 615 perf_event__repipe_synth(session->tool, event); 616 617 return perf_event__process_tracing_data(session, event); 618 } 619 #endif 620 621 static int dso__read_build_id(struct dso *dso) 622 { 623 struct nscookie nsc; 624 625 if (dso->has_build_id) 626 return 0; 627 628 mutex_lock(&dso->lock); 629 nsinfo__mountns_enter(dso->nsinfo, &nsc); 630 if (filename__read_build_id(dso->long_name, &dso->bid) > 0) 631 dso->has_build_id = true; 632 else if (dso->nsinfo) { 633 char *new_name = dso__filename_with_chroot(dso, dso->long_name); 634 635 if (new_name && filename__read_build_id(new_name, &dso->bid) > 0) 636 dso->has_build_id = true; 637 free(new_name); 638 } 639 nsinfo__mountns_exit(&nsc); 640 mutex_unlock(&dso->lock); 641 642 return dso->has_build_id ? 0 : -1; 643 } 644 645 static struct strlist *perf_inject__parse_known_build_ids( 646 const char *known_build_ids_string) 647 { 648 struct str_node *pos, *tmp; 649 struct strlist *known_build_ids; 650 int bid_len; 651 652 known_build_ids = strlist__new(known_build_ids_string, NULL); 653 if (known_build_ids == NULL) 654 return NULL; 655 strlist__for_each_entry_safe(pos, tmp, known_build_ids) { 656 const char *build_id, *dso_name; 657 658 build_id = skip_spaces(pos->s); 659 dso_name = strchr(build_id, ' '); 660 if (dso_name == NULL) { 661 strlist__remove(known_build_ids, pos); 662 continue; 663 } 664 bid_len = dso_name - pos->s; 665 dso_name = skip_spaces(dso_name); 666 if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) { 667 strlist__remove(known_build_ids, pos); 668 continue; 669 } 670 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 671 if (!isxdigit(build_id[2 * ix]) || 672 !isxdigit(build_id[2 * ix + 1])) { 673 strlist__remove(known_build_ids, pos); 674 break; 675 } 676 } 677 } 678 return known_build_ids; 679 } 680 681 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, 682 struct dso *dso) 683 { 684 struct str_node *pos; 685 int bid_len; 686 687 strlist__for_each_entry(pos, inject->known_build_ids) { 688 const char *build_id, *dso_name; 689 690 build_id = skip_spaces(pos->s); 691 dso_name = strchr(build_id, ' '); 692 bid_len = dso_name - pos->s; 693 dso_name = skip_spaces(dso_name); 694 if (strcmp(dso->long_name, dso_name)) 695 continue; 696 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 697 dso->bid.data[ix] = (hex(build_id[2 * ix]) << 4 | 698 hex(build_id[2 * ix + 1])); 699 } 700 dso->bid.size = bid_len / 2; 701 dso->has_build_id = 1; 702 return true; 703 } 704 return false; 705 } 706 707 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, 708 struct machine *machine, u8 cpumode, u32 flags) 709 { 710 struct perf_inject *inject = container_of(tool, struct perf_inject, 711 tool); 712 int err; 713 714 if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB) 715 return 0; 716 if (is_no_dso_memory(dso->long_name)) 717 return 0; 718 719 if (inject->known_build_ids != NULL && 720 perf_inject__lookup_known_build_id(inject, dso)) 721 return 1; 722 723 if (dso__read_build_id(dso) < 0) { 724 pr_debug("no build_id found for %s\n", dso->long_name); 725 return -1; 726 } 727 728 err = perf_event__synthesize_build_id(tool, dso, cpumode, 729 perf_event__repipe, machine); 730 if (err) { 731 pr_err("Can't synthesize build_id event for %s\n", dso->long_name); 732 return -1; 733 } 734 735 return 0; 736 } 737 738 int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, 739 struct perf_sample *sample, 740 struct evsel *evsel __maybe_unused, 741 struct machine *machine) 742 { 743 struct addr_location al; 744 struct thread *thread; 745 746 addr_location__init(&al); 747 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 748 if (thread == NULL) { 749 pr_err("problem processing %d event, skipping it.\n", 750 event->header.type); 751 goto repipe; 752 } 753 754 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 755 struct dso *dso = map__dso(al.map); 756 757 if (!dso->hit) { 758 dso->hit = 1; 759 dso__inject_build_id(dso, tool, machine, 760 sample->cpumode, map__flags(al.map)); 761 } 762 } 763 764 thread__put(thread); 765 repipe: 766 perf_event__repipe(tool, event, sample, machine); 767 addr_location__exit(&al); 768 return 0; 769 } 770 771 static int perf_inject__sched_process_exit(struct perf_tool *tool, 772 union perf_event *event __maybe_unused, 773 struct perf_sample *sample, 774 struct evsel *evsel __maybe_unused, 775 struct machine *machine __maybe_unused) 776 { 777 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 778 struct event_entry *ent; 779 780 list_for_each_entry(ent, &inject->samples, node) { 781 if (sample->tid == ent->tid) { 782 list_del_init(&ent->node); 783 free(ent); 784 break; 785 } 786 } 787 788 return 0; 789 } 790 791 static int perf_inject__sched_switch(struct perf_tool *tool, 792 union perf_event *event, 793 struct perf_sample *sample, 794 struct evsel *evsel, 795 struct machine *machine) 796 { 797 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 798 struct event_entry *ent; 799 800 perf_inject__sched_process_exit(tool, event, sample, evsel, machine); 801 802 ent = malloc(event->header.size + sizeof(struct event_entry)); 803 if (ent == NULL) { 804 color_fprintf(stderr, PERF_COLOR_RED, 805 "Not enough memory to process sched switch event!"); 806 return -1; 807 } 808 809 ent->tid = sample->tid; 810 memcpy(&ent->event, event, event->header.size); 811 list_add(&ent->node, &inject->samples); 812 return 0; 813 } 814 815 #ifdef HAVE_LIBTRACEEVENT 816 static int perf_inject__sched_stat(struct perf_tool *tool, 817 union perf_event *event __maybe_unused, 818 struct perf_sample *sample, 819 struct evsel *evsel, 820 struct machine *machine) 821 { 822 struct event_entry *ent; 823 union perf_event *event_sw; 824 struct perf_sample sample_sw; 825 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 826 u32 pid = evsel__intval(evsel, sample, "pid"); 827 828 list_for_each_entry(ent, &inject->samples, node) { 829 if (pid == ent->tid) 830 goto found; 831 } 832 833 return 0; 834 found: 835 event_sw = &ent->event[0]; 836 evsel__parse_sample(evsel, event_sw, &sample_sw); 837 838 sample_sw.period = sample->period; 839 sample_sw.time = sample->time; 840 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type, 841 evsel->core.attr.read_format, &sample_sw); 842 build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); 843 return perf_event__repipe(tool, event_sw, &sample_sw, machine); 844 } 845 #endif 846 847 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) 848 { 849 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL)) 850 return NULL; 851 return &gs->vcpu[vcpu]; 852 } 853 854 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) 855 { 856 ssize_t ret = writen(gs->tmp_fd, buf, sz); 857 858 return ret < 0 ? ret : 0; 859 } 860 861 static int guest_session__repipe(struct perf_tool *tool, 862 union perf_event *event, 863 struct perf_sample *sample __maybe_unused, 864 struct machine *machine __maybe_unused) 865 { 866 struct guest_session *gs = container_of(tool, struct guest_session, tool); 867 868 return guest_session__output_bytes(gs, event, event->header.size); 869 } 870 871 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 872 { 873 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 874 int hash; 875 876 if (!guest_tid) 877 return -ENOMEM; 878 879 guest_tid->tid = tid; 880 guest_tid->vcpu = vcpu; 881 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 882 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 883 884 return 0; 885 } 886 887 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 888 union perf_event *event, 889 u64 offset __maybe_unused, void *data) 890 { 891 struct guest_session *gs = data; 892 unsigned int vcpu; 893 struct guest_vcpu *guest_vcpu; 894 int ret; 895 896 if (event->header.type != PERF_RECORD_COMM || 897 event->comm.pid != gs->machine_pid) 898 return 0; 899 900 /* 901 * QEMU option -name debug-threads=on, causes thread names formatted as 902 * below, although it is not an ABI. Also libvirt seems to use this by 903 * default. Here we rely on it to tell us which thread is which VCPU. 904 */ 905 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu); 906 if (ret <= 0) 907 return ret; 908 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n", 909 event->comm.tid, event->comm.comm, vcpu); 910 if (vcpu > INT_MAX) { 911 pr_err("Invalid VCPU %u\n", vcpu); 912 return -EINVAL; 913 } 914 guest_vcpu = guest_session__vcpu(gs, vcpu); 915 if (!guest_vcpu) 916 return -ENOMEM; 917 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) { 918 pr_err("Fatal error: Two threads found with the same VCPU\n"); 919 return -EINVAL; 920 } 921 guest_vcpu->tid = event->comm.tid; 922 923 return guest_session__map_tid(gs, event->comm.tid, vcpu); 924 } 925 926 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) 927 { 928 return perf_session__peek_events(session, session->header.data_offset, 929 session->header.data_size, 930 host_peek_vm_comms_cb, gs); 931 } 932 933 static bool evlist__is_id_used(struct evlist *evlist, u64 id) 934 { 935 return evlist__id2sid(evlist, id); 936 } 937 938 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) 939 { 940 do { 941 gs->highest_id += 1; 942 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id)); 943 944 return gs->highest_id; 945 } 946 947 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) 948 { 949 struct guest_id *guest_id = zalloc(sizeof(*guest_id)); 950 int hash; 951 952 if (!guest_id) 953 return -ENOMEM; 954 955 guest_id->id = id; 956 guest_id->host_id = host_id; 957 guest_id->vcpu = vcpu; 958 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); 959 hlist_add_head(&guest_id->node, &gs->heads[hash]); 960 961 return 0; 962 } 963 964 static u64 evlist__find_highest_id(struct evlist *evlist) 965 { 966 struct evsel *evsel; 967 u64 highest_id = 1; 968 969 evlist__for_each_entry(evlist, evsel) { 970 u32 j; 971 972 for (j = 0; j < evsel->core.ids; j++) { 973 u64 id = evsel->core.id[j]; 974 975 if (id > highest_id) 976 highest_id = id; 977 } 978 } 979 980 return highest_id; 981 } 982 983 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 984 { 985 struct evlist *evlist = gs->session->evlist; 986 struct evsel *evsel; 987 int ret; 988 989 evlist__for_each_entry(evlist, evsel) { 990 u32 j; 991 992 for (j = 0; j < evsel->core.ids; j++) { 993 struct perf_sample_id *sid; 994 u64 host_id; 995 u64 id; 996 997 id = evsel->core.id[j]; 998 sid = evlist__id2sid(evlist, id); 999 if (!sid || sid->cpu.cpu == -1) 1000 continue; 1001 host_id = guest_session__allocate_new_id(gs, host_evlist); 1002 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 1003 if (ret) 1004 return ret; 1005 } 1006 } 1007 1008 return 0; 1009 } 1010 1011 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1012 { 1013 struct hlist_head *head; 1014 struct guest_id *guest_id; 1015 int hash; 1016 1017 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1018 head = &gs->heads[hash]; 1019 1020 hlist_for_each_entry(guest_id, head, node) 1021 if (guest_id->id == id) 1022 return guest_id; 1023 1024 return NULL; 1025 } 1026 1027 static int process_attr(struct perf_tool *tool, union perf_event *event, 1028 struct perf_sample *sample __maybe_unused, 1029 struct machine *machine __maybe_unused) 1030 { 1031 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1032 1033 return perf_event__process_attr(tool, event, &inject->session->evlist); 1034 } 1035 1036 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1037 { 1038 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1039 struct perf_event_attr attr = evsel->core.attr; 1040 u64 *id_array; 1041 u32 *vcpu_array; 1042 int ret = -ENOMEM; 1043 u32 i; 1044 1045 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1046 if (!id_array) 1047 return -ENOMEM; 1048 1049 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1050 if (!vcpu_array) 1051 goto out; 1052 1053 for (i = 0; i < evsel->core.ids; i++) { 1054 u64 id = evsel->core.id[i]; 1055 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1056 1057 if (!guest_id) { 1058 pr_err("Failed to find guest id %"PRIu64"\n", id); 1059 ret = -EINVAL; 1060 goto out; 1061 } 1062 id_array[i] = guest_id->host_id; 1063 vcpu_array[i] = guest_id->vcpu; 1064 } 1065 1066 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1067 attr.exclude_host = 1; 1068 attr.exclude_guest = 0; 1069 1070 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1071 id_array, process_attr); 1072 if (ret) 1073 pr_err("Failed to add guest attr.\n"); 1074 1075 for (i = 0; i < evsel->core.ids; i++) { 1076 struct perf_sample_id *sid; 1077 u32 vcpu = vcpu_array[i]; 1078 1079 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1080 /* Guest event is per-thread from the host point of view */ 1081 sid->cpu.cpu = -1; 1082 sid->tid = gs->vcpu[vcpu].tid; 1083 sid->machine_pid = gs->machine_pid; 1084 sid->vcpu.cpu = vcpu; 1085 } 1086 out: 1087 free(vcpu_array); 1088 free(id_array); 1089 return ret; 1090 } 1091 1092 static int guest_session__add_attrs(struct guest_session *gs) 1093 { 1094 struct evlist *evlist = gs->session->evlist; 1095 struct evsel *evsel; 1096 int ret; 1097 1098 evlist__for_each_entry(evlist, evsel) { 1099 ret = guest_session__add_attr(gs, evsel); 1100 if (ret) 1101 return ret; 1102 } 1103 1104 return 0; 1105 } 1106 1107 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1108 { 1109 struct perf_session *session = inject->session; 1110 struct evlist *evlist = session->evlist; 1111 struct machine *machine = &session->machines.host; 1112 size_t from = evlist->core.nr_entries - new_cnt; 1113 1114 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1115 evlist, machine, from); 1116 } 1117 1118 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) 1119 { 1120 struct hlist_head *head; 1121 struct guest_tid *guest_tid; 1122 int hash; 1123 1124 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); 1125 head = &gs->tids[hash]; 1126 1127 hlist_for_each_entry(guest_tid, head, node) 1128 if (guest_tid->tid == tid) 1129 return guest_tid; 1130 1131 return NULL; 1132 } 1133 1134 static bool dso__is_in_kernel_space(struct dso *dso) 1135 { 1136 if (dso__is_vdso(dso)) 1137 return false; 1138 1139 return dso__is_kcore(dso) || 1140 dso->kernel || 1141 is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); 1142 } 1143 1144 static u64 evlist__first_id(struct evlist *evlist) 1145 { 1146 struct evsel *evsel; 1147 1148 evlist__for_each_entry(evlist, evsel) { 1149 if (evsel->core.ids) 1150 return evsel->core.id[0]; 1151 } 1152 return 0; 1153 } 1154 1155 static int process_build_id(struct perf_tool *tool, 1156 union perf_event *event, 1157 struct perf_sample *sample __maybe_unused, 1158 struct machine *machine __maybe_unused) 1159 { 1160 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1161 1162 return perf_event__process_build_id(inject->session, event); 1163 } 1164 1165 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) 1166 { 1167 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid); 1168 u8 cpumode = dso__is_in_kernel_space(dso) ? 1169 PERF_RECORD_MISC_GUEST_KERNEL : 1170 PERF_RECORD_MISC_GUEST_USER; 1171 1172 if (!machine) 1173 return -ENOMEM; 1174 1175 dso->hit = 1; 1176 1177 return perf_event__synthesize_build_id(&inject->tool, dso, cpumode, 1178 process_build_id, machine); 1179 } 1180 1181 static int guest_session__add_build_ids(struct guest_session *gs) 1182 { 1183 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1184 struct machine *machine = &gs->session->machines.host; 1185 struct dso *dso; 1186 int ret; 1187 1188 /* Build IDs will be put in the Build ID feature section */ 1189 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID); 1190 1191 dsos__for_each_with_build_id(dso, &machine->dsos.head) { 1192 ret = synthesize_build_id(inject, dso, gs->machine_pid); 1193 if (ret) 1194 return ret; 1195 } 1196 1197 return 0; 1198 } 1199 1200 static int guest_session__ksymbol_event(struct perf_tool *tool, 1201 union perf_event *event, 1202 struct perf_sample *sample __maybe_unused, 1203 struct machine *machine __maybe_unused) 1204 { 1205 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1206 1207 /* Only support out-of-line i.e. no BPF support */ 1208 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) 1209 return 0; 1210 1211 return guest_session__output_bytes(gs, event, event->header.size); 1212 } 1213 1214 static int guest_session__start(struct guest_session *gs, const char *name, bool force) 1215 { 1216 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX"; 1217 struct perf_session *session; 1218 int ret; 1219 1220 /* Only these events will be injected */ 1221 gs->tool.mmap = guest_session__repipe; 1222 gs->tool.mmap2 = guest_session__repipe; 1223 gs->tool.comm = guest_session__repipe; 1224 gs->tool.fork = guest_session__repipe; 1225 gs->tool.exit = guest_session__repipe; 1226 gs->tool.lost = guest_session__repipe; 1227 gs->tool.context_switch = guest_session__repipe; 1228 gs->tool.ksymbol = guest_session__ksymbol_event; 1229 gs->tool.text_poke = guest_session__repipe; 1230 /* 1231 * Processing a build ID creates a struct dso with that build ID. Later, 1232 * all guest dsos are iterated and the build IDs processed into the host 1233 * session where they will be output to the Build ID feature section 1234 * when the perf.data file header is written. 1235 */ 1236 gs->tool.build_id = perf_event__process_build_id; 1237 /* Process the id index to know what VCPU an ID belongs to */ 1238 gs->tool.id_index = perf_event__process_id_index; 1239 1240 gs->tool.ordered_events = true; 1241 gs->tool.ordering_requires_timestamps = true; 1242 1243 gs->data.path = name; 1244 gs->data.force = force; 1245 gs->data.mode = PERF_DATA_MODE_READ; 1246 1247 session = perf_session__new(&gs->data, &gs->tool); 1248 if (IS_ERR(session)) 1249 return PTR_ERR(session); 1250 gs->session = session; 1251 1252 /* 1253 * Initial events have zero'd ID samples. Get default ID sample size 1254 * used for removing them. 1255 */ 1256 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size; 1257 /* And default ID for adding back a host-compatible ID sample */ 1258 gs->dflt_id = evlist__first_id(session->evlist); 1259 if (!gs->dflt_id) { 1260 pr_err("Guest data has no sample IDs"); 1261 return -EINVAL; 1262 } 1263 1264 /* Temporary file for guest events */ 1265 gs->tmp_file_name = strdup(tmp_file_name); 1266 if (!gs->tmp_file_name) 1267 return -ENOMEM; 1268 gs->tmp_fd = mkstemp(gs->tmp_file_name); 1269 if (gs->tmp_fd < 0) 1270 return -errno; 1271 1272 if (zstd_init(&gs->session->zstd_data, 0) < 0) 1273 pr_warning("Guest session decompression initialization failed.\n"); 1274 1275 /* 1276 * perf does not support processing 2 sessions simultaneously, so output 1277 * guest events to a temporary file. 1278 */ 1279 ret = perf_session__process_events(gs->session); 1280 if (ret) 1281 return ret; 1282 1283 if (lseek(gs->tmp_fd, 0, SEEK_SET)) 1284 return -errno; 1285 1286 return 0; 1287 } 1288 1289 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ 1290 static void free_hlist(struct hlist_head *heads, size_t hlist_sz) 1291 { 1292 struct hlist_node *pos, *n; 1293 size_t i; 1294 1295 for (i = 0; i < hlist_sz; ++i) { 1296 hlist_for_each_safe(pos, n, &heads[i]) { 1297 hlist_del(pos); 1298 free(pos); 1299 } 1300 } 1301 } 1302 1303 static void guest_session__exit(struct guest_session *gs) 1304 { 1305 if (gs->session) { 1306 perf_session__delete(gs->session); 1307 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE); 1308 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE); 1309 } 1310 if (gs->tmp_file_name) { 1311 if (gs->tmp_fd >= 0) 1312 close(gs->tmp_fd); 1313 unlink(gs->tmp_file_name); 1314 zfree(&gs->tmp_file_name); 1315 } 1316 zfree(&gs->vcpu); 1317 zfree(&gs->perf_data_file); 1318 } 1319 1320 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv) 1321 { 1322 tc->time_shift = time_conv->time_shift; 1323 tc->time_mult = time_conv->time_mult; 1324 tc->time_zero = time_conv->time_zero; 1325 tc->time_cycles = time_conv->time_cycles; 1326 tc->time_mask = time_conv->time_mask; 1327 tc->cap_user_time_zero = time_conv->cap_user_time_zero; 1328 tc->cap_user_time_short = time_conv->cap_user_time_short; 1329 } 1330 1331 static void guest_session__get_tc(struct guest_session *gs) 1332 { 1333 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1334 1335 get_tsc_conv(&gs->host_tc, &inject->session->time_conv); 1336 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv); 1337 } 1338 1339 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) 1340 { 1341 u64 tsc; 1342 1343 if (!guest_time) { 1344 *host_time = 0; 1345 return; 1346 } 1347 1348 if (gs->guest_tc.cap_user_time_zero) 1349 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc); 1350 else 1351 tsc = guest_time; 1352 1353 /* 1354 * This is the correct order of operations for x86 if the TSC Offset and 1355 * Multiplier values are used. 1356 */ 1357 tsc -= gs->time_offset; 1358 tsc /= gs->time_scale; 1359 1360 if (gs->host_tc.cap_user_time_zero) 1361 *host_time = tsc_to_perf_time(tsc, &gs->host_tc); 1362 else 1363 *host_time = tsc; 1364 } 1365 1366 static int guest_session__fetch(struct guest_session *gs) 1367 { 1368 void *buf = gs->ev.event_buf; 1369 struct perf_event_header *hdr = buf; 1370 size_t hdr_sz = sizeof(*hdr); 1371 ssize_t ret; 1372 1373 ret = readn(gs->tmp_fd, buf, hdr_sz); 1374 if (ret < 0) 1375 return ret; 1376 1377 if (!ret) { 1378 /* Zero size means EOF */ 1379 hdr->size = 0; 1380 return 0; 1381 } 1382 1383 buf += hdr_sz; 1384 1385 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); 1386 if (ret < 0) 1387 return ret; 1388 1389 gs->ev.event = (union perf_event *)gs->ev.event_buf; 1390 gs->ev.sample.time = 0; 1391 1392 if (hdr->type >= PERF_RECORD_USER_TYPE_START) { 1393 pr_err("Unexpected type fetching guest event"); 1394 return 0; 1395 } 1396 1397 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample); 1398 if (ret) { 1399 pr_err("Parse failed fetching guest event"); 1400 return ret; 1401 } 1402 1403 if (!gs->have_tc) { 1404 guest_session__get_tc(gs); 1405 gs->have_tc = true; 1406 } 1407 1408 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time); 1409 1410 return 0; 1411 } 1412 1413 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, 1414 const struct perf_sample *sample) 1415 { 1416 struct evsel *evsel; 1417 void *array; 1418 int ret; 1419 1420 evsel = evlist__id2evsel(evlist, sample->id); 1421 array = ev; 1422 1423 if (!evsel) { 1424 pr_err("No evsel for id %"PRIu64"\n", sample->id); 1425 return -EINVAL; 1426 } 1427 1428 array += ev->header.size; 1429 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); 1430 if (ret < 0) 1431 return ret; 1432 1433 if (ret & 7) { 1434 pr_err("Bad id sample size %d\n", ret); 1435 return -EINVAL; 1436 } 1437 1438 ev->header.size += ret; 1439 1440 return 0; 1441 } 1442 1443 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp) 1444 { 1445 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1446 int ret; 1447 1448 if (!gs->ready) 1449 return 0; 1450 1451 while (1) { 1452 struct perf_sample *sample; 1453 struct guest_id *guest_id; 1454 union perf_event *ev; 1455 u16 id_hdr_size; 1456 u8 cpumode; 1457 u64 id; 1458 1459 if (!gs->fetched) { 1460 ret = guest_session__fetch(gs); 1461 if (ret) 1462 return ret; 1463 gs->fetched = true; 1464 } 1465 1466 ev = gs->ev.event; 1467 sample = &gs->ev.sample; 1468 1469 if (!ev->header.size) 1470 return 0; /* EOF */ 1471 1472 if (sample->time > timestamp) 1473 return 0; 1474 1475 /* Change cpumode to guest */ 1476 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1477 if (cpumode & PERF_RECORD_MISC_USER) 1478 cpumode = PERF_RECORD_MISC_GUEST_USER; 1479 else 1480 cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 1481 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK; 1482 ev->header.misc |= cpumode; 1483 1484 id = sample->id; 1485 if (!id) { 1486 id = gs->dflt_id; 1487 id_hdr_size = gs->dflt_id_hdr_size; 1488 } else { 1489 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id); 1490 1491 id_hdr_size = evsel__id_hdr_size(evsel); 1492 } 1493 1494 if (id_hdr_size & 7) { 1495 pr_err("Bad id_hdr_size %u\n", id_hdr_size); 1496 return -EINVAL; 1497 } 1498 1499 if (ev->header.size & 7) { 1500 pr_err("Bad event size %u\n", ev->header.size); 1501 return -EINVAL; 1502 } 1503 1504 /* Remove guest id sample */ 1505 ev->header.size -= id_hdr_size; 1506 1507 if (ev->header.size & 7) { 1508 pr_err("Bad raw event size %u\n", ev->header.size); 1509 return -EINVAL; 1510 } 1511 1512 guest_id = guest_session__lookup_id(gs, id); 1513 if (!guest_id) { 1514 pr_err("Guest event with unknown id %llu\n", 1515 (unsigned long long)id); 1516 return -EINVAL; 1517 } 1518 1519 /* Change to host ID to avoid conflicting ID values */ 1520 sample->id = guest_id->host_id; 1521 sample->stream_id = guest_id->host_id; 1522 1523 if (sample->cpu != (u32)-1) { 1524 if (sample->cpu >= gs->vcpu_cnt) { 1525 pr_err("Guest event with unknown VCPU %u\n", 1526 sample->cpu); 1527 return -EINVAL; 1528 } 1529 /* Change to host CPU instead of guest VCPU */ 1530 sample->cpu = gs->vcpu[sample->cpu].cpu; 1531 } 1532 1533 /* New id sample with new ID and CPU */ 1534 ret = evlist__append_id_sample(inject->session->evlist, ev, sample); 1535 if (ret) 1536 return ret; 1537 1538 if (ev->header.size & 7) { 1539 pr_err("Bad new event size %u\n", ev->header.size); 1540 return -EINVAL; 1541 } 1542 1543 gs->fetched = false; 1544 1545 ret = output_bytes(inject, ev, ev->header.size); 1546 if (ret) 1547 return ret; 1548 } 1549 } 1550 1551 static int guest_session__flush_events(struct guest_session *gs) 1552 { 1553 return guest_session__inject_events(gs, -1); 1554 } 1555 1556 static int host__repipe(struct perf_tool *tool, 1557 union perf_event *event, 1558 struct perf_sample *sample, 1559 struct machine *machine) 1560 { 1561 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1562 int ret; 1563 1564 ret = guest_session__inject_events(&inject->guest_session, sample->time); 1565 if (ret) 1566 return ret; 1567 1568 return perf_event__repipe(tool, event, sample, machine); 1569 } 1570 1571 static int host__finished_init(struct perf_session *session, union perf_event *event) 1572 { 1573 struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool); 1574 struct guest_session *gs = &inject->guest_session; 1575 int ret; 1576 1577 /* 1578 * Peek through host COMM events to find QEMU threads and the VCPU they 1579 * are running. 1580 */ 1581 ret = host_peek_vm_comms(session, gs); 1582 if (ret) 1583 return ret; 1584 1585 if (!gs->vcpu_cnt) { 1586 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid); 1587 return -EINVAL; 1588 } 1589 1590 /* 1591 * Allocate new (unused) host sample IDs and map them to the guest IDs. 1592 */ 1593 gs->highest_id = evlist__find_highest_id(session->evlist); 1594 ret = guest_session__map_ids(gs, session->evlist); 1595 if (ret) 1596 return ret; 1597 1598 ret = guest_session__add_attrs(gs); 1599 if (ret) 1600 return ret; 1601 1602 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries); 1603 if (ret) { 1604 pr_err("Failed to synthesize id_index\n"); 1605 return ret; 1606 } 1607 1608 ret = guest_session__add_build_ids(gs); 1609 if (ret) { 1610 pr_err("Failed to add guest build IDs\n"); 1611 return ret; 1612 } 1613 1614 gs->ready = true; 1615 1616 ret = guest_session__inject_events(gs, 0); 1617 if (ret) 1618 return ret; 1619 1620 return perf_event__repipe_op2_synth(session, event); 1621 } 1622 1623 /* 1624 * Obey finished-round ordering. The FINISHED_ROUND event is first processed 1625 * which flushes host events to file up until the last flush time. Then inject 1626 * guest events up to the same time. Finally write out the FINISHED_ROUND event 1627 * itself. 1628 */ 1629 static int host__finished_round(struct perf_tool *tool, 1630 union perf_event *event, 1631 struct ordered_events *oe) 1632 { 1633 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1634 int ret = perf_event__process_finished_round(tool, event, oe); 1635 u64 timestamp = ordered_events__last_flush_time(oe); 1636 1637 if (ret) 1638 return ret; 1639 1640 ret = guest_session__inject_events(&inject->guest_session, timestamp); 1641 if (ret) 1642 return ret; 1643 1644 return perf_event__repipe_oe_synth(tool, event, oe); 1645 } 1646 1647 static int host__context_switch(struct perf_tool *tool, 1648 union perf_event *event, 1649 struct perf_sample *sample, 1650 struct machine *machine) 1651 { 1652 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1653 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1654 struct guest_session *gs = &inject->guest_session; 1655 u32 pid = event->context_switch.next_prev_pid; 1656 u32 tid = event->context_switch.next_prev_tid; 1657 struct guest_tid *guest_tid; 1658 u32 vcpu; 1659 1660 if (out || pid != gs->machine_pid) 1661 goto out; 1662 1663 guest_tid = guest_session__lookup_tid(gs, tid); 1664 if (!guest_tid) 1665 goto out; 1666 1667 if (sample->cpu == (u32)-1) { 1668 pr_err("Switch event does not have CPU\n"); 1669 return -EINVAL; 1670 } 1671 1672 vcpu = guest_tid->vcpu; 1673 if (vcpu >= gs->vcpu_cnt) 1674 return -EINVAL; 1675 1676 /* Guest is switching in, record which CPU the VCPU is now running on */ 1677 gs->vcpu[vcpu].cpu = sample->cpu; 1678 out: 1679 return host__repipe(tool, event, sample, machine); 1680 } 1681 1682 static void sig_handler(int sig __maybe_unused) 1683 { 1684 session_done = 1; 1685 } 1686 1687 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) 1688 { 1689 struct perf_event_attr *attr = &evsel->core.attr; 1690 const char *name = evsel__name(evsel); 1691 1692 if (!(attr->sample_type & sample_type)) { 1693 pr_err("Samples for %s event do not have %s attribute set.", 1694 name, sample_msg); 1695 return -EINVAL; 1696 } 1697 1698 return 0; 1699 } 1700 1701 static int drop_sample(struct perf_tool *tool __maybe_unused, 1702 union perf_event *event __maybe_unused, 1703 struct perf_sample *sample __maybe_unused, 1704 struct evsel *evsel __maybe_unused, 1705 struct machine *machine __maybe_unused) 1706 { 1707 return 0; 1708 } 1709 1710 static void strip_init(struct perf_inject *inject) 1711 { 1712 struct evlist *evlist = inject->session->evlist; 1713 struct evsel *evsel; 1714 1715 inject->tool.context_switch = perf_event__drop; 1716 1717 evlist__for_each_entry(evlist, evsel) 1718 evsel->handler = drop_sample; 1719 } 1720 1721 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) 1722 { 1723 struct perf_inject *inject = opt->value; 1724 const char *args; 1725 char *dry_run; 1726 1727 if (unset) 1728 return 0; 1729 1730 inject->itrace_synth_opts.set = true; 1731 inject->itrace_synth_opts.vm_time_correlation = true; 1732 inject->in_place_update = true; 1733 1734 if (!str) 1735 return 0; 1736 1737 dry_run = skip_spaces(str); 1738 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { 1739 inject->itrace_synth_opts.vm_tm_corr_dry_run = true; 1740 inject->in_place_update_dry_run = true; 1741 args = dry_run + strlen("dry-run"); 1742 } else { 1743 args = str; 1744 } 1745 1746 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); 1747 1748 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; 1749 } 1750 1751 static int parse_guest_data(const struct option *opt, const char *str, int unset) 1752 { 1753 struct perf_inject *inject = opt->value; 1754 struct guest_session *gs = &inject->guest_session; 1755 char *tok; 1756 char *s; 1757 1758 if (unset) 1759 return 0; 1760 1761 if (!str) 1762 goto bad_args; 1763 1764 s = strdup(str); 1765 if (!s) 1766 return -ENOMEM; 1767 1768 gs->perf_data_file = strsep(&s, ","); 1769 if (!gs->perf_data_file) 1770 goto bad_args; 1771 1772 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file); 1773 if (gs->copy_kcore_dir) 1774 inject->output.is_dir = true; 1775 1776 tok = strsep(&s, ","); 1777 if (!tok) 1778 goto bad_args; 1779 gs->machine_pid = strtoul(tok, NULL, 0); 1780 if (!inject->guest_session.machine_pid) 1781 goto bad_args; 1782 1783 gs->time_scale = 1; 1784 1785 tok = strsep(&s, ","); 1786 if (!tok) 1787 goto out; 1788 gs->time_offset = strtoull(tok, NULL, 0); 1789 1790 tok = strsep(&s, ","); 1791 if (!tok) 1792 goto out; 1793 gs->time_scale = strtod(tok, NULL); 1794 if (!gs->time_scale) 1795 goto bad_args; 1796 out: 1797 return 0; 1798 1799 bad_args: 1800 pr_err("--guest-data option requires guest perf.data file name, " 1801 "guest machine PID, and optionally guest timestamp offset, " 1802 "and guest timestamp scale factor, separated by commas.\n"); 1803 return -1; 1804 } 1805 1806 static int save_section_info_cb(struct perf_file_section *section, 1807 struct perf_header *ph __maybe_unused, 1808 int feat, int fd __maybe_unused, void *data) 1809 { 1810 struct perf_inject *inject = data; 1811 1812 inject->secs[feat] = *section; 1813 return 0; 1814 } 1815 1816 static int save_section_info(struct perf_inject *inject) 1817 { 1818 struct perf_header *header = &inject->session->header; 1819 int fd = perf_data__fd(inject->session->data); 1820 1821 return perf_header__process_sections(header, fd, inject, save_section_info_cb); 1822 } 1823 1824 static bool keep_feat(int feat) 1825 { 1826 switch (feat) { 1827 /* Keep original information that describes the machine or software */ 1828 case HEADER_TRACING_DATA: 1829 case HEADER_HOSTNAME: 1830 case HEADER_OSRELEASE: 1831 case HEADER_VERSION: 1832 case HEADER_ARCH: 1833 case HEADER_NRCPUS: 1834 case HEADER_CPUDESC: 1835 case HEADER_CPUID: 1836 case HEADER_TOTAL_MEM: 1837 case HEADER_CPU_TOPOLOGY: 1838 case HEADER_NUMA_TOPOLOGY: 1839 case HEADER_PMU_MAPPINGS: 1840 case HEADER_CACHE: 1841 case HEADER_MEM_TOPOLOGY: 1842 case HEADER_CLOCKID: 1843 case HEADER_BPF_PROG_INFO: 1844 case HEADER_BPF_BTF: 1845 case HEADER_CPU_PMU_CAPS: 1846 case HEADER_CLOCK_DATA: 1847 case HEADER_HYBRID_TOPOLOGY: 1848 case HEADER_PMU_CAPS: 1849 return true; 1850 /* Information that can be updated */ 1851 case HEADER_BUILD_ID: 1852 case HEADER_CMDLINE: 1853 case HEADER_EVENT_DESC: 1854 case HEADER_BRANCH_STACK: 1855 case HEADER_GROUP_DESC: 1856 case HEADER_AUXTRACE: 1857 case HEADER_STAT: 1858 case HEADER_SAMPLE_TIME: 1859 case HEADER_DIR_FORMAT: 1860 case HEADER_COMPRESSED: 1861 default: 1862 return false; 1863 }; 1864 } 1865 1866 static int read_file(int fd, u64 offs, void *buf, size_t sz) 1867 { 1868 ssize_t ret = preadn(fd, buf, sz, offs); 1869 1870 if (ret < 0) 1871 return -errno; 1872 if ((size_t)ret != sz) 1873 return -EINVAL; 1874 return 0; 1875 } 1876 1877 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) 1878 { 1879 int fd = perf_data__fd(inject->session->data); 1880 u64 offs = inject->secs[feat].offset; 1881 size_t sz = inject->secs[feat].size; 1882 void *buf = malloc(sz); 1883 int ret; 1884 1885 if (!buf) 1886 return -ENOMEM; 1887 1888 ret = read_file(fd, offs, buf, sz); 1889 if (ret) 1890 goto out_free; 1891 1892 ret = fw->write(fw, buf, sz); 1893 out_free: 1894 free(buf); 1895 return ret; 1896 } 1897 1898 struct inject_fc { 1899 struct feat_copier fc; 1900 struct perf_inject *inject; 1901 }; 1902 1903 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) 1904 { 1905 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); 1906 struct perf_inject *inject = inj_fc->inject; 1907 int ret; 1908 1909 if (!inject->secs[feat].offset || 1910 !keep_feat(feat)) 1911 return 0; 1912 1913 ret = feat_copy(inject, feat, fw); 1914 if (ret < 0) 1915 return ret; 1916 1917 return 1; /* Feature section copied */ 1918 } 1919 1920 static int copy_kcore_dir(struct perf_inject *inject) 1921 { 1922 char *cmd; 1923 int ret; 1924 1925 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1", 1926 inject->input_name, inject->output.path); 1927 if (ret < 0) 1928 return ret; 1929 pr_debug("%s\n", cmd); 1930 ret = system(cmd); 1931 free(cmd); 1932 return ret; 1933 } 1934 1935 static int guest_session__copy_kcore_dir(struct guest_session *gs) 1936 { 1937 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1938 char *cmd; 1939 int ret; 1940 1941 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1", 1942 gs->perf_data_file, inject->output.path, gs->machine_pid); 1943 if (ret < 0) 1944 return ret; 1945 pr_debug("%s\n", cmd); 1946 ret = system(cmd); 1947 free(cmd); 1948 return ret; 1949 } 1950 1951 static int output_fd(struct perf_inject *inject) 1952 { 1953 return inject->in_place_update ? -1 : perf_data__fd(&inject->output); 1954 } 1955 1956 static int __cmd_inject(struct perf_inject *inject) 1957 { 1958 int ret = -EINVAL; 1959 struct guest_session *gs = &inject->guest_session; 1960 struct perf_session *session = inject->session; 1961 int fd = output_fd(inject); 1962 u64 output_data_offset; 1963 1964 signal(SIGINT, sig_handler); 1965 1966 if (inject->build_ids || inject->sched_stat || 1967 inject->itrace_synth_opts.set || inject->build_id_all) { 1968 inject->tool.mmap = perf_event__repipe_mmap; 1969 inject->tool.mmap2 = perf_event__repipe_mmap2; 1970 inject->tool.fork = perf_event__repipe_fork; 1971 #ifdef HAVE_LIBTRACEEVENT 1972 inject->tool.tracing_data = perf_event__repipe_tracing_data; 1973 #endif 1974 } 1975 1976 output_data_offset = perf_session__data_offset(session->evlist); 1977 1978 if (inject->build_id_all) { 1979 inject->tool.mmap = perf_event__repipe_buildid_mmap; 1980 inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; 1981 } else if (inject->build_ids) { 1982 inject->tool.sample = perf_event__inject_buildid; 1983 } else if (inject->sched_stat) { 1984 struct evsel *evsel; 1985 1986 evlist__for_each_entry(session->evlist, evsel) { 1987 const char *name = evsel__name(evsel); 1988 1989 if (!strcmp(name, "sched:sched_switch")) { 1990 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) 1991 return -EINVAL; 1992 1993 evsel->handler = perf_inject__sched_switch; 1994 } else if (!strcmp(name, "sched:sched_process_exit")) 1995 evsel->handler = perf_inject__sched_process_exit; 1996 #ifdef HAVE_LIBTRACEEVENT 1997 else if (!strncmp(name, "sched:sched_stat_", 17)) 1998 evsel->handler = perf_inject__sched_stat; 1999 #endif 2000 } 2001 } else if (inject->itrace_synth_opts.vm_time_correlation) { 2002 session->itrace_synth_opts = &inject->itrace_synth_opts; 2003 memset(&inject->tool, 0, sizeof(inject->tool)); 2004 inject->tool.id_index = perf_event__process_id_index; 2005 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2006 inject->tool.auxtrace = perf_event__process_auxtrace; 2007 inject->tool.auxtrace_error = perf_event__process_auxtrace_error; 2008 inject->tool.ordered_events = true; 2009 inject->tool.ordering_requires_timestamps = true; 2010 } else if (inject->itrace_synth_opts.set) { 2011 session->itrace_synth_opts = &inject->itrace_synth_opts; 2012 inject->itrace_synth_opts.inject = true; 2013 inject->tool.comm = perf_event__repipe_comm; 2014 inject->tool.namespaces = perf_event__repipe_namespaces; 2015 inject->tool.exit = perf_event__repipe_exit; 2016 inject->tool.id_index = perf_event__process_id_index; 2017 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2018 inject->tool.auxtrace = perf_event__process_auxtrace; 2019 inject->tool.aux = perf_event__drop_aux; 2020 inject->tool.itrace_start = perf_event__drop_aux; 2021 inject->tool.aux_output_hw_id = perf_event__drop_aux; 2022 inject->tool.ordered_events = true; 2023 inject->tool.ordering_requires_timestamps = true; 2024 /* Allow space in the header for new attributes */ 2025 output_data_offset = roundup(8192 + session->header.data_offset, 4096); 2026 if (inject->strip) 2027 strip_init(inject); 2028 } else if (gs->perf_data_file) { 2029 char *name = gs->perf_data_file; 2030 2031 /* 2032 * Not strictly necessary, but keep these events in order wrt 2033 * guest events. 2034 */ 2035 inject->tool.mmap = host__repipe; 2036 inject->tool.mmap2 = host__repipe; 2037 inject->tool.comm = host__repipe; 2038 inject->tool.fork = host__repipe; 2039 inject->tool.exit = host__repipe; 2040 inject->tool.lost = host__repipe; 2041 inject->tool.context_switch = host__repipe; 2042 inject->tool.ksymbol = host__repipe; 2043 inject->tool.text_poke = host__repipe; 2044 /* 2045 * Once the host session has initialized, set up sample ID 2046 * mapping and feed in guest attrs, build IDs and initial 2047 * events. 2048 */ 2049 inject->tool.finished_init = host__finished_init; 2050 /* Obey finished round ordering */ 2051 inject->tool.finished_round = host__finished_round, 2052 /* Keep track of which CPU a VCPU is runnng on */ 2053 inject->tool.context_switch = host__context_switch; 2054 /* 2055 * Must order events to be able to obey finished round 2056 * ordering. 2057 */ 2058 inject->tool.ordered_events = true; 2059 inject->tool.ordering_requires_timestamps = true; 2060 /* Set up a separate session to process guest perf.data file */ 2061 ret = guest_session__start(gs, name, session->data->force); 2062 if (ret) { 2063 pr_err("Failed to process %s, error %d\n", name, ret); 2064 return ret; 2065 } 2066 /* Allow space in the header for guest attributes */ 2067 output_data_offset += gs->session->header.data_offset; 2068 output_data_offset = roundup(output_data_offset, 4096); 2069 } 2070 2071 if (!inject->itrace_synth_opts.set) 2072 auxtrace_index__free(&session->auxtrace_index); 2073 2074 if (!inject->is_pipe && !inject->in_place_update) 2075 lseek(fd, output_data_offset, SEEK_SET); 2076 2077 ret = perf_session__process_events(session); 2078 if (ret) 2079 return ret; 2080 2081 if (gs->session) { 2082 /* 2083 * Remaining guest events have later timestamps. Flush them 2084 * out to file. 2085 */ 2086 ret = guest_session__flush_events(gs); 2087 if (ret) { 2088 pr_err("Failed to flush guest events\n"); 2089 return ret; 2090 } 2091 } 2092 2093 if (!inject->is_pipe && !inject->in_place_update) { 2094 struct inject_fc inj_fc = { 2095 .fc.copy = feat_copy_cb, 2096 .inject = inject, 2097 }; 2098 2099 if (inject->build_ids) 2100 perf_header__set_feat(&session->header, 2101 HEADER_BUILD_ID); 2102 /* 2103 * Keep all buildids when there is unprocessed AUX data because 2104 * it is not known which ones the AUX trace hits. 2105 */ 2106 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && 2107 inject->have_auxtrace && !inject->itrace_synth_opts.set) 2108 dsos__hit_all(session); 2109 /* 2110 * The AUX areas have been removed and replaced with 2111 * synthesized hardware events, so clear the feature flag. 2112 */ 2113 if (inject->itrace_synth_opts.set) { 2114 perf_header__clear_feat(&session->header, 2115 HEADER_AUXTRACE); 2116 if (inject->itrace_synth_opts.last_branch || 2117 inject->itrace_synth_opts.add_last_branch) 2118 perf_header__set_feat(&session->header, 2119 HEADER_BRANCH_STACK); 2120 } 2121 session->header.data_offset = output_data_offset; 2122 session->header.data_size = inject->bytes_written; 2123 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc); 2124 2125 if (inject->copy_kcore_dir) { 2126 ret = copy_kcore_dir(inject); 2127 if (ret) { 2128 pr_err("Failed to copy kcore\n"); 2129 return ret; 2130 } 2131 } 2132 if (gs->copy_kcore_dir) { 2133 ret = guest_session__copy_kcore_dir(gs); 2134 if (ret) { 2135 pr_err("Failed to copy guest kcore\n"); 2136 return ret; 2137 } 2138 } 2139 } 2140 2141 return ret; 2142 } 2143 2144 int cmd_inject(int argc, const char **argv) 2145 { 2146 struct perf_inject inject = { 2147 .tool = { 2148 .sample = perf_event__repipe_sample, 2149 .read = perf_event__repipe_sample, 2150 .mmap = perf_event__repipe, 2151 .mmap2 = perf_event__repipe, 2152 .comm = perf_event__repipe, 2153 .namespaces = perf_event__repipe, 2154 .cgroup = perf_event__repipe, 2155 .fork = perf_event__repipe, 2156 .exit = perf_event__repipe, 2157 .lost = perf_event__repipe, 2158 .lost_samples = perf_event__repipe, 2159 .aux = perf_event__repipe, 2160 .itrace_start = perf_event__repipe, 2161 .aux_output_hw_id = perf_event__repipe, 2162 .context_switch = perf_event__repipe, 2163 .throttle = perf_event__repipe, 2164 .unthrottle = perf_event__repipe, 2165 .ksymbol = perf_event__repipe, 2166 .bpf = perf_event__repipe, 2167 .text_poke = perf_event__repipe, 2168 .attr = perf_event__repipe_attr, 2169 .event_update = perf_event__repipe_event_update, 2170 .tracing_data = perf_event__repipe_op2_synth, 2171 .finished_round = perf_event__repipe_oe_synth, 2172 .build_id = perf_event__repipe_op2_synth, 2173 .id_index = perf_event__repipe_op2_synth, 2174 .auxtrace_info = perf_event__repipe_op2_synth, 2175 .auxtrace_error = perf_event__repipe_op2_synth, 2176 .time_conv = perf_event__repipe_op2_synth, 2177 .thread_map = perf_event__repipe_op2_synth, 2178 .cpu_map = perf_event__repipe_op2_synth, 2179 .stat_config = perf_event__repipe_op2_synth, 2180 .stat = perf_event__repipe_op2_synth, 2181 .stat_round = perf_event__repipe_op2_synth, 2182 .feature = perf_event__repipe_op2_synth, 2183 .finished_init = perf_event__repipe_op2_synth, 2184 .compressed = perf_event__repipe_op4_synth, 2185 .auxtrace = perf_event__repipe_auxtrace, 2186 }, 2187 .input_name = "-", 2188 .samples = LIST_HEAD_INIT(inject.samples), 2189 .output = { 2190 .path = "-", 2191 .mode = PERF_DATA_MODE_WRITE, 2192 .use_stdio = true, 2193 }, 2194 }; 2195 struct perf_data data = { 2196 .mode = PERF_DATA_MODE_READ, 2197 .use_stdio = true, 2198 }; 2199 int ret; 2200 bool repipe = true; 2201 const char *known_build_ids = NULL; 2202 2203 struct option options[] = { 2204 OPT_BOOLEAN('b', "build-ids", &inject.build_ids, 2205 "Inject build-ids into the output stream"), 2206 OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all, 2207 "Inject build-ids of all DSOs into the output stream"), 2208 OPT_STRING(0, "known-build-ids", &known_build_ids, 2209 "buildid path [,buildid path...]", 2210 "build-ids to use for given paths"), 2211 OPT_STRING('i', "input", &inject.input_name, "file", 2212 "input file name"), 2213 OPT_STRING('o', "output", &inject.output.path, "file", 2214 "output file name"), 2215 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2216 "Merge sched-stat and sched-switch for getting events " 2217 "where and how long tasks slept"), 2218 #ifdef HAVE_JITDUMP 2219 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2220 #endif 2221 OPT_INCR('v', "verbose", &verbose, 2222 "be more verbose (show build ids, etc)"), 2223 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2224 "file", "vmlinux pathname"), 2225 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2226 "don't load vmlinux even if found"), 2227 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2228 "kallsyms pathname"), 2229 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2230 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2231 NULL, "opts", "Instruction Tracing options\n" 2232 ITRACE_HELP, 2233 itrace_parse_synth_opts), 2234 OPT_BOOLEAN(0, "strip", &inject.strip, 2235 "strip non-synthesized events (use with --itrace)"), 2236 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2237 "correlate time between VM guests and the host", 2238 parse_vm_time_correlation), 2239 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2240 "inject events from a guest perf.data file", 2241 parse_guest_data), 2242 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2243 "guest mount directory under which every guest os" 2244 " instance has a subdir"), 2245 OPT_END() 2246 }; 2247 const char * const inject_usage[] = { 2248 "perf inject [<options>]", 2249 NULL 2250 }; 2251 #ifndef HAVE_JITDUMP 2252 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2253 #endif 2254 argc = parse_options(argc, argv, options, inject_usage, 0); 2255 2256 /* 2257 * Any (unrecognized) arguments left? 2258 */ 2259 if (argc) 2260 usage_with_options(inject_usage, options); 2261 2262 if (inject.strip && !inject.itrace_synth_opts.set) { 2263 pr_err("--strip option requires --itrace option\n"); 2264 return -1; 2265 } 2266 2267 if (symbol__validate_sym_arguments()) 2268 return -1; 2269 2270 if (inject.in_place_update) { 2271 if (!strcmp(inject.input_name, "-")) { 2272 pr_err("Input file name required for in-place updating\n"); 2273 return -1; 2274 } 2275 if (strcmp(inject.output.path, "-")) { 2276 pr_err("Output file name must not be specified for in-place updating\n"); 2277 return -1; 2278 } 2279 if (!data.force && !inject.in_place_update_dry_run) { 2280 pr_err("The input file would be updated in place, " 2281 "the --force option is required.\n"); 2282 return -1; 2283 } 2284 if (!inject.in_place_update_dry_run) 2285 data.in_place_update = true; 2286 } else { 2287 if (strcmp(inject.output.path, "-") && !inject.strip && 2288 has_kcore_dir(inject.input_name)) { 2289 inject.output.is_dir = true; 2290 inject.copy_kcore_dir = true; 2291 } 2292 if (perf_data__open(&inject.output)) { 2293 perror("failed to create output file"); 2294 return -1; 2295 } 2296 } 2297 2298 data.path = inject.input_name; 2299 if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) { 2300 inject.is_pipe = true; 2301 /* 2302 * Do not repipe header when input is a regular file 2303 * since either it can rewrite the header at the end 2304 * or write a new pipe header. 2305 */ 2306 if (strcmp(inject.input_name, "-")) 2307 repipe = false; 2308 } 2309 2310 inject.session = __perf_session__new(&data, repipe, 2311 output_fd(&inject), 2312 &inject.tool); 2313 if (IS_ERR(inject.session)) { 2314 ret = PTR_ERR(inject.session); 2315 goto out_close_output; 2316 } 2317 2318 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2319 pr_warning("Decompression initialization failed.\n"); 2320 2321 /* Save original section info before feature bits change */ 2322 ret = save_section_info(&inject); 2323 if (ret) 2324 goto out_delete; 2325 2326 if (!data.is_pipe && inject.output.is_pipe) { 2327 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2328 if (ret < 0) { 2329 pr_err("Couldn't write a new pipe header.\n"); 2330 goto out_delete; 2331 } 2332 2333 ret = perf_event__synthesize_for_pipe(&inject.tool, 2334 inject.session, 2335 &inject.output, 2336 perf_event__repipe); 2337 if (ret < 0) 2338 goto out_delete; 2339 } 2340 2341 if (inject.build_ids && !inject.build_id_all) { 2342 /* 2343 * to make sure the mmap records are ordered correctly 2344 * and so that the correct especially due to jitted code 2345 * mmaps. We cannot generate the buildid hit list and 2346 * inject the jit mmaps at the same time for now. 2347 */ 2348 inject.tool.ordered_events = true; 2349 inject.tool.ordering_requires_timestamps = true; 2350 if (known_build_ids != NULL) { 2351 inject.known_build_ids = 2352 perf_inject__parse_known_build_ids(known_build_ids); 2353 2354 if (inject.known_build_ids == NULL) { 2355 pr_err("Couldn't parse known build ids.\n"); 2356 goto out_delete; 2357 } 2358 } 2359 } 2360 2361 if (inject.sched_stat) { 2362 inject.tool.ordered_events = true; 2363 } 2364 2365 #ifdef HAVE_JITDUMP 2366 if (inject.jit_mode) { 2367 inject.tool.mmap2 = perf_event__jit_repipe_mmap2; 2368 inject.tool.mmap = perf_event__jit_repipe_mmap; 2369 inject.tool.ordered_events = true; 2370 inject.tool.ordering_requires_timestamps = true; 2371 /* 2372 * JIT MMAP injection injects all MMAP events in one go, so it 2373 * does not obey finished_round semantics. 2374 */ 2375 inject.tool.finished_round = perf_event__drop_oe; 2376 } 2377 #endif 2378 ret = symbol__init(&inject.session->header.env); 2379 if (ret < 0) 2380 goto out_delete; 2381 2382 ret = __cmd_inject(&inject); 2383 2384 guest_session__exit(&inject.guest_session); 2385 2386 out_delete: 2387 strlist__delete(inject.known_build_ids); 2388 zstd_fini(&(inject.session->zstd_data)); 2389 perf_session__delete(inject.session); 2390 out_close_output: 2391 if (!inject.in_place_update) 2392 perf_data__close(&inject.output); 2393 free(inject.itrace_synth_opts.vm_tm_corr_args); 2394 return ret; 2395 } 2396