1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include "util/dso.h" 13 #include "util/vdso.h" 14 #include "util/evlist.h" 15 #include "util/evsel.h" 16 #include "util/map.h" 17 #include "util/session.h" 18 #include "util/tool.h" 19 #include "util/debug.h" 20 #include "util/build-id.h" 21 #include "util/data.h" 22 #include "util/auxtrace.h" 23 #include "util/jit.h" 24 #include "util/string2.h" 25 #include "util/symbol.h" 26 #include "util/synthetic-events.h" 27 #include "util/thread.h" 28 #include "util/namespaces.h" 29 #include "util/util.h" 30 #include "util/tsc.h" 31 32 #include <internal/lib.h> 33 34 #include <linux/err.h> 35 #include <subcmd/parse-options.h> 36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 37 38 #include <linux/list.h> 39 #include <linux/string.h> 40 #include <linux/zalloc.h> 41 #include <linux/hash.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <inttypes.h> 46 47 struct guest_event { 48 struct perf_sample sample; 49 union perf_event *event; 50 char *event_buf; 51 }; 52 53 struct guest_id { 54 /* hlist_node must be first, see free_hlist() */ 55 struct hlist_node node; 56 u64 id; 57 u64 host_id; 58 u32 vcpu; 59 }; 60 61 struct guest_tid { 62 /* hlist_node must be first, see free_hlist() */ 63 struct hlist_node node; 64 /* Thread ID of QEMU thread */ 65 u32 tid; 66 u32 vcpu; 67 }; 68 69 struct guest_vcpu { 70 /* Current host CPU */ 71 u32 cpu; 72 /* Thread ID of QEMU thread */ 73 u32 tid; 74 }; 75 76 struct guest_session { 77 char *perf_data_file; 78 u32 machine_pid; 79 u64 time_offset; 80 double time_scale; 81 struct perf_tool tool; 82 struct perf_data data; 83 struct perf_session *session; 84 char *tmp_file_name; 85 int tmp_fd; 86 struct perf_tsc_conversion host_tc; 87 struct perf_tsc_conversion guest_tc; 88 bool copy_kcore_dir; 89 bool have_tc; 90 bool fetched; 91 bool ready; 92 u16 dflt_id_hdr_size; 93 u64 dflt_id; 94 u64 highest_id; 95 /* Array of guest_vcpu */ 96 struct guest_vcpu *vcpu; 97 size_t vcpu_cnt; 98 /* Hash table for guest_id */ 99 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 100 /* Hash table for guest_tid */ 101 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE]; 102 /* Place to stash next guest event */ 103 struct guest_event ev; 104 }; 105 106 struct perf_inject { 107 struct perf_tool tool; 108 struct perf_session *session; 109 bool build_ids; 110 bool build_id_all; 111 bool sched_stat; 112 bool have_auxtrace; 113 bool strip; 114 bool jit_mode; 115 bool in_place_update; 116 bool in_place_update_dry_run; 117 bool is_pipe; 118 bool copy_kcore_dir; 119 const char *input_name; 120 struct perf_data output; 121 u64 bytes_written; 122 u64 aux_id; 123 struct list_head samples; 124 struct itrace_synth_opts itrace_synth_opts; 125 char *event_copy; 126 struct perf_file_section secs[HEADER_FEAT_BITS]; 127 struct guest_session guest_session; 128 struct strlist *known_build_ids; 129 }; 130 131 struct event_entry { 132 struct list_head node; 133 u32 tid; 134 union perf_event event[]; 135 }; 136 137 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, 138 struct machine *machine, u8 cpumode, u32 flags); 139 140 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 141 { 142 ssize_t size; 143 144 size = perf_data__write(&inject->output, buf, sz); 145 if (size < 0) 146 return -errno; 147 148 inject->bytes_written += size; 149 return 0; 150 } 151 152 static int perf_event__repipe_synth(struct perf_tool *tool, 153 union perf_event *event) 154 { 155 struct perf_inject *inject = container_of(tool, struct perf_inject, 156 tool); 157 158 return output_bytes(inject, event, event->header.size); 159 } 160 161 static int perf_event__repipe_oe_synth(struct perf_tool *tool, 162 union perf_event *event, 163 struct ordered_events *oe __maybe_unused) 164 { 165 return perf_event__repipe_synth(tool, event); 166 } 167 168 #ifdef HAVE_JITDUMP 169 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, 170 union perf_event *event __maybe_unused, 171 struct ordered_events *oe __maybe_unused) 172 { 173 return 0; 174 } 175 #endif 176 177 static int perf_event__repipe_op2_synth(struct perf_session *session, 178 union perf_event *event) 179 { 180 return perf_event__repipe_synth(session->tool, event); 181 } 182 183 static int perf_event__repipe_op4_synth(struct perf_session *session, 184 union perf_event *event, 185 u64 data __maybe_unused, 186 const char *str __maybe_unused) 187 { 188 return perf_event__repipe_synth(session->tool, event); 189 } 190 191 static int perf_event__repipe_attr(struct perf_tool *tool, 192 union perf_event *event, 193 struct evlist **pevlist) 194 { 195 struct perf_inject *inject = container_of(tool, struct perf_inject, 196 tool); 197 int ret; 198 199 ret = perf_event__process_attr(tool, event, pevlist); 200 if (ret) 201 return ret; 202 203 if (!inject->is_pipe) 204 return 0; 205 206 return perf_event__repipe_synth(tool, event); 207 } 208 209 static int perf_event__repipe_event_update(struct perf_tool *tool, 210 union perf_event *event, 211 struct evlist **pevlist __maybe_unused) 212 { 213 return perf_event__repipe_synth(tool, event); 214 } 215 216 #ifdef HAVE_AUXTRACE_SUPPORT 217 218 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 219 { 220 char buf[4096]; 221 ssize_t ssz; 222 int ret; 223 224 while (size > 0) { 225 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 226 if (ssz < 0) 227 return -errno; 228 ret = output_bytes(inject, buf, ssz); 229 if (ret) 230 return ret; 231 size -= ssz; 232 } 233 234 return 0; 235 } 236 237 static s64 perf_event__repipe_auxtrace(struct perf_session *session, 238 union perf_event *event) 239 { 240 struct perf_tool *tool = session->tool; 241 struct perf_inject *inject = container_of(tool, struct perf_inject, 242 tool); 243 int ret; 244 245 inject->have_auxtrace = true; 246 247 if (!inject->output.is_pipe) { 248 off_t offset; 249 250 offset = lseek(inject->output.file.fd, 0, SEEK_CUR); 251 if (offset == -1) 252 return -errno; 253 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 254 event, offset); 255 if (ret < 0) 256 return ret; 257 } 258 259 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 260 ret = output_bytes(inject, event, event->header.size); 261 if (ret < 0) 262 return ret; 263 ret = copy_bytes(inject, session->data, 264 event->auxtrace.size); 265 } else { 266 ret = output_bytes(inject, event, 267 event->header.size + event->auxtrace.size); 268 } 269 if (ret < 0) 270 return ret; 271 272 return event->auxtrace.size; 273 } 274 275 #else 276 277 static s64 278 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, 279 union perf_event *event __maybe_unused) 280 { 281 pr_err("AUX area tracing not supported\n"); 282 return -EINVAL; 283 } 284 285 #endif 286 287 static int perf_event__repipe(struct perf_tool *tool, 288 union perf_event *event, 289 struct perf_sample *sample __maybe_unused, 290 struct machine *machine __maybe_unused) 291 { 292 return perf_event__repipe_synth(tool, event); 293 } 294 295 static int perf_event__drop(struct perf_tool *tool __maybe_unused, 296 union perf_event *event __maybe_unused, 297 struct perf_sample *sample __maybe_unused, 298 struct machine *machine __maybe_unused) 299 { 300 return 0; 301 } 302 303 static int perf_event__drop_aux(struct perf_tool *tool, 304 union perf_event *event __maybe_unused, 305 struct perf_sample *sample, 306 struct machine *machine __maybe_unused) 307 { 308 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 309 310 if (!inject->aux_id) 311 inject->aux_id = sample->id; 312 313 return 0; 314 } 315 316 static union perf_event * 317 perf_inject__cut_auxtrace_sample(struct perf_inject *inject, 318 union perf_event *event, 319 struct perf_sample *sample) 320 { 321 size_t sz1 = sample->aux_sample.data - (void *)event; 322 size_t sz2 = event->header.size - sample->aux_sample.size - sz1; 323 union perf_event *ev; 324 325 if (inject->event_copy == NULL) { 326 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 327 if (!inject->event_copy) 328 return ERR_PTR(-ENOMEM); 329 } 330 ev = (union perf_event *)inject->event_copy; 331 if (sz1 > event->header.size || sz2 > event->header.size || 332 sz1 + sz2 > event->header.size || 333 sz1 < sizeof(struct perf_event_header) + sizeof(u64)) 334 return event; 335 336 memcpy(ev, event, sz1); 337 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); 338 ev->header.size = sz1 + sz2; 339 ((u64 *)((void *)ev + sz1))[-1] = 0; 340 341 return ev; 342 } 343 344 typedef int (*inject_handler)(struct perf_tool *tool, 345 union perf_event *event, 346 struct perf_sample *sample, 347 struct evsel *evsel, 348 struct machine *machine); 349 350 static int perf_event__repipe_sample(struct perf_tool *tool, 351 union perf_event *event, 352 struct perf_sample *sample, 353 struct evsel *evsel, 354 struct machine *machine) 355 { 356 struct perf_inject *inject = container_of(tool, struct perf_inject, 357 tool); 358 359 if (evsel && evsel->handler) { 360 inject_handler f = evsel->handler; 361 return f(tool, event, sample, evsel, machine); 362 } 363 364 build_id__mark_dso_hit(tool, event, sample, evsel, machine); 365 366 if (inject->itrace_synth_opts.set && sample->aux_sample.size) { 367 event = perf_inject__cut_auxtrace_sample(inject, event, sample); 368 if (IS_ERR(event)) 369 return PTR_ERR(event); 370 } 371 372 return perf_event__repipe_synth(tool, event); 373 } 374 375 static int perf_event__repipe_mmap(struct perf_tool *tool, 376 union perf_event *event, 377 struct perf_sample *sample, 378 struct machine *machine) 379 { 380 int err; 381 382 err = perf_event__process_mmap(tool, event, sample, machine); 383 perf_event__repipe(tool, event, sample, machine); 384 385 return err; 386 } 387 388 #ifdef HAVE_JITDUMP 389 static int perf_event__jit_repipe_mmap(struct perf_tool *tool, 390 union perf_event *event, 391 struct perf_sample *sample, 392 struct machine *machine) 393 { 394 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 395 u64 n = 0; 396 int ret; 397 398 /* 399 * if jit marker, then inject jit mmaps and generate ELF images 400 */ 401 ret = jit_process(inject->session, &inject->output, machine, 402 event->mmap.filename, event->mmap.pid, event->mmap.tid, &n); 403 if (ret < 0) 404 return ret; 405 if (ret) { 406 inject->bytes_written += n; 407 return 0; 408 } 409 return perf_event__repipe_mmap(tool, event, sample, machine); 410 } 411 #endif 412 413 static struct dso *findnew_dso(int pid, int tid, const char *filename, 414 struct dso_id *id, struct machine *machine) 415 { 416 struct thread *thread; 417 struct nsinfo *nsi = NULL; 418 struct nsinfo *nnsi; 419 struct dso *dso; 420 bool vdso; 421 422 thread = machine__findnew_thread(machine, pid, tid); 423 if (thread == NULL) { 424 pr_err("cannot find or create a task %d/%d.\n", tid, pid); 425 return NULL; 426 } 427 428 vdso = is_vdso_map(filename); 429 nsi = nsinfo__get(thread__nsinfo(thread)); 430 431 if (vdso) { 432 /* The vdso maps are always on the host and not the 433 * container. Ensure that we don't use setns to look 434 * them up. 435 */ 436 nnsi = nsinfo__copy(nsi); 437 if (nnsi) { 438 nsinfo__put(nsi); 439 nsinfo__clear_need_setns(nnsi); 440 nsi = nnsi; 441 } 442 dso = machine__findnew_vdso(machine, thread); 443 } else { 444 dso = machine__findnew_dso_id(machine, filename, id); 445 } 446 447 if (dso) { 448 mutex_lock(&dso->lock); 449 nsinfo__put(dso->nsinfo); 450 dso->nsinfo = nsi; 451 mutex_unlock(&dso->lock); 452 } else 453 nsinfo__put(nsi); 454 455 thread__put(thread); 456 return dso; 457 } 458 459 static int perf_event__repipe_buildid_mmap(struct perf_tool *tool, 460 union perf_event *event, 461 struct perf_sample *sample, 462 struct machine *machine) 463 { 464 struct dso *dso; 465 466 dso = findnew_dso(event->mmap.pid, event->mmap.tid, 467 event->mmap.filename, NULL, machine); 468 469 if (dso && !dso->hit) { 470 dso->hit = 1; 471 dso__inject_build_id(dso, tool, machine, sample->cpumode, 0); 472 } 473 dso__put(dso); 474 475 return perf_event__repipe(tool, event, sample, machine); 476 } 477 478 static int perf_event__repipe_mmap2(struct perf_tool *tool, 479 union perf_event *event, 480 struct perf_sample *sample, 481 struct machine *machine) 482 { 483 int err; 484 485 err = perf_event__process_mmap2(tool, event, sample, machine); 486 perf_event__repipe(tool, event, sample, machine); 487 488 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 489 struct dso *dso; 490 491 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 492 event->mmap2.filename, NULL, machine); 493 if (dso) { 494 /* mark it not to inject build-id */ 495 dso->hit = 1; 496 } 497 dso__put(dso); 498 } 499 500 return err; 501 } 502 503 #ifdef HAVE_JITDUMP 504 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, 505 union perf_event *event, 506 struct perf_sample *sample, 507 struct machine *machine) 508 { 509 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 510 u64 n = 0; 511 int ret; 512 513 /* 514 * if jit marker, then inject jit mmaps and generate ELF images 515 */ 516 ret = jit_process(inject->session, &inject->output, machine, 517 event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n); 518 if (ret < 0) 519 return ret; 520 if (ret) { 521 inject->bytes_written += n; 522 return 0; 523 } 524 return perf_event__repipe_mmap2(tool, event, sample, machine); 525 } 526 #endif 527 528 static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, 529 union perf_event *event, 530 struct perf_sample *sample, 531 struct machine *machine) 532 { 533 struct dso_id dso_id = { 534 .maj = event->mmap2.maj, 535 .min = event->mmap2.min, 536 .ino = event->mmap2.ino, 537 .ino_generation = event->mmap2.ino_generation, 538 }; 539 struct dso *dso; 540 541 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 542 /* cannot use dso_id since it'd have invalid info */ 543 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 544 event->mmap2.filename, NULL, machine); 545 if (dso) { 546 /* mark it not to inject build-id */ 547 dso->hit = 1; 548 } 549 dso__put(dso); 550 perf_event__repipe(tool, event, sample, machine); 551 return 0; 552 } 553 554 dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, 555 event->mmap2.filename, &dso_id, machine); 556 557 if (dso && !dso->hit) { 558 dso->hit = 1; 559 dso__inject_build_id(dso, tool, machine, sample->cpumode, 560 event->mmap2.flags); 561 } 562 dso__put(dso); 563 564 perf_event__repipe(tool, event, sample, machine); 565 566 return 0; 567 } 568 569 static int perf_event__repipe_fork(struct perf_tool *tool, 570 union perf_event *event, 571 struct perf_sample *sample, 572 struct machine *machine) 573 { 574 int err; 575 576 err = perf_event__process_fork(tool, event, sample, machine); 577 perf_event__repipe(tool, event, sample, machine); 578 579 return err; 580 } 581 582 static int perf_event__repipe_comm(struct perf_tool *tool, 583 union perf_event *event, 584 struct perf_sample *sample, 585 struct machine *machine) 586 { 587 int err; 588 589 err = perf_event__process_comm(tool, event, sample, machine); 590 perf_event__repipe(tool, event, sample, machine); 591 592 return err; 593 } 594 595 static int perf_event__repipe_namespaces(struct perf_tool *tool, 596 union perf_event *event, 597 struct perf_sample *sample, 598 struct machine *machine) 599 { 600 int err = perf_event__process_namespaces(tool, event, sample, machine); 601 602 perf_event__repipe(tool, event, sample, machine); 603 604 return err; 605 } 606 607 static int perf_event__repipe_exit(struct perf_tool *tool, 608 union perf_event *event, 609 struct perf_sample *sample, 610 struct machine *machine) 611 { 612 int err; 613 614 err = perf_event__process_exit(tool, event, sample, machine); 615 perf_event__repipe(tool, event, sample, machine); 616 617 return err; 618 } 619 620 #ifdef HAVE_LIBTRACEEVENT 621 static int perf_event__repipe_tracing_data(struct perf_session *session, 622 union perf_event *event) 623 { 624 perf_event__repipe_synth(session->tool, event); 625 626 return perf_event__process_tracing_data(session, event); 627 } 628 #endif 629 630 static int dso__read_build_id(struct dso *dso) 631 { 632 struct nscookie nsc; 633 634 if (dso->has_build_id) 635 return 0; 636 637 mutex_lock(&dso->lock); 638 nsinfo__mountns_enter(dso->nsinfo, &nsc); 639 if (filename__read_build_id(dso->long_name, &dso->bid) > 0) 640 dso->has_build_id = true; 641 else if (dso->nsinfo) { 642 char *new_name = dso__filename_with_chroot(dso, dso->long_name); 643 644 if (new_name && filename__read_build_id(new_name, &dso->bid) > 0) 645 dso->has_build_id = true; 646 free(new_name); 647 } 648 nsinfo__mountns_exit(&nsc); 649 mutex_unlock(&dso->lock); 650 651 return dso->has_build_id ? 0 : -1; 652 } 653 654 static struct strlist *perf_inject__parse_known_build_ids( 655 const char *known_build_ids_string) 656 { 657 struct str_node *pos, *tmp; 658 struct strlist *known_build_ids; 659 int bid_len; 660 661 known_build_ids = strlist__new(known_build_ids_string, NULL); 662 if (known_build_ids == NULL) 663 return NULL; 664 strlist__for_each_entry_safe(pos, tmp, known_build_ids) { 665 const char *build_id, *dso_name; 666 667 build_id = skip_spaces(pos->s); 668 dso_name = strchr(build_id, ' '); 669 if (dso_name == NULL) { 670 strlist__remove(known_build_ids, pos); 671 continue; 672 } 673 bid_len = dso_name - pos->s; 674 dso_name = skip_spaces(dso_name); 675 if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) { 676 strlist__remove(known_build_ids, pos); 677 continue; 678 } 679 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 680 if (!isxdigit(build_id[2 * ix]) || 681 !isxdigit(build_id[2 * ix + 1])) { 682 strlist__remove(known_build_ids, pos); 683 break; 684 } 685 } 686 } 687 return known_build_ids; 688 } 689 690 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, 691 struct dso *dso) 692 { 693 struct str_node *pos; 694 int bid_len; 695 696 strlist__for_each_entry(pos, inject->known_build_ids) { 697 const char *build_id, *dso_name; 698 699 build_id = skip_spaces(pos->s); 700 dso_name = strchr(build_id, ' '); 701 bid_len = dso_name - pos->s; 702 dso_name = skip_spaces(dso_name); 703 if (strcmp(dso->long_name, dso_name)) 704 continue; 705 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 706 dso->bid.data[ix] = (hex(build_id[2 * ix]) << 4 | 707 hex(build_id[2 * ix + 1])); 708 } 709 dso->bid.size = bid_len / 2; 710 dso->has_build_id = 1; 711 return true; 712 } 713 return false; 714 } 715 716 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, 717 struct machine *machine, u8 cpumode, u32 flags) 718 { 719 struct perf_inject *inject = container_of(tool, struct perf_inject, 720 tool); 721 int err; 722 723 if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB) 724 return 0; 725 if (is_no_dso_memory(dso->long_name)) 726 return 0; 727 728 if (inject->known_build_ids != NULL && 729 perf_inject__lookup_known_build_id(inject, dso)) 730 return 1; 731 732 if (dso__read_build_id(dso) < 0) { 733 pr_debug("no build_id found for %s\n", dso->long_name); 734 return -1; 735 } 736 737 err = perf_event__synthesize_build_id(tool, dso, cpumode, 738 perf_event__repipe, machine); 739 if (err) { 740 pr_err("Can't synthesize build_id event for %s\n", dso->long_name); 741 return -1; 742 } 743 744 return 0; 745 } 746 747 int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, 748 struct perf_sample *sample, 749 struct evsel *evsel __maybe_unused, 750 struct machine *machine) 751 { 752 struct addr_location al; 753 struct thread *thread; 754 755 addr_location__init(&al); 756 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 757 if (thread == NULL) { 758 pr_err("problem processing %d event, skipping it.\n", 759 event->header.type); 760 goto repipe; 761 } 762 763 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 764 struct dso *dso = map__dso(al.map); 765 766 if (!dso->hit) { 767 dso->hit = 1; 768 dso__inject_build_id(dso, tool, machine, 769 sample->cpumode, map__flags(al.map)); 770 } 771 } 772 773 thread__put(thread); 774 repipe: 775 perf_event__repipe(tool, event, sample, machine); 776 addr_location__exit(&al); 777 return 0; 778 } 779 780 static int perf_inject__sched_process_exit(struct perf_tool *tool, 781 union perf_event *event __maybe_unused, 782 struct perf_sample *sample, 783 struct evsel *evsel __maybe_unused, 784 struct machine *machine __maybe_unused) 785 { 786 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 787 struct event_entry *ent; 788 789 list_for_each_entry(ent, &inject->samples, node) { 790 if (sample->tid == ent->tid) { 791 list_del_init(&ent->node); 792 free(ent); 793 break; 794 } 795 } 796 797 return 0; 798 } 799 800 static int perf_inject__sched_switch(struct perf_tool *tool, 801 union perf_event *event, 802 struct perf_sample *sample, 803 struct evsel *evsel, 804 struct machine *machine) 805 { 806 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 807 struct event_entry *ent; 808 809 perf_inject__sched_process_exit(tool, event, sample, evsel, machine); 810 811 ent = malloc(event->header.size + sizeof(struct event_entry)); 812 if (ent == NULL) { 813 color_fprintf(stderr, PERF_COLOR_RED, 814 "Not enough memory to process sched switch event!"); 815 return -1; 816 } 817 818 ent->tid = sample->tid; 819 memcpy(&ent->event, event, event->header.size); 820 list_add(&ent->node, &inject->samples); 821 return 0; 822 } 823 824 #ifdef HAVE_LIBTRACEEVENT 825 static int perf_inject__sched_stat(struct perf_tool *tool, 826 union perf_event *event __maybe_unused, 827 struct perf_sample *sample, 828 struct evsel *evsel, 829 struct machine *machine) 830 { 831 struct event_entry *ent; 832 union perf_event *event_sw; 833 struct perf_sample sample_sw; 834 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 835 u32 pid = evsel__intval(evsel, sample, "pid"); 836 837 list_for_each_entry(ent, &inject->samples, node) { 838 if (pid == ent->tid) 839 goto found; 840 } 841 842 return 0; 843 found: 844 event_sw = &ent->event[0]; 845 evsel__parse_sample(evsel, event_sw, &sample_sw); 846 847 sample_sw.period = sample->period; 848 sample_sw.time = sample->time; 849 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type, 850 evsel->core.attr.read_format, &sample_sw); 851 build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); 852 return perf_event__repipe(tool, event_sw, &sample_sw, machine); 853 } 854 #endif 855 856 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) 857 { 858 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL)) 859 return NULL; 860 return &gs->vcpu[vcpu]; 861 } 862 863 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) 864 { 865 ssize_t ret = writen(gs->tmp_fd, buf, sz); 866 867 return ret < 0 ? ret : 0; 868 } 869 870 static int guest_session__repipe(struct perf_tool *tool, 871 union perf_event *event, 872 struct perf_sample *sample __maybe_unused, 873 struct machine *machine __maybe_unused) 874 { 875 struct guest_session *gs = container_of(tool, struct guest_session, tool); 876 877 return guest_session__output_bytes(gs, event, event->header.size); 878 } 879 880 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 881 { 882 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 883 int hash; 884 885 if (!guest_tid) 886 return -ENOMEM; 887 888 guest_tid->tid = tid; 889 guest_tid->vcpu = vcpu; 890 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 891 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 892 893 return 0; 894 } 895 896 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 897 union perf_event *event, 898 u64 offset __maybe_unused, void *data) 899 { 900 struct guest_session *gs = data; 901 unsigned int vcpu; 902 struct guest_vcpu *guest_vcpu; 903 int ret; 904 905 if (event->header.type != PERF_RECORD_COMM || 906 event->comm.pid != gs->machine_pid) 907 return 0; 908 909 /* 910 * QEMU option -name debug-threads=on, causes thread names formatted as 911 * below, although it is not an ABI. Also libvirt seems to use this by 912 * default. Here we rely on it to tell us which thread is which VCPU. 913 */ 914 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu); 915 if (ret <= 0) 916 return ret; 917 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n", 918 event->comm.tid, event->comm.comm, vcpu); 919 if (vcpu > INT_MAX) { 920 pr_err("Invalid VCPU %u\n", vcpu); 921 return -EINVAL; 922 } 923 guest_vcpu = guest_session__vcpu(gs, vcpu); 924 if (!guest_vcpu) 925 return -ENOMEM; 926 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) { 927 pr_err("Fatal error: Two threads found with the same VCPU\n"); 928 return -EINVAL; 929 } 930 guest_vcpu->tid = event->comm.tid; 931 932 return guest_session__map_tid(gs, event->comm.tid, vcpu); 933 } 934 935 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) 936 { 937 return perf_session__peek_events(session, session->header.data_offset, 938 session->header.data_size, 939 host_peek_vm_comms_cb, gs); 940 } 941 942 static bool evlist__is_id_used(struct evlist *evlist, u64 id) 943 { 944 return evlist__id2sid(evlist, id); 945 } 946 947 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) 948 { 949 do { 950 gs->highest_id += 1; 951 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id)); 952 953 return gs->highest_id; 954 } 955 956 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) 957 { 958 struct guest_id *guest_id = zalloc(sizeof(*guest_id)); 959 int hash; 960 961 if (!guest_id) 962 return -ENOMEM; 963 964 guest_id->id = id; 965 guest_id->host_id = host_id; 966 guest_id->vcpu = vcpu; 967 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); 968 hlist_add_head(&guest_id->node, &gs->heads[hash]); 969 970 return 0; 971 } 972 973 static u64 evlist__find_highest_id(struct evlist *evlist) 974 { 975 struct evsel *evsel; 976 u64 highest_id = 1; 977 978 evlist__for_each_entry(evlist, evsel) { 979 u32 j; 980 981 for (j = 0; j < evsel->core.ids; j++) { 982 u64 id = evsel->core.id[j]; 983 984 if (id > highest_id) 985 highest_id = id; 986 } 987 } 988 989 return highest_id; 990 } 991 992 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 993 { 994 struct evlist *evlist = gs->session->evlist; 995 struct evsel *evsel; 996 int ret; 997 998 evlist__for_each_entry(evlist, evsel) { 999 u32 j; 1000 1001 for (j = 0; j < evsel->core.ids; j++) { 1002 struct perf_sample_id *sid; 1003 u64 host_id; 1004 u64 id; 1005 1006 id = evsel->core.id[j]; 1007 sid = evlist__id2sid(evlist, id); 1008 if (!sid || sid->cpu.cpu == -1) 1009 continue; 1010 host_id = guest_session__allocate_new_id(gs, host_evlist); 1011 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 1012 if (ret) 1013 return ret; 1014 } 1015 } 1016 1017 return 0; 1018 } 1019 1020 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1021 { 1022 struct hlist_head *head; 1023 struct guest_id *guest_id; 1024 int hash; 1025 1026 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1027 head = &gs->heads[hash]; 1028 1029 hlist_for_each_entry(guest_id, head, node) 1030 if (guest_id->id == id) 1031 return guest_id; 1032 1033 return NULL; 1034 } 1035 1036 static int process_attr(struct perf_tool *tool, union perf_event *event, 1037 struct perf_sample *sample __maybe_unused, 1038 struct machine *machine __maybe_unused) 1039 { 1040 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1041 1042 return perf_event__process_attr(tool, event, &inject->session->evlist); 1043 } 1044 1045 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1046 { 1047 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1048 struct perf_event_attr attr = evsel->core.attr; 1049 u64 *id_array; 1050 u32 *vcpu_array; 1051 int ret = -ENOMEM; 1052 u32 i; 1053 1054 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1055 if (!id_array) 1056 return -ENOMEM; 1057 1058 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1059 if (!vcpu_array) 1060 goto out; 1061 1062 for (i = 0; i < evsel->core.ids; i++) { 1063 u64 id = evsel->core.id[i]; 1064 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1065 1066 if (!guest_id) { 1067 pr_err("Failed to find guest id %"PRIu64"\n", id); 1068 ret = -EINVAL; 1069 goto out; 1070 } 1071 id_array[i] = guest_id->host_id; 1072 vcpu_array[i] = guest_id->vcpu; 1073 } 1074 1075 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1076 attr.exclude_host = 1; 1077 attr.exclude_guest = 0; 1078 1079 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1080 id_array, process_attr); 1081 if (ret) 1082 pr_err("Failed to add guest attr.\n"); 1083 1084 for (i = 0; i < evsel->core.ids; i++) { 1085 struct perf_sample_id *sid; 1086 u32 vcpu = vcpu_array[i]; 1087 1088 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1089 /* Guest event is per-thread from the host point of view */ 1090 sid->cpu.cpu = -1; 1091 sid->tid = gs->vcpu[vcpu].tid; 1092 sid->machine_pid = gs->machine_pid; 1093 sid->vcpu.cpu = vcpu; 1094 } 1095 out: 1096 free(vcpu_array); 1097 free(id_array); 1098 return ret; 1099 } 1100 1101 static int guest_session__add_attrs(struct guest_session *gs) 1102 { 1103 struct evlist *evlist = gs->session->evlist; 1104 struct evsel *evsel; 1105 int ret; 1106 1107 evlist__for_each_entry(evlist, evsel) { 1108 ret = guest_session__add_attr(gs, evsel); 1109 if (ret) 1110 return ret; 1111 } 1112 1113 return 0; 1114 } 1115 1116 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1117 { 1118 struct perf_session *session = inject->session; 1119 struct evlist *evlist = session->evlist; 1120 struct machine *machine = &session->machines.host; 1121 size_t from = evlist->core.nr_entries - new_cnt; 1122 1123 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1124 evlist, machine, from); 1125 } 1126 1127 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) 1128 { 1129 struct hlist_head *head; 1130 struct guest_tid *guest_tid; 1131 int hash; 1132 1133 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); 1134 head = &gs->tids[hash]; 1135 1136 hlist_for_each_entry(guest_tid, head, node) 1137 if (guest_tid->tid == tid) 1138 return guest_tid; 1139 1140 return NULL; 1141 } 1142 1143 static bool dso__is_in_kernel_space(struct dso *dso) 1144 { 1145 if (dso__is_vdso(dso)) 1146 return false; 1147 1148 return dso__is_kcore(dso) || 1149 dso->kernel || 1150 is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); 1151 } 1152 1153 static u64 evlist__first_id(struct evlist *evlist) 1154 { 1155 struct evsel *evsel; 1156 1157 evlist__for_each_entry(evlist, evsel) { 1158 if (evsel->core.ids) 1159 return evsel->core.id[0]; 1160 } 1161 return 0; 1162 } 1163 1164 static int process_build_id(struct perf_tool *tool, 1165 union perf_event *event, 1166 struct perf_sample *sample __maybe_unused, 1167 struct machine *machine __maybe_unused) 1168 { 1169 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1170 1171 return perf_event__process_build_id(inject->session, event); 1172 } 1173 1174 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) 1175 { 1176 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid); 1177 u8 cpumode = dso__is_in_kernel_space(dso) ? 1178 PERF_RECORD_MISC_GUEST_KERNEL : 1179 PERF_RECORD_MISC_GUEST_USER; 1180 1181 if (!machine) 1182 return -ENOMEM; 1183 1184 dso->hit = 1; 1185 1186 return perf_event__synthesize_build_id(&inject->tool, dso, cpumode, 1187 process_build_id, machine); 1188 } 1189 1190 static int guest_session__add_build_ids_cb(struct dso *dso, void *data) 1191 { 1192 struct guest_session *gs = data; 1193 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1194 1195 if (!dso->has_build_id) 1196 return 0; 1197 1198 return synthesize_build_id(inject, dso, gs->machine_pid); 1199 1200 } 1201 1202 static int guest_session__add_build_ids(struct guest_session *gs) 1203 { 1204 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1205 1206 /* Build IDs will be put in the Build ID feature section */ 1207 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID); 1208 1209 return dsos__for_each_dso(&gs->session->machines.host.dsos, 1210 guest_session__add_build_ids_cb, 1211 gs); 1212 } 1213 1214 static int guest_session__ksymbol_event(struct perf_tool *tool, 1215 union perf_event *event, 1216 struct perf_sample *sample __maybe_unused, 1217 struct machine *machine __maybe_unused) 1218 { 1219 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1220 1221 /* Only support out-of-line i.e. no BPF support */ 1222 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) 1223 return 0; 1224 1225 return guest_session__output_bytes(gs, event, event->header.size); 1226 } 1227 1228 static int guest_session__start(struct guest_session *gs, const char *name, bool force) 1229 { 1230 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX"; 1231 struct perf_session *session; 1232 int ret; 1233 1234 /* Only these events will be injected */ 1235 gs->tool.mmap = guest_session__repipe; 1236 gs->tool.mmap2 = guest_session__repipe; 1237 gs->tool.comm = guest_session__repipe; 1238 gs->tool.fork = guest_session__repipe; 1239 gs->tool.exit = guest_session__repipe; 1240 gs->tool.lost = guest_session__repipe; 1241 gs->tool.context_switch = guest_session__repipe; 1242 gs->tool.ksymbol = guest_session__ksymbol_event; 1243 gs->tool.text_poke = guest_session__repipe; 1244 /* 1245 * Processing a build ID creates a struct dso with that build ID. Later, 1246 * all guest dsos are iterated and the build IDs processed into the host 1247 * session where they will be output to the Build ID feature section 1248 * when the perf.data file header is written. 1249 */ 1250 gs->tool.build_id = perf_event__process_build_id; 1251 /* Process the id index to know what VCPU an ID belongs to */ 1252 gs->tool.id_index = perf_event__process_id_index; 1253 1254 gs->tool.ordered_events = true; 1255 gs->tool.ordering_requires_timestamps = true; 1256 1257 gs->data.path = name; 1258 gs->data.force = force; 1259 gs->data.mode = PERF_DATA_MODE_READ; 1260 1261 session = perf_session__new(&gs->data, &gs->tool); 1262 if (IS_ERR(session)) 1263 return PTR_ERR(session); 1264 gs->session = session; 1265 1266 /* 1267 * Initial events have zero'd ID samples. Get default ID sample size 1268 * used for removing them. 1269 */ 1270 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size; 1271 /* And default ID for adding back a host-compatible ID sample */ 1272 gs->dflt_id = evlist__first_id(session->evlist); 1273 if (!gs->dflt_id) { 1274 pr_err("Guest data has no sample IDs"); 1275 return -EINVAL; 1276 } 1277 1278 /* Temporary file for guest events */ 1279 gs->tmp_file_name = strdup(tmp_file_name); 1280 if (!gs->tmp_file_name) 1281 return -ENOMEM; 1282 gs->tmp_fd = mkstemp(gs->tmp_file_name); 1283 if (gs->tmp_fd < 0) 1284 return -errno; 1285 1286 if (zstd_init(&gs->session->zstd_data, 0) < 0) 1287 pr_warning("Guest session decompression initialization failed.\n"); 1288 1289 /* 1290 * perf does not support processing 2 sessions simultaneously, so output 1291 * guest events to a temporary file. 1292 */ 1293 ret = perf_session__process_events(gs->session); 1294 if (ret) 1295 return ret; 1296 1297 if (lseek(gs->tmp_fd, 0, SEEK_SET)) 1298 return -errno; 1299 1300 return 0; 1301 } 1302 1303 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ 1304 static void free_hlist(struct hlist_head *heads, size_t hlist_sz) 1305 { 1306 struct hlist_node *pos, *n; 1307 size_t i; 1308 1309 for (i = 0; i < hlist_sz; ++i) { 1310 hlist_for_each_safe(pos, n, &heads[i]) { 1311 hlist_del(pos); 1312 free(pos); 1313 } 1314 } 1315 } 1316 1317 static void guest_session__exit(struct guest_session *gs) 1318 { 1319 if (gs->session) { 1320 perf_session__delete(gs->session); 1321 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE); 1322 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE); 1323 } 1324 if (gs->tmp_file_name) { 1325 if (gs->tmp_fd >= 0) 1326 close(gs->tmp_fd); 1327 unlink(gs->tmp_file_name); 1328 zfree(&gs->tmp_file_name); 1329 } 1330 zfree(&gs->vcpu); 1331 zfree(&gs->perf_data_file); 1332 } 1333 1334 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv) 1335 { 1336 tc->time_shift = time_conv->time_shift; 1337 tc->time_mult = time_conv->time_mult; 1338 tc->time_zero = time_conv->time_zero; 1339 tc->time_cycles = time_conv->time_cycles; 1340 tc->time_mask = time_conv->time_mask; 1341 tc->cap_user_time_zero = time_conv->cap_user_time_zero; 1342 tc->cap_user_time_short = time_conv->cap_user_time_short; 1343 } 1344 1345 static void guest_session__get_tc(struct guest_session *gs) 1346 { 1347 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1348 1349 get_tsc_conv(&gs->host_tc, &inject->session->time_conv); 1350 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv); 1351 } 1352 1353 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) 1354 { 1355 u64 tsc; 1356 1357 if (!guest_time) { 1358 *host_time = 0; 1359 return; 1360 } 1361 1362 if (gs->guest_tc.cap_user_time_zero) 1363 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc); 1364 else 1365 tsc = guest_time; 1366 1367 /* 1368 * This is the correct order of operations for x86 if the TSC Offset and 1369 * Multiplier values are used. 1370 */ 1371 tsc -= gs->time_offset; 1372 tsc /= gs->time_scale; 1373 1374 if (gs->host_tc.cap_user_time_zero) 1375 *host_time = tsc_to_perf_time(tsc, &gs->host_tc); 1376 else 1377 *host_time = tsc; 1378 } 1379 1380 static int guest_session__fetch(struct guest_session *gs) 1381 { 1382 void *buf; 1383 struct perf_event_header *hdr; 1384 size_t hdr_sz = sizeof(*hdr); 1385 ssize_t ret; 1386 1387 buf = gs->ev.event_buf; 1388 if (!buf) { 1389 buf = malloc(PERF_SAMPLE_MAX_SIZE); 1390 if (!buf) 1391 return -ENOMEM; 1392 gs->ev.event_buf = buf; 1393 } 1394 hdr = buf; 1395 ret = readn(gs->tmp_fd, buf, hdr_sz); 1396 if (ret < 0) 1397 return ret; 1398 1399 if (!ret) { 1400 /* Zero size means EOF */ 1401 hdr->size = 0; 1402 return 0; 1403 } 1404 1405 buf += hdr_sz; 1406 1407 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); 1408 if (ret < 0) 1409 return ret; 1410 1411 gs->ev.event = (union perf_event *)gs->ev.event_buf; 1412 gs->ev.sample.time = 0; 1413 1414 if (hdr->type >= PERF_RECORD_USER_TYPE_START) { 1415 pr_err("Unexpected type fetching guest event"); 1416 return 0; 1417 } 1418 1419 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample); 1420 if (ret) { 1421 pr_err("Parse failed fetching guest event"); 1422 return ret; 1423 } 1424 1425 if (!gs->have_tc) { 1426 guest_session__get_tc(gs); 1427 gs->have_tc = true; 1428 } 1429 1430 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time); 1431 1432 return 0; 1433 } 1434 1435 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, 1436 const struct perf_sample *sample) 1437 { 1438 struct evsel *evsel; 1439 void *array; 1440 int ret; 1441 1442 evsel = evlist__id2evsel(evlist, sample->id); 1443 array = ev; 1444 1445 if (!evsel) { 1446 pr_err("No evsel for id %"PRIu64"\n", sample->id); 1447 return -EINVAL; 1448 } 1449 1450 array += ev->header.size; 1451 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); 1452 if (ret < 0) 1453 return ret; 1454 1455 if (ret & 7) { 1456 pr_err("Bad id sample size %d\n", ret); 1457 return -EINVAL; 1458 } 1459 1460 ev->header.size += ret; 1461 1462 return 0; 1463 } 1464 1465 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp) 1466 { 1467 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1468 int ret; 1469 1470 if (!gs->ready) 1471 return 0; 1472 1473 while (1) { 1474 struct perf_sample *sample; 1475 struct guest_id *guest_id; 1476 union perf_event *ev; 1477 u16 id_hdr_size; 1478 u8 cpumode; 1479 u64 id; 1480 1481 if (!gs->fetched) { 1482 ret = guest_session__fetch(gs); 1483 if (ret) 1484 return ret; 1485 gs->fetched = true; 1486 } 1487 1488 ev = gs->ev.event; 1489 sample = &gs->ev.sample; 1490 1491 if (!ev->header.size) 1492 return 0; /* EOF */ 1493 1494 if (sample->time > timestamp) 1495 return 0; 1496 1497 /* Change cpumode to guest */ 1498 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1499 if (cpumode & PERF_RECORD_MISC_USER) 1500 cpumode = PERF_RECORD_MISC_GUEST_USER; 1501 else 1502 cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 1503 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK; 1504 ev->header.misc |= cpumode; 1505 1506 id = sample->id; 1507 if (!id) { 1508 id = gs->dflt_id; 1509 id_hdr_size = gs->dflt_id_hdr_size; 1510 } else { 1511 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id); 1512 1513 id_hdr_size = evsel__id_hdr_size(evsel); 1514 } 1515 1516 if (id_hdr_size & 7) { 1517 pr_err("Bad id_hdr_size %u\n", id_hdr_size); 1518 return -EINVAL; 1519 } 1520 1521 if (ev->header.size & 7) { 1522 pr_err("Bad event size %u\n", ev->header.size); 1523 return -EINVAL; 1524 } 1525 1526 /* Remove guest id sample */ 1527 ev->header.size -= id_hdr_size; 1528 1529 if (ev->header.size & 7) { 1530 pr_err("Bad raw event size %u\n", ev->header.size); 1531 return -EINVAL; 1532 } 1533 1534 guest_id = guest_session__lookup_id(gs, id); 1535 if (!guest_id) { 1536 pr_err("Guest event with unknown id %llu\n", 1537 (unsigned long long)id); 1538 return -EINVAL; 1539 } 1540 1541 /* Change to host ID to avoid conflicting ID values */ 1542 sample->id = guest_id->host_id; 1543 sample->stream_id = guest_id->host_id; 1544 1545 if (sample->cpu != (u32)-1) { 1546 if (sample->cpu >= gs->vcpu_cnt) { 1547 pr_err("Guest event with unknown VCPU %u\n", 1548 sample->cpu); 1549 return -EINVAL; 1550 } 1551 /* Change to host CPU instead of guest VCPU */ 1552 sample->cpu = gs->vcpu[sample->cpu].cpu; 1553 } 1554 1555 /* New id sample with new ID and CPU */ 1556 ret = evlist__append_id_sample(inject->session->evlist, ev, sample); 1557 if (ret) 1558 return ret; 1559 1560 if (ev->header.size & 7) { 1561 pr_err("Bad new event size %u\n", ev->header.size); 1562 return -EINVAL; 1563 } 1564 1565 gs->fetched = false; 1566 1567 ret = output_bytes(inject, ev, ev->header.size); 1568 if (ret) 1569 return ret; 1570 } 1571 } 1572 1573 static int guest_session__flush_events(struct guest_session *gs) 1574 { 1575 return guest_session__inject_events(gs, -1); 1576 } 1577 1578 static int host__repipe(struct perf_tool *tool, 1579 union perf_event *event, 1580 struct perf_sample *sample, 1581 struct machine *machine) 1582 { 1583 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1584 int ret; 1585 1586 ret = guest_session__inject_events(&inject->guest_session, sample->time); 1587 if (ret) 1588 return ret; 1589 1590 return perf_event__repipe(tool, event, sample, machine); 1591 } 1592 1593 static int host__finished_init(struct perf_session *session, union perf_event *event) 1594 { 1595 struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool); 1596 struct guest_session *gs = &inject->guest_session; 1597 int ret; 1598 1599 /* 1600 * Peek through host COMM events to find QEMU threads and the VCPU they 1601 * are running. 1602 */ 1603 ret = host_peek_vm_comms(session, gs); 1604 if (ret) 1605 return ret; 1606 1607 if (!gs->vcpu_cnt) { 1608 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid); 1609 return -EINVAL; 1610 } 1611 1612 /* 1613 * Allocate new (unused) host sample IDs and map them to the guest IDs. 1614 */ 1615 gs->highest_id = evlist__find_highest_id(session->evlist); 1616 ret = guest_session__map_ids(gs, session->evlist); 1617 if (ret) 1618 return ret; 1619 1620 ret = guest_session__add_attrs(gs); 1621 if (ret) 1622 return ret; 1623 1624 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries); 1625 if (ret) { 1626 pr_err("Failed to synthesize id_index\n"); 1627 return ret; 1628 } 1629 1630 ret = guest_session__add_build_ids(gs); 1631 if (ret) { 1632 pr_err("Failed to add guest build IDs\n"); 1633 return ret; 1634 } 1635 1636 gs->ready = true; 1637 1638 ret = guest_session__inject_events(gs, 0); 1639 if (ret) 1640 return ret; 1641 1642 return perf_event__repipe_op2_synth(session, event); 1643 } 1644 1645 /* 1646 * Obey finished-round ordering. The FINISHED_ROUND event is first processed 1647 * which flushes host events to file up until the last flush time. Then inject 1648 * guest events up to the same time. Finally write out the FINISHED_ROUND event 1649 * itself. 1650 */ 1651 static int host__finished_round(struct perf_tool *tool, 1652 union perf_event *event, 1653 struct ordered_events *oe) 1654 { 1655 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1656 int ret = perf_event__process_finished_round(tool, event, oe); 1657 u64 timestamp = ordered_events__last_flush_time(oe); 1658 1659 if (ret) 1660 return ret; 1661 1662 ret = guest_session__inject_events(&inject->guest_session, timestamp); 1663 if (ret) 1664 return ret; 1665 1666 return perf_event__repipe_oe_synth(tool, event, oe); 1667 } 1668 1669 static int host__context_switch(struct perf_tool *tool, 1670 union perf_event *event, 1671 struct perf_sample *sample, 1672 struct machine *machine) 1673 { 1674 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1675 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1676 struct guest_session *gs = &inject->guest_session; 1677 u32 pid = event->context_switch.next_prev_pid; 1678 u32 tid = event->context_switch.next_prev_tid; 1679 struct guest_tid *guest_tid; 1680 u32 vcpu; 1681 1682 if (out || pid != gs->machine_pid) 1683 goto out; 1684 1685 guest_tid = guest_session__lookup_tid(gs, tid); 1686 if (!guest_tid) 1687 goto out; 1688 1689 if (sample->cpu == (u32)-1) { 1690 pr_err("Switch event does not have CPU\n"); 1691 return -EINVAL; 1692 } 1693 1694 vcpu = guest_tid->vcpu; 1695 if (vcpu >= gs->vcpu_cnt) 1696 return -EINVAL; 1697 1698 /* Guest is switching in, record which CPU the VCPU is now running on */ 1699 gs->vcpu[vcpu].cpu = sample->cpu; 1700 out: 1701 return host__repipe(tool, event, sample, machine); 1702 } 1703 1704 static void sig_handler(int sig __maybe_unused) 1705 { 1706 session_done = 1; 1707 } 1708 1709 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) 1710 { 1711 struct perf_event_attr *attr = &evsel->core.attr; 1712 const char *name = evsel__name(evsel); 1713 1714 if (!(attr->sample_type & sample_type)) { 1715 pr_err("Samples for %s event do not have %s attribute set.", 1716 name, sample_msg); 1717 return -EINVAL; 1718 } 1719 1720 return 0; 1721 } 1722 1723 static int drop_sample(struct perf_tool *tool __maybe_unused, 1724 union perf_event *event __maybe_unused, 1725 struct perf_sample *sample __maybe_unused, 1726 struct evsel *evsel __maybe_unused, 1727 struct machine *machine __maybe_unused) 1728 { 1729 return 0; 1730 } 1731 1732 static void strip_init(struct perf_inject *inject) 1733 { 1734 struct evlist *evlist = inject->session->evlist; 1735 struct evsel *evsel; 1736 1737 inject->tool.context_switch = perf_event__drop; 1738 1739 evlist__for_each_entry(evlist, evsel) 1740 evsel->handler = drop_sample; 1741 } 1742 1743 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) 1744 { 1745 struct perf_inject *inject = opt->value; 1746 const char *args; 1747 char *dry_run; 1748 1749 if (unset) 1750 return 0; 1751 1752 inject->itrace_synth_opts.set = true; 1753 inject->itrace_synth_opts.vm_time_correlation = true; 1754 inject->in_place_update = true; 1755 1756 if (!str) 1757 return 0; 1758 1759 dry_run = skip_spaces(str); 1760 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { 1761 inject->itrace_synth_opts.vm_tm_corr_dry_run = true; 1762 inject->in_place_update_dry_run = true; 1763 args = dry_run + strlen("dry-run"); 1764 } else { 1765 args = str; 1766 } 1767 1768 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); 1769 1770 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; 1771 } 1772 1773 static int parse_guest_data(const struct option *opt, const char *str, int unset) 1774 { 1775 struct perf_inject *inject = opt->value; 1776 struct guest_session *gs = &inject->guest_session; 1777 char *tok; 1778 char *s; 1779 1780 if (unset) 1781 return 0; 1782 1783 if (!str) 1784 goto bad_args; 1785 1786 s = strdup(str); 1787 if (!s) 1788 return -ENOMEM; 1789 1790 gs->perf_data_file = strsep(&s, ","); 1791 if (!gs->perf_data_file) 1792 goto bad_args; 1793 1794 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file); 1795 if (gs->copy_kcore_dir) 1796 inject->output.is_dir = true; 1797 1798 tok = strsep(&s, ","); 1799 if (!tok) 1800 goto bad_args; 1801 gs->machine_pid = strtoul(tok, NULL, 0); 1802 if (!inject->guest_session.machine_pid) 1803 goto bad_args; 1804 1805 gs->time_scale = 1; 1806 1807 tok = strsep(&s, ","); 1808 if (!tok) 1809 goto out; 1810 gs->time_offset = strtoull(tok, NULL, 0); 1811 1812 tok = strsep(&s, ","); 1813 if (!tok) 1814 goto out; 1815 gs->time_scale = strtod(tok, NULL); 1816 if (!gs->time_scale) 1817 goto bad_args; 1818 out: 1819 return 0; 1820 1821 bad_args: 1822 pr_err("--guest-data option requires guest perf.data file name, " 1823 "guest machine PID, and optionally guest timestamp offset, " 1824 "and guest timestamp scale factor, separated by commas.\n"); 1825 return -1; 1826 } 1827 1828 static int save_section_info_cb(struct perf_file_section *section, 1829 struct perf_header *ph __maybe_unused, 1830 int feat, int fd __maybe_unused, void *data) 1831 { 1832 struct perf_inject *inject = data; 1833 1834 inject->secs[feat] = *section; 1835 return 0; 1836 } 1837 1838 static int save_section_info(struct perf_inject *inject) 1839 { 1840 struct perf_header *header = &inject->session->header; 1841 int fd = perf_data__fd(inject->session->data); 1842 1843 return perf_header__process_sections(header, fd, inject, save_section_info_cb); 1844 } 1845 1846 static bool keep_feat(int feat) 1847 { 1848 switch (feat) { 1849 /* Keep original information that describes the machine or software */ 1850 case HEADER_TRACING_DATA: 1851 case HEADER_HOSTNAME: 1852 case HEADER_OSRELEASE: 1853 case HEADER_VERSION: 1854 case HEADER_ARCH: 1855 case HEADER_NRCPUS: 1856 case HEADER_CPUDESC: 1857 case HEADER_CPUID: 1858 case HEADER_TOTAL_MEM: 1859 case HEADER_CPU_TOPOLOGY: 1860 case HEADER_NUMA_TOPOLOGY: 1861 case HEADER_PMU_MAPPINGS: 1862 case HEADER_CACHE: 1863 case HEADER_MEM_TOPOLOGY: 1864 case HEADER_CLOCKID: 1865 case HEADER_BPF_PROG_INFO: 1866 case HEADER_BPF_BTF: 1867 case HEADER_CPU_PMU_CAPS: 1868 case HEADER_CLOCK_DATA: 1869 case HEADER_HYBRID_TOPOLOGY: 1870 case HEADER_PMU_CAPS: 1871 return true; 1872 /* Information that can be updated */ 1873 case HEADER_BUILD_ID: 1874 case HEADER_CMDLINE: 1875 case HEADER_EVENT_DESC: 1876 case HEADER_BRANCH_STACK: 1877 case HEADER_GROUP_DESC: 1878 case HEADER_AUXTRACE: 1879 case HEADER_STAT: 1880 case HEADER_SAMPLE_TIME: 1881 case HEADER_DIR_FORMAT: 1882 case HEADER_COMPRESSED: 1883 default: 1884 return false; 1885 }; 1886 } 1887 1888 static int read_file(int fd, u64 offs, void *buf, size_t sz) 1889 { 1890 ssize_t ret = preadn(fd, buf, sz, offs); 1891 1892 if (ret < 0) 1893 return -errno; 1894 if ((size_t)ret != sz) 1895 return -EINVAL; 1896 return 0; 1897 } 1898 1899 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) 1900 { 1901 int fd = perf_data__fd(inject->session->data); 1902 u64 offs = inject->secs[feat].offset; 1903 size_t sz = inject->secs[feat].size; 1904 void *buf = malloc(sz); 1905 int ret; 1906 1907 if (!buf) 1908 return -ENOMEM; 1909 1910 ret = read_file(fd, offs, buf, sz); 1911 if (ret) 1912 goto out_free; 1913 1914 ret = fw->write(fw, buf, sz); 1915 out_free: 1916 free(buf); 1917 return ret; 1918 } 1919 1920 struct inject_fc { 1921 struct feat_copier fc; 1922 struct perf_inject *inject; 1923 }; 1924 1925 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) 1926 { 1927 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); 1928 struct perf_inject *inject = inj_fc->inject; 1929 int ret; 1930 1931 if (!inject->secs[feat].offset || 1932 !keep_feat(feat)) 1933 return 0; 1934 1935 ret = feat_copy(inject, feat, fw); 1936 if (ret < 0) 1937 return ret; 1938 1939 return 1; /* Feature section copied */ 1940 } 1941 1942 static int copy_kcore_dir(struct perf_inject *inject) 1943 { 1944 char *cmd; 1945 int ret; 1946 1947 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1", 1948 inject->input_name, inject->output.path); 1949 if (ret < 0) 1950 return ret; 1951 pr_debug("%s\n", cmd); 1952 ret = system(cmd); 1953 free(cmd); 1954 return ret; 1955 } 1956 1957 static int guest_session__copy_kcore_dir(struct guest_session *gs) 1958 { 1959 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1960 char *cmd; 1961 int ret; 1962 1963 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1", 1964 gs->perf_data_file, inject->output.path, gs->machine_pid); 1965 if (ret < 0) 1966 return ret; 1967 pr_debug("%s\n", cmd); 1968 ret = system(cmd); 1969 free(cmd); 1970 return ret; 1971 } 1972 1973 static int output_fd(struct perf_inject *inject) 1974 { 1975 return inject->in_place_update ? -1 : perf_data__fd(&inject->output); 1976 } 1977 1978 static int __cmd_inject(struct perf_inject *inject) 1979 { 1980 int ret = -EINVAL; 1981 struct guest_session *gs = &inject->guest_session; 1982 struct perf_session *session = inject->session; 1983 int fd = output_fd(inject); 1984 u64 output_data_offset; 1985 1986 signal(SIGINT, sig_handler); 1987 1988 if (inject->build_ids || inject->sched_stat || 1989 inject->itrace_synth_opts.set || inject->build_id_all) { 1990 inject->tool.mmap = perf_event__repipe_mmap; 1991 inject->tool.mmap2 = perf_event__repipe_mmap2; 1992 inject->tool.fork = perf_event__repipe_fork; 1993 #ifdef HAVE_LIBTRACEEVENT 1994 inject->tool.tracing_data = perf_event__repipe_tracing_data; 1995 #endif 1996 } 1997 1998 output_data_offset = perf_session__data_offset(session->evlist); 1999 2000 if (inject->build_id_all) { 2001 inject->tool.mmap = perf_event__repipe_buildid_mmap; 2002 inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; 2003 } else if (inject->build_ids) { 2004 inject->tool.sample = perf_event__inject_buildid; 2005 } else if (inject->sched_stat) { 2006 struct evsel *evsel; 2007 2008 evlist__for_each_entry(session->evlist, evsel) { 2009 const char *name = evsel__name(evsel); 2010 2011 if (!strcmp(name, "sched:sched_switch")) { 2012 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) 2013 return -EINVAL; 2014 2015 evsel->handler = perf_inject__sched_switch; 2016 } else if (!strcmp(name, "sched:sched_process_exit")) 2017 evsel->handler = perf_inject__sched_process_exit; 2018 #ifdef HAVE_LIBTRACEEVENT 2019 else if (!strncmp(name, "sched:sched_stat_", 17)) 2020 evsel->handler = perf_inject__sched_stat; 2021 #endif 2022 } 2023 } else if (inject->itrace_synth_opts.vm_time_correlation) { 2024 session->itrace_synth_opts = &inject->itrace_synth_opts; 2025 memset(&inject->tool, 0, sizeof(inject->tool)); 2026 inject->tool.id_index = perf_event__process_id_index; 2027 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2028 inject->tool.auxtrace = perf_event__process_auxtrace; 2029 inject->tool.auxtrace_error = perf_event__process_auxtrace_error; 2030 inject->tool.ordered_events = true; 2031 inject->tool.ordering_requires_timestamps = true; 2032 } else if (inject->itrace_synth_opts.set) { 2033 session->itrace_synth_opts = &inject->itrace_synth_opts; 2034 inject->itrace_synth_opts.inject = true; 2035 inject->tool.comm = perf_event__repipe_comm; 2036 inject->tool.namespaces = perf_event__repipe_namespaces; 2037 inject->tool.exit = perf_event__repipe_exit; 2038 inject->tool.id_index = perf_event__process_id_index; 2039 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2040 inject->tool.auxtrace = perf_event__process_auxtrace; 2041 inject->tool.aux = perf_event__drop_aux; 2042 inject->tool.itrace_start = perf_event__drop_aux; 2043 inject->tool.aux_output_hw_id = perf_event__drop_aux; 2044 inject->tool.ordered_events = true; 2045 inject->tool.ordering_requires_timestamps = true; 2046 /* Allow space in the header for new attributes */ 2047 output_data_offset = roundup(8192 + session->header.data_offset, 4096); 2048 if (inject->strip) 2049 strip_init(inject); 2050 } else if (gs->perf_data_file) { 2051 char *name = gs->perf_data_file; 2052 2053 /* 2054 * Not strictly necessary, but keep these events in order wrt 2055 * guest events. 2056 */ 2057 inject->tool.mmap = host__repipe; 2058 inject->tool.mmap2 = host__repipe; 2059 inject->tool.comm = host__repipe; 2060 inject->tool.fork = host__repipe; 2061 inject->tool.exit = host__repipe; 2062 inject->tool.lost = host__repipe; 2063 inject->tool.context_switch = host__repipe; 2064 inject->tool.ksymbol = host__repipe; 2065 inject->tool.text_poke = host__repipe; 2066 /* 2067 * Once the host session has initialized, set up sample ID 2068 * mapping and feed in guest attrs, build IDs and initial 2069 * events. 2070 */ 2071 inject->tool.finished_init = host__finished_init; 2072 /* Obey finished round ordering */ 2073 inject->tool.finished_round = host__finished_round, 2074 /* Keep track of which CPU a VCPU is runnng on */ 2075 inject->tool.context_switch = host__context_switch; 2076 /* 2077 * Must order events to be able to obey finished round 2078 * ordering. 2079 */ 2080 inject->tool.ordered_events = true; 2081 inject->tool.ordering_requires_timestamps = true; 2082 /* Set up a separate session to process guest perf.data file */ 2083 ret = guest_session__start(gs, name, session->data->force); 2084 if (ret) { 2085 pr_err("Failed to process %s, error %d\n", name, ret); 2086 return ret; 2087 } 2088 /* Allow space in the header for guest attributes */ 2089 output_data_offset += gs->session->header.data_offset; 2090 output_data_offset = roundup(output_data_offset, 4096); 2091 } 2092 2093 if (!inject->itrace_synth_opts.set) 2094 auxtrace_index__free(&session->auxtrace_index); 2095 2096 if (!inject->is_pipe && !inject->in_place_update) 2097 lseek(fd, output_data_offset, SEEK_SET); 2098 2099 ret = perf_session__process_events(session); 2100 if (ret) 2101 return ret; 2102 2103 if (gs->session) { 2104 /* 2105 * Remaining guest events have later timestamps. Flush them 2106 * out to file. 2107 */ 2108 ret = guest_session__flush_events(gs); 2109 if (ret) { 2110 pr_err("Failed to flush guest events\n"); 2111 return ret; 2112 } 2113 } 2114 2115 if (!inject->is_pipe && !inject->in_place_update) { 2116 struct inject_fc inj_fc = { 2117 .fc.copy = feat_copy_cb, 2118 .inject = inject, 2119 }; 2120 2121 if (inject->build_ids) 2122 perf_header__set_feat(&session->header, 2123 HEADER_BUILD_ID); 2124 /* 2125 * Keep all buildids when there is unprocessed AUX data because 2126 * it is not known which ones the AUX trace hits. 2127 */ 2128 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && 2129 inject->have_auxtrace && !inject->itrace_synth_opts.set) 2130 perf_session__dsos_hit_all(session); 2131 /* 2132 * The AUX areas have been removed and replaced with 2133 * synthesized hardware events, so clear the feature flag. 2134 */ 2135 if (inject->itrace_synth_opts.set) { 2136 perf_header__clear_feat(&session->header, 2137 HEADER_AUXTRACE); 2138 if (inject->itrace_synth_opts.last_branch || 2139 inject->itrace_synth_opts.add_last_branch) 2140 perf_header__set_feat(&session->header, 2141 HEADER_BRANCH_STACK); 2142 } 2143 session->header.data_offset = output_data_offset; 2144 session->header.data_size = inject->bytes_written; 2145 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc); 2146 2147 if (inject->copy_kcore_dir) { 2148 ret = copy_kcore_dir(inject); 2149 if (ret) { 2150 pr_err("Failed to copy kcore\n"); 2151 return ret; 2152 } 2153 } 2154 if (gs->copy_kcore_dir) { 2155 ret = guest_session__copy_kcore_dir(gs); 2156 if (ret) { 2157 pr_err("Failed to copy guest kcore\n"); 2158 return ret; 2159 } 2160 } 2161 } 2162 2163 return ret; 2164 } 2165 2166 int cmd_inject(int argc, const char **argv) 2167 { 2168 struct perf_inject inject = { 2169 .tool = { 2170 .sample = perf_event__repipe_sample, 2171 .read = perf_event__repipe_sample, 2172 .mmap = perf_event__repipe, 2173 .mmap2 = perf_event__repipe, 2174 .comm = perf_event__repipe, 2175 .namespaces = perf_event__repipe, 2176 .cgroup = perf_event__repipe, 2177 .fork = perf_event__repipe, 2178 .exit = perf_event__repipe, 2179 .lost = perf_event__repipe, 2180 .lost_samples = perf_event__repipe, 2181 .aux = perf_event__repipe, 2182 .itrace_start = perf_event__repipe, 2183 .aux_output_hw_id = perf_event__repipe, 2184 .context_switch = perf_event__repipe, 2185 .throttle = perf_event__repipe, 2186 .unthrottle = perf_event__repipe, 2187 .ksymbol = perf_event__repipe, 2188 .bpf = perf_event__repipe, 2189 .text_poke = perf_event__repipe, 2190 .attr = perf_event__repipe_attr, 2191 .event_update = perf_event__repipe_event_update, 2192 .tracing_data = perf_event__repipe_op2_synth, 2193 .finished_round = perf_event__repipe_oe_synth, 2194 .build_id = perf_event__repipe_op2_synth, 2195 .id_index = perf_event__repipe_op2_synth, 2196 .auxtrace_info = perf_event__repipe_op2_synth, 2197 .auxtrace_error = perf_event__repipe_op2_synth, 2198 .time_conv = perf_event__repipe_op2_synth, 2199 .thread_map = perf_event__repipe_op2_synth, 2200 .cpu_map = perf_event__repipe_op2_synth, 2201 .stat_config = perf_event__repipe_op2_synth, 2202 .stat = perf_event__repipe_op2_synth, 2203 .stat_round = perf_event__repipe_op2_synth, 2204 .feature = perf_event__repipe_op2_synth, 2205 .finished_init = perf_event__repipe_op2_synth, 2206 .compressed = perf_event__repipe_op4_synth, 2207 .auxtrace = perf_event__repipe_auxtrace, 2208 }, 2209 .input_name = "-", 2210 .samples = LIST_HEAD_INIT(inject.samples), 2211 .output = { 2212 .path = "-", 2213 .mode = PERF_DATA_MODE_WRITE, 2214 .use_stdio = true, 2215 }, 2216 }; 2217 struct perf_data data = { 2218 .mode = PERF_DATA_MODE_READ, 2219 .use_stdio = true, 2220 }; 2221 int ret; 2222 bool repipe = true; 2223 const char *known_build_ids = NULL; 2224 2225 struct option options[] = { 2226 OPT_BOOLEAN('b', "build-ids", &inject.build_ids, 2227 "Inject build-ids into the output stream"), 2228 OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all, 2229 "Inject build-ids of all DSOs into the output stream"), 2230 OPT_STRING(0, "known-build-ids", &known_build_ids, 2231 "buildid path [,buildid path...]", 2232 "build-ids to use for given paths"), 2233 OPT_STRING('i', "input", &inject.input_name, "file", 2234 "input file name"), 2235 OPT_STRING('o', "output", &inject.output.path, "file", 2236 "output file name"), 2237 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2238 "Merge sched-stat and sched-switch for getting events " 2239 "where and how long tasks slept"), 2240 #ifdef HAVE_JITDUMP 2241 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2242 #endif 2243 OPT_INCR('v', "verbose", &verbose, 2244 "be more verbose (show build ids, etc)"), 2245 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2246 "file", "vmlinux pathname"), 2247 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2248 "don't load vmlinux even if found"), 2249 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2250 "kallsyms pathname"), 2251 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2252 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2253 NULL, "opts", "Instruction Tracing options\n" 2254 ITRACE_HELP, 2255 itrace_parse_synth_opts), 2256 OPT_BOOLEAN(0, "strip", &inject.strip, 2257 "strip non-synthesized events (use with --itrace)"), 2258 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2259 "correlate time between VM guests and the host", 2260 parse_vm_time_correlation), 2261 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2262 "inject events from a guest perf.data file", 2263 parse_guest_data), 2264 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2265 "guest mount directory under which every guest os" 2266 " instance has a subdir"), 2267 OPT_END() 2268 }; 2269 const char * const inject_usage[] = { 2270 "perf inject [<options>]", 2271 NULL 2272 }; 2273 2274 if (!inject.itrace_synth_opts.set) { 2275 /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ 2276 symbol_conf.lazy_load_kernel_maps = true; 2277 } 2278 2279 #ifndef HAVE_JITDUMP 2280 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2281 #endif 2282 argc = parse_options(argc, argv, options, inject_usage, 0); 2283 2284 /* 2285 * Any (unrecognized) arguments left? 2286 */ 2287 if (argc) 2288 usage_with_options(inject_usage, options); 2289 2290 if (inject.strip && !inject.itrace_synth_opts.set) { 2291 pr_err("--strip option requires --itrace option\n"); 2292 return -1; 2293 } 2294 2295 if (symbol__validate_sym_arguments()) 2296 return -1; 2297 2298 if (inject.in_place_update) { 2299 if (!strcmp(inject.input_name, "-")) { 2300 pr_err("Input file name required for in-place updating\n"); 2301 return -1; 2302 } 2303 if (strcmp(inject.output.path, "-")) { 2304 pr_err("Output file name must not be specified for in-place updating\n"); 2305 return -1; 2306 } 2307 if (!data.force && !inject.in_place_update_dry_run) { 2308 pr_err("The input file would be updated in place, " 2309 "the --force option is required.\n"); 2310 return -1; 2311 } 2312 if (!inject.in_place_update_dry_run) 2313 data.in_place_update = true; 2314 } else { 2315 if (strcmp(inject.output.path, "-") && !inject.strip && 2316 has_kcore_dir(inject.input_name)) { 2317 inject.output.is_dir = true; 2318 inject.copy_kcore_dir = true; 2319 } 2320 if (perf_data__open(&inject.output)) { 2321 perror("failed to create output file"); 2322 return -1; 2323 } 2324 } 2325 2326 data.path = inject.input_name; 2327 if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) { 2328 inject.is_pipe = true; 2329 /* 2330 * Do not repipe header when input is a regular file 2331 * since either it can rewrite the header at the end 2332 * or write a new pipe header. 2333 */ 2334 if (strcmp(inject.input_name, "-")) 2335 repipe = false; 2336 } 2337 2338 inject.session = __perf_session__new(&data, repipe, 2339 output_fd(&inject), 2340 &inject.tool); 2341 if (IS_ERR(inject.session)) { 2342 ret = PTR_ERR(inject.session); 2343 goto out_close_output; 2344 } 2345 2346 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2347 pr_warning("Decompression initialization failed.\n"); 2348 2349 /* Save original section info before feature bits change */ 2350 ret = save_section_info(&inject); 2351 if (ret) 2352 goto out_delete; 2353 2354 if (!data.is_pipe && inject.output.is_pipe) { 2355 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2356 if (ret < 0) { 2357 pr_err("Couldn't write a new pipe header.\n"); 2358 goto out_delete; 2359 } 2360 2361 ret = perf_event__synthesize_for_pipe(&inject.tool, 2362 inject.session, 2363 &inject.output, 2364 perf_event__repipe); 2365 if (ret < 0) 2366 goto out_delete; 2367 } 2368 2369 if (inject.build_ids && !inject.build_id_all) { 2370 /* 2371 * to make sure the mmap records are ordered correctly 2372 * and so that the correct especially due to jitted code 2373 * mmaps. We cannot generate the buildid hit list and 2374 * inject the jit mmaps at the same time for now. 2375 */ 2376 inject.tool.ordered_events = true; 2377 inject.tool.ordering_requires_timestamps = true; 2378 if (known_build_ids != NULL) { 2379 inject.known_build_ids = 2380 perf_inject__parse_known_build_ids(known_build_ids); 2381 2382 if (inject.known_build_ids == NULL) { 2383 pr_err("Couldn't parse known build ids.\n"); 2384 goto out_delete; 2385 } 2386 } 2387 } 2388 2389 if (inject.sched_stat) { 2390 inject.tool.ordered_events = true; 2391 } 2392 2393 #ifdef HAVE_JITDUMP 2394 if (inject.jit_mode) { 2395 inject.tool.mmap2 = perf_event__jit_repipe_mmap2; 2396 inject.tool.mmap = perf_event__jit_repipe_mmap; 2397 inject.tool.ordered_events = true; 2398 inject.tool.ordering_requires_timestamps = true; 2399 /* 2400 * JIT MMAP injection injects all MMAP events in one go, so it 2401 * does not obey finished_round semantics. 2402 */ 2403 inject.tool.finished_round = perf_event__drop_oe; 2404 } 2405 #endif 2406 ret = symbol__init(&inject.session->header.env); 2407 if (ret < 0) 2408 goto out_delete; 2409 2410 ret = __cmd_inject(&inject); 2411 2412 guest_session__exit(&inject.guest_session); 2413 2414 out_delete: 2415 strlist__delete(inject.known_build_ids); 2416 zstd_fini(&(inject.session->zstd_data)); 2417 perf_session__delete(inject.session); 2418 out_close_output: 2419 if (!inject.in_place_update) 2420 perf_data__close(&inject.output); 2421 free(inject.itrace_synth_opts.vm_tm_corr_args); 2422 free(inject.event_copy); 2423 free(inject.guest_session.ev.event_buf); 2424 return ret; 2425 } 2426