1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-inject.c 4 * 5 * Builtin inject command: Examine the live mode (stdin) event stream 6 * and repipe it to stdout while optionally injecting additional 7 * events into it. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include "util/dso.h" 13 #include "util/vdso.h" 14 #include "util/evlist.h" 15 #include "util/evsel.h" 16 #include "util/map.h" 17 #include "util/session.h" 18 #include "util/tool.h" 19 #include "util/debug.h" 20 #include "util/build-id.h" 21 #include "util/data.h" 22 #include "util/auxtrace.h" 23 #include "util/jit.h" 24 #include "util/string2.h" 25 #include "util/symbol.h" 26 #include "util/synthetic-events.h" 27 #include "util/thread.h" 28 #include "util/namespaces.h" 29 #include "util/util.h" 30 #include "util/tsc.h" 31 32 #include <internal/lib.h> 33 34 #include <linux/err.h> 35 #include <subcmd/parse-options.h> 36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ 37 38 #include <linux/list.h> 39 #include <linux/string.h> 40 #include <linux/zalloc.h> 41 #include <linux/hash.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <signal.h> 45 #include <inttypes.h> 46 47 struct guest_event { 48 struct perf_sample sample; 49 union perf_event *event; 50 char *event_buf; 51 }; 52 53 struct guest_id { 54 /* hlist_node must be first, see free_hlist() */ 55 struct hlist_node node; 56 u64 id; 57 u64 host_id; 58 u32 vcpu; 59 }; 60 61 struct guest_tid { 62 /* hlist_node must be first, see free_hlist() */ 63 struct hlist_node node; 64 /* Thread ID of QEMU thread */ 65 u32 tid; 66 u32 vcpu; 67 }; 68 69 struct guest_vcpu { 70 /* Current host CPU */ 71 u32 cpu; 72 /* Thread ID of QEMU thread */ 73 u32 tid; 74 }; 75 76 struct guest_session { 77 char *perf_data_file; 78 u32 machine_pid; 79 u64 time_offset; 80 double time_scale; 81 struct perf_tool tool; 82 struct perf_data data; 83 struct perf_session *session; 84 char *tmp_file_name; 85 int tmp_fd; 86 struct perf_tsc_conversion host_tc; 87 struct perf_tsc_conversion guest_tc; 88 bool copy_kcore_dir; 89 bool have_tc; 90 bool fetched; 91 bool ready; 92 u16 dflt_id_hdr_size; 93 u64 dflt_id; 94 u64 highest_id; 95 /* Array of guest_vcpu */ 96 struct guest_vcpu *vcpu; 97 size_t vcpu_cnt; 98 /* Hash table for guest_id */ 99 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 100 /* Hash table for guest_tid */ 101 struct hlist_head tids[PERF_EVLIST__HLIST_SIZE]; 102 /* Place to stash next guest event */ 103 struct guest_event ev; 104 }; 105 106 enum build_id_rewrite_style { 107 BID_RWS__NONE = 0, 108 BID_RWS__INJECT_HEADER_LAZY, 109 BID_RWS__INJECT_HEADER_ALL, 110 }; 111 112 struct perf_inject { 113 struct perf_tool tool; 114 struct perf_session *session; 115 enum build_id_rewrite_style build_id_style; 116 bool sched_stat; 117 bool have_auxtrace; 118 bool strip; 119 bool jit_mode; 120 bool in_place_update; 121 bool in_place_update_dry_run; 122 bool is_pipe; 123 bool copy_kcore_dir; 124 const char *input_name; 125 struct perf_data output; 126 u64 bytes_written; 127 u64 aux_id; 128 struct list_head samples; 129 struct itrace_synth_opts itrace_synth_opts; 130 char *event_copy; 131 struct perf_file_section secs[HEADER_FEAT_BITS]; 132 struct guest_session guest_session; 133 struct strlist *known_build_ids; 134 }; 135 136 struct event_entry { 137 struct list_head node; 138 u32 tid; 139 union perf_event event[]; 140 }; 141 142 static int dso__inject_build_id(struct dso *dso, const struct perf_tool *tool, 143 struct machine *machine, u8 cpumode, u32 flags); 144 145 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) 146 { 147 ssize_t size; 148 149 size = perf_data__write(&inject->output, buf, sz); 150 if (size < 0) 151 return -errno; 152 153 inject->bytes_written += size; 154 return 0; 155 } 156 157 static int perf_event__repipe_synth(const struct perf_tool *tool, 158 union perf_event *event) 159 { 160 struct perf_inject *inject = container_of(tool, struct perf_inject, 161 tool); 162 163 return output_bytes(inject, event, event->header.size); 164 } 165 166 static int perf_event__repipe_oe_synth(const struct perf_tool *tool, 167 union perf_event *event, 168 struct ordered_events *oe __maybe_unused) 169 { 170 return perf_event__repipe_synth(tool, event); 171 } 172 173 #ifdef HAVE_JITDUMP 174 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused, 175 union perf_event *event __maybe_unused, 176 struct ordered_events *oe __maybe_unused) 177 { 178 return 0; 179 } 180 #endif 181 182 static int perf_event__repipe_op2_synth(struct perf_session *session, 183 union perf_event *event) 184 { 185 return perf_event__repipe_synth(session->tool, event); 186 } 187 188 static int perf_event__repipe_op4_synth(struct perf_session *session, 189 union perf_event *event, 190 u64 data __maybe_unused, 191 const char *str __maybe_unused) 192 { 193 return perf_event__repipe_synth(session->tool, event); 194 } 195 196 static int perf_event__repipe_attr(const struct perf_tool *tool, 197 union perf_event *event, 198 struct evlist **pevlist) 199 { 200 struct perf_inject *inject = container_of(tool, struct perf_inject, 201 tool); 202 int ret; 203 204 ret = perf_event__process_attr(tool, event, pevlist); 205 if (ret) 206 return ret; 207 208 if (!inject->is_pipe) 209 return 0; 210 211 return perf_event__repipe_synth(tool, event); 212 } 213 214 static int perf_event__repipe_event_update(const struct perf_tool *tool, 215 union perf_event *event, 216 struct evlist **pevlist __maybe_unused) 217 { 218 return perf_event__repipe_synth(tool, event); 219 } 220 221 #ifdef HAVE_AUXTRACE_SUPPORT 222 223 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) 224 { 225 char buf[4096]; 226 ssize_t ssz; 227 int ret; 228 229 while (size > 0) { 230 ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); 231 if (ssz < 0) 232 return -errno; 233 ret = output_bytes(inject, buf, ssz); 234 if (ret) 235 return ret; 236 size -= ssz; 237 } 238 239 return 0; 240 } 241 242 static s64 perf_event__repipe_auxtrace(struct perf_session *session, 243 union perf_event *event) 244 { 245 const struct perf_tool *tool = session->tool; 246 struct perf_inject *inject = container_of(tool, struct perf_inject, 247 tool); 248 int ret; 249 250 inject->have_auxtrace = true; 251 252 if (!inject->output.is_pipe) { 253 off_t offset; 254 255 offset = lseek(inject->output.file.fd, 0, SEEK_CUR); 256 if (offset == -1) 257 return -errno; 258 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, 259 event, offset); 260 if (ret < 0) 261 return ret; 262 } 263 264 if (perf_data__is_pipe(session->data) || !session->one_mmap) { 265 ret = output_bytes(inject, event, event->header.size); 266 if (ret < 0) 267 return ret; 268 ret = copy_bytes(inject, session->data, 269 event->auxtrace.size); 270 } else { 271 ret = output_bytes(inject, event, 272 event->header.size + event->auxtrace.size); 273 } 274 if (ret < 0) 275 return ret; 276 277 return event->auxtrace.size; 278 } 279 280 #else 281 282 static s64 283 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, 284 union perf_event *event __maybe_unused) 285 { 286 pr_err("AUX area tracing not supported\n"); 287 return -EINVAL; 288 } 289 290 #endif 291 292 static int perf_event__repipe(const struct perf_tool *tool, 293 union perf_event *event, 294 struct perf_sample *sample __maybe_unused, 295 struct machine *machine __maybe_unused) 296 { 297 return perf_event__repipe_synth(tool, event); 298 } 299 300 static int perf_event__drop(const struct perf_tool *tool __maybe_unused, 301 union perf_event *event __maybe_unused, 302 struct perf_sample *sample __maybe_unused, 303 struct machine *machine __maybe_unused) 304 { 305 return 0; 306 } 307 308 static int perf_event__drop_aux(const struct perf_tool *tool, 309 union perf_event *event __maybe_unused, 310 struct perf_sample *sample, 311 struct machine *machine __maybe_unused) 312 { 313 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 314 315 if (!inject->aux_id) 316 inject->aux_id = sample->id; 317 318 return 0; 319 } 320 321 static union perf_event * 322 perf_inject__cut_auxtrace_sample(struct perf_inject *inject, 323 union perf_event *event, 324 struct perf_sample *sample) 325 { 326 size_t sz1 = sample->aux_sample.data - (void *)event; 327 size_t sz2 = event->header.size - sample->aux_sample.size - sz1; 328 union perf_event *ev; 329 330 if (inject->event_copy == NULL) { 331 inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 332 if (!inject->event_copy) 333 return ERR_PTR(-ENOMEM); 334 } 335 ev = (union perf_event *)inject->event_copy; 336 if (sz1 > event->header.size || sz2 > event->header.size || 337 sz1 + sz2 > event->header.size || 338 sz1 < sizeof(struct perf_event_header) + sizeof(u64)) 339 return event; 340 341 memcpy(ev, event, sz1); 342 memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2); 343 ev->header.size = sz1 + sz2; 344 ((u64 *)((void *)ev + sz1))[-1] = 0; 345 346 return ev; 347 } 348 349 typedef int (*inject_handler)(const struct perf_tool *tool, 350 union perf_event *event, 351 struct perf_sample *sample, 352 struct evsel *evsel, 353 struct machine *machine); 354 355 static int perf_event__repipe_sample(const struct perf_tool *tool, 356 union perf_event *event, 357 struct perf_sample *sample, 358 struct evsel *evsel, 359 struct machine *machine) 360 { 361 struct perf_inject *inject = container_of(tool, struct perf_inject, 362 tool); 363 364 if (evsel && evsel->handler) { 365 inject_handler f = evsel->handler; 366 return f(tool, event, sample, evsel, machine); 367 } 368 369 build_id__mark_dso_hit(tool, event, sample, evsel, machine); 370 371 if (inject->itrace_synth_opts.set && sample->aux_sample.size) { 372 event = perf_inject__cut_auxtrace_sample(inject, event, sample); 373 if (IS_ERR(event)) 374 return PTR_ERR(event); 375 } 376 377 return perf_event__repipe_synth(tool, event); 378 } 379 380 static struct dso *findnew_dso(int pid, int tid, const char *filename, 381 const struct dso_id *id, struct machine *machine) 382 { 383 struct thread *thread; 384 struct nsinfo *nsi = NULL; 385 struct nsinfo *nnsi; 386 struct dso *dso; 387 bool vdso; 388 389 thread = machine__findnew_thread(machine, pid, tid); 390 if (thread == NULL) { 391 pr_err("cannot find or create a task %d/%d.\n", tid, pid); 392 return NULL; 393 } 394 395 vdso = is_vdso_map(filename); 396 nsi = nsinfo__get(thread__nsinfo(thread)); 397 398 if (vdso) { 399 /* The vdso maps are always on the host and not the 400 * container. Ensure that we don't use setns to look 401 * them up. 402 */ 403 nnsi = nsinfo__copy(nsi); 404 if (nnsi) { 405 nsinfo__put(nsi); 406 nsinfo__clear_need_setns(nnsi); 407 nsi = nnsi; 408 } 409 dso = machine__findnew_vdso(machine, thread); 410 } else { 411 dso = machine__findnew_dso_id(machine, filename, id); 412 } 413 414 if (dso) { 415 mutex_lock(dso__lock(dso)); 416 dso__set_nsinfo(dso, nsi); 417 mutex_unlock(dso__lock(dso)); 418 } else 419 nsinfo__put(nsi); 420 421 thread__put(thread); 422 return dso; 423 } 424 425 static int perf_event__repipe_common_mmap(const struct perf_tool *tool, 426 union perf_event *event, 427 struct perf_sample *sample, 428 struct machine *machine, 429 __u32 pid, __u32 tid, __u32 flags, 430 const char *filename, 431 const struct dso_id *dso_id, 432 int (*perf_event_process)(const struct perf_tool *tool, 433 union perf_event *event, 434 struct perf_sample *sample, 435 struct machine *machine)) 436 { 437 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 438 struct dso *dso = NULL; 439 bool dso_sought = false; 440 441 #ifdef HAVE_JITDUMP 442 if (inject->jit_mode) { 443 u64 n = 0; 444 int ret; 445 446 /* If jit marker, then inject jit mmaps and generate ELF images. */ 447 ret = jit_process(inject->session, &inject->output, machine, 448 filename, pid, tid, &n); 449 if (ret < 0) 450 return ret; 451 if (ret) { 452 inject->bytes_written += n; 453 return 0; 454 } 455 } 456 #endif 457 if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { 458 dso = findnew_dso(pid, tid, filename, dso_id, machine); 459 dso_sought = true; 460 if (dso) { 461 /* mark it not to inject build-id */ 462 dso__set_hit(dso); 463 } 464 } 465 if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) { 466 if (!dso_sought) { 467 dso = findnew_dso(pid, tid, filename, dso_id, machine); 468 dso_sought = true; 469 } 470 471 if (dso && !dso__hit(dso)) { 472 dso__set_hit(dso); 473 dso__inject_build_id(dso, tool, machine, sample->cpumode, flags); 474 } 475 } else { 476 /* Create the thread, map, etc. Not done for the unordered inject all case. */ 477 int err = perf_event_process(tool, event, sample, machine); 478 479 if (err) { 480 dso__put(dso); 481 return err; 482 } 483 } 484 dso__put(dso); 485 return perf_event__repipe(tool, event, sample, machine); 486 } 487 488 static int perf_event__repipe_mmap(const struct perf_tool *tool, 489 union perf_event *event, 490 struct perf_sample *sample, 491 struct machine *machine) 492 { 493 return perf_event__repipe_common_mmap( 494 tool, event, sample, machine, 495 event->mmap.pid, event->mmap.tid, /*flags=*/0, 496 event->mmap.filename, /*dso_id=*/NULL, 497 perf_event__process_mmap); 498 } 499 500 static int perf_event__repipe_mmap2(const struct perf_tool *tool, 501 union perf_event *event, 502 struct perf_sample *sample, 503 struct machine *machine) 504 { 505 struct dso_id id; 506 struct dso_id *dso_id = NULL; 507 508 if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { 509 id.maj = event->mmap2.maj; 510 id.min = event->mmap2.min; 511 id.ino = event->mmap2.ino; 512 id.ino_generation = event->mmap2.ino_generation; 513 dso_id = &id; 514 } 515 516 return perf_event__repipe_common_mmap( 517 tool, event, sample, machine, 518 event->mmap2.pid, event->mmap2.tid, event->mmap2.flags, 519 event->mmap2.filename, dso_id, 520 perf_event__process_mmap2); 521 } 522 523 static int perf_event__repipe_fork(const struct perf_tool *tool, 524 union perf_event *event, 525 struct perf_sample *sample, 526 struct machine *machine) 527 { 528 int err; 529 530 err = perf_event__process_fork(tool, event, sample, machine); 531 perf_event__repipe(tool, event, sample, machine); 532 533 return err; 534 } 535 536 static int perf_event__repipe_comm(const struct perf_tool *tool, 537 union perf_event *event, 538 struct perf_sample *sample, 539 struct machine *machine) 540 { 541 int err; 542 543 err = perf_event__process_comm(tool, event, sample, machine); 544 perf_event__repipe(tool, event, sample, machine); 545 546 return err; 547 } 548 549 static int perf_event__repipe_namespaces(const struct perf_tool *tool, 550 union perf_event *event, 551 struct perf_sample *sample, 552 struct machine *machine) 553 { 554 int err = perf_event__process_namespaces(tool, event, sample, machine); 555 556 perf_event__repipe(tool, event, sample, machine); 557 558 return err; 559 } 560 561 static int perf_event__repipe_exit(const struct perf_tool *tool, 562 union perf_event *event, 563 struct perf_sample *sample, 564 struct machine *machine) 565 { 566 int err; 567 568 err = perf_event__process_exit(tool, event, sample, machine); 569 perf_event__repipe(tool, event, sample, machine); 570 571 return err; 572 } 573 574 #ifdef HAVE_LIBTRACEEVENT 575 static int perf_event__repipe_tracing_data(struct perf_session *session, 576 union perf_event *event) 577 { 578 perf_event__repipe_synth(session->tool, event); 579 580 return perf_event__process_tracing_data(session, event); 581 } 582 #endif 583 584 static int dso__read_build_id(struct dso *dso) 585 { 586 struct nscookie nsc; 587 588 if (dso__has_build_id(dso)) 589 return 0; 590 591 mutex_lock(dso__lock(dso)); 592 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 593 if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) 594 dso__set_has_build_id(dso); 595 else if (dso__nsinfo(dso)) { 596 char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); 597 598 if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) 599 dso__set_has_build_id(dso); 600 free(new_name); 601 } 602 nsinfo__mountns_exit(&nsc); 603 mutex_unlock(dso__lock(dso)); 604 605 return dso__has_build_id(dso) ? 0 : -1; 606 } 607 608 static struct strlist *perf_inject__parse_known_build_ids( 609 const char *known_build_ids_string) 610 { 611 struct str_node *pos, *tmp; 612 struct strlist *known_build_ids; 613 int bid_len; 614 615 known_build_ids = strlist__new(known_build_ids_string, NULL); 616 if (known_build_ids == NULL) 617 return NULL; 618 strlist__for_each_entry_safe(pos, tmp, known_build_ids) { 619 const char *build_id, *dso_name; 620 621 build_id = skip_spaces(pos->s); 622 dso_name = strchr(build_id, ' '); 623 if (dso_name == NULL) { 624 strlist__remove(known_build_ids, pos); 625 continue; 626 } 627 bid_len = dso_name - pos->s; 628 dso_name = skip_spaces(dso_name); 629 if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) { 630 strlist__remove(known_build_ids, pos); 631 continue; 632 } 633 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 634 if (!isxdigit(build_id[2 * ix]) || 635 !isxdigit(build_id[2 * ix + 1])) { 636 strlist__remove(known_build_ids, pos); 637 break; 638 } 639 } 640 } 641 return known_build_ids; 642 } 643 644 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject, 645 struct dso *dso) 646 { 647 struct str_node *pos; 648 int bid_len; 649 650 strlist__for_each_entry(pos, inject->known_build_ids) { 651 const char *build_id, *dso_name; 652 653 build_id = skip_spaces(pos->s); 654 dso_name = strchr(build_id, ' '); 655 bid_len = dso_name - pos->s; 656 dso_name = skip_spaces(dso_name); 657 if (strcmp(dso__long_name(dso), dso_name)) 658 continue; 659 for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) { 660 dso__bid(dso)->data[ix] = (hex(build_id[2 * ix]) << 4 | 661 hex(build_id[2 * ix + 1])); 662 } 663 dso__bid(dso)->size = bid_len / 2; 664 dso__set_has_build_id(dso); 665 return true; 666 } 667 return false; 668 } 669 670 static int dso__inject_build_id(struct dso *dso, const struct perf_tool *tool, 671 struct machine *machine, u8 cpumode, u32 flags) 672 { 673 struct perf_inject *inject = container_of(tool, struct perf_inject, 674 tool); 675 int err; 676 677 if (is_anon_memory(dso__long_name(dso)) || flags & MAP_HUGETLB) 678 return 0; 679 if (is_no_dso_memory(dso__long_name(dso))) 680 return 0; 681 682 if (inject->known_build_ids != NULL && 683 perf_inject__lookup_known_build_id(inject, dso)) 684 return 1; 685 686 if (dso__read_build_id(dso) < 0) { 687 pr_debug("no build_id found for %s\n", dso__long_name(dso)); 688 return -1; 689 } 690 691 err = perf_event__synthesize_build_id(tool, dso, cpumode, 692 perf_event__repipe, machine); 693 if (err) { 694 pr_err("Can't synthesize build_id event for %s\n", dso__long_name(dso)); 695 return -1; 696 } 697 698 return 0; 699 } 700 701 struct mark_dso_hit_args { 702 const struct perf_tool *tool; 703 struct machine *machine; 704 u8 cpumode; 705 }; 706 707 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data) 708 { 709 struct mark_dso_hit_args *args = data; 710 struct map *map = node->ms.map; 711 712 if (map) { 713 struct dso *dso = map__dso(map); 714 715 if (dso && !dso__hit(dso)) { 716 dso__set_hit(dso); 717 dso__inject_build_id(dso, args->tool, args->machine, 718 args->cpumode, map__flags(map)); 719 } 720 } 721 return 0; 722 } 723 724 int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, 725 struct perf_sample *sample, 726 struct evsel *evsel __maybe_unused, 727 struct machine *machine) 728 { 729 struct addr_location al; 730 struct thread *thread; 731 struct mark_dso_hit_args args = { 732 .tool = tool, 733 .machine = machine, 734 .cpumode = sample->cpumode, 735 }; 736 737 addr_location__init(&al); 738 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 739 if (thread == NULL) { 740 pr_err("problem processing %d event, skipping it.\n", 741 event->header.type); 742 goto repipe; 743 } 744 745 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { 746 struct dso *dso = map__dso(al.map); 747 748 if (!dso__hit(dso)) { 749 dso__set_hit(dso); 750 dso__inject_build_id(dso, tool, machine, 751 sample->cpumode, map__flags(al.map)); 752 } 753 } 754 755 sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, 756 mark_dso_hit_callback, &args); 757 758 thread__put(thread); 759 repipe: 760 perf_event__repipe(tool, event, sample, machine); 761 addr_location__exit(&al); 762 return 0; 763 } 764 765 static int perf_inject__sched_process_exit(const struct perf_tool *tool, 766 union perf_event *event __maybe_unused, 767 struct perf_sample *sample, 768 struct evsel *evsel __maybe_unused, 769 struct machine *machine __maybe_unused) 770 { 771 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 772 struct event_entry *ent; 773 774 list_for_each_entry(ent, &inject->samples, node) { 775 if (sample->tid == ent->tid) { 776 list_del_init(&ent->node); 777 free(ent); 778 break; 779 } 780 } 781 782 return 0; 783 } 784 785 static int perf_inject__sched_switch(const struct perf_tool *tool, 786 union perf_event *event, 787 struct perf_sample *sample, 788 struct evsel *evsel, 789 struct machine *machine) 790 { 791 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 792 struct event_entry *ent; 793 794 perf_inject__sched_process_exit(tool, event, sample, evsel, machine); 795 796 ent = malloc(event->header.size + sizeof(struct event_entry)); 797 if (ent == NULL) { 798 color_fprintf(stderr, PERF_COLOR_RED, 799 "Not enough memory to process sched switch event!"); 800 return -1; 801 } 802 803 ent->tid = sample->tid; 804 memcpy(&ent->event, event, event->header.size); 805 list_add(&ent->node, &inject->samples); 806 return 0; 807 } 808 809 #ifdef HAVE_LIBTRACEEVENT 810 static int perf_inject__sched_stat(const struct perf_tool *tool, 811 union perf_event *event __maybe_unused, 812 struct perf_sample *sample, 813 struct evsel *evsel, 814 struct machine *machine) 815 { 816 struct event_entry *ent; 817 union perf_event *event_sw; 818 struct perf_sample sample_sw; 819 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 820 u32 pid = evsel__intval(evsel, sample, "pid"); 821 822 list_for_each_entry(ent, &inject->samples, node) { 823 if (pid == ent->tid) 824 goto found; 825 } 826 827 return 0; 828 found: 829 event_sw = &ent->event[0]; 830 evsel__parse_sample(evsel, event_sw, &sample_sw); 831 832 sample_sw.period = sample->period; 833 sample_sw.time = sample->time; 834 perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type, 835 evsel->core.attr.read_format, &sample_sw); 836 build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); 837 return perf_event__repipe(tool, event_sw, &sample_sw, machine); 838 } 839 #endif 840 841 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu) 842 { 843 if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL)) 844 return NULL; 845 return &gs->vcpu[vcpu]; 846 } 847 848 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz) 849 { 850 ssize_t ret = writen(gs->tmp_fd, buf, sz); 851 852 return ret < 0 ? ret : 0; 853 } 854 855 static int guest_session__repipe(const struct perf_tool *tool, 856 union perf_event *event, 857 struct perf_sample *sample __maybe_unused, 858 struct machine *machine __maybe_unused) 859 { 860 struct guest_session *gs = container_of(tool, struct guest_session, tool); 861 862 return guest_session__output_bytes(gs, event, event->header.size); 863 } 864 865 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu) 866 { 867 struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid)); 868 int hash; 869 870 if (!guest_tid) 871 return -ENOMEM; 872 873 guest_tid->tid = tid; 874 guest_tid->vcpu = vcpu; 875 hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS); 876 hlist_add_head(&guest_tid->node, &gs->tids[hash]); 877 878 return 0; 879 } 880 881 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused, 882 union perf_event *event, 883 u64 offset __maybe_unused, void *data) 884 { 885 struct guest_session *gs = data; 886 unsigned int vcpu; 887 struct guest_vcpu *guest_vcpu; 888 int ret; 889 890 if (event->header.type != PERF_RECORD_COMM || 891 event->comm.pid != gs->machine_pid) 892 return 0; 893 894 /* 895 * QEMU option -name debug-threads=on, causes thread names formatted as 896 * below, although it is not an ABI. Also libvirt seems to use this by 897 * default. Here we rely on it to tell us which thread is which VCPU. 898 */ 899 ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu); 900 if (ret <= 0) 901 return ret; 902 pr_debug("Found VCPU: tid %u comm %s vcpu %u\n", 903 event->comm.tid, event->comm.comm, vcpu); 904 if (vcpu > INT_MAX) { 905 pr_err("Invalid VCPU %u\n", vcpu); 906 return -EINVAL; 907 } 908 guest_vcpu = guest_session__vcpu(gs, vcpu); 909 if (!guest_vcpu) 910 return -ENOMEM; 911 if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) { 912 pr_err("Fatal error: Two threads found with the same VCPU\n"); 913 return -EINVAL; 914 } 915 guest_vcpu->tid = event->comm.tid; 916 917 return guest_session__map_tid(gs, event->comm.tid, vcpu); 918 } 919 920 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs) 921 { 922 return perf_session__peek_events(session, session->header.data_offset, 923 session->header.data_size, 924 host_peek_vm_comms_cb, gs); 925 } 926 927 static bool evlist__is_id_used(struct evlist *evlist, u64 id) 928 { 929 return evlist__id2sid(evlist, id); 930 } 931 932 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist) 933 { 934 do { 935 gs->highest_id += 1; 936 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id)); 937 938 return gs->highest_id; 939 } 940 941 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu) 942 { 943 struct guest_id *guest_id = zalloc(sizeof(*guest_id)); 944 int hash; 945 946 if (!guest_id) 947 return -ENOMEM; 948 949 guest_id->id = id; 950 guest_id->host_id = host_id; 951 guest_id->vcpu = vcpu; 952 hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS); 953 hlist_add_head(&guest_id->node, &gs->heads[hash]); 954 955 return 0; 956 } 957 958 static u64 evlist__find_highest_id(struct evlist *evlist) 959 { 960 struct evsel *evsel; 961 u64 highest_id = 1; 962 963 evlist__for_each_entry(evlist, evsel) { 964 u32 j; 965 966 for (j = 0; j < evsel->core.ids; j++) { 967 u64 id = evsel->core.id[j]; 968 969 if (id > highest_id) 970 highest_id = id; 971 } 972 } 973 974 return highest_id; 975 } 976 977 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist) 978 { 979 struct evlist *evlist = gs->session->evlist; 980 struct evsel *evsel; 981 int ret; 982 983 evlist__for_each_entry(evlist, evsel) { 984 u32 j; 985 986 for (j = 0; j < evsel->core.ids; j++) { 987 struct perf_sample_id *sid; 988 u64 host_id; 989 u64 id; 990 991 id = evsel->core.id[j]; 992 sid = evlist__id2sid(evlist, id); 993 if (!sid || sid->cpu.cpu == -1) 994 continue; 995 host_id = guest_session__allocate_new_id(gs, host_evlist); 996 ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu); 997 if (ret) 998 return ret; 999 } 1000 } 1001 1002 return 0; 1003 } 1004 1005 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id) 1006 { 1007 struct hlist_head *head; 1008 struct guest_id *guest_id; 1009 int hash; 1010 1011 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 1012 head = &gs->heads[hash]; 1013 1014 hlist_for_each_entry(guest_id, head, node) 1015 if (guest_id->id == id) 1016 return guest_id; 1017 1018 return NULL; 1019 } 1020 1021 static int process_attr(const struct perf_tool *tool, union perf_event *event, 1022 struct perf_sample *sample __maybe_unused, 1023 struct machine *machine __maybe_unused) 1024 { 1025 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1026 1027 return perf_event__process_attr(tool, event, &inject->session->evlist); 1028 } 1029 1030 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel) 1031 { 1032 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1033 struct perf_event_attr attr = evsel->core.attr; 1034 u64 *id_array; 1035 u32 *vcpu_array; 1036 int ret = -ENOMEM; 1037 u32 i; 1038 1039 id_array = calloc(evsel->core.ids, sizeof(*id_array)); 1040 if (!id_array) 1041 return -ENOMEM; 1042 1043 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array)); 1044 if (!vcpu_array) 1045 goto out; 1046 1047 for (i = 0; i < evsel->core.ids; i++) { 1048 u64 id = evsel->core.id[i]; 1049 struct guest_id *guest_id = guest_session__lookup_id(gs, id); 1050 1051 if (!guest_id) { 1052 pr_err("Failed to find guest id %"PRIu64"\n", id); 1053 ret = -EINVAL; 1054 goto out; 1055 } 1056 id_array[i] = guest_id->host_id; 1057 vcpu_array[i] = guest_id->vcpu; 1058 } 1059 1060 attr.sample_type |= PERF_SAMPLE_IDENTIFIER; 1061 attr.exclude_host = 1; 1062 attr.exclude_guest = 0; 1063 1064 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids, 1065 id_array, process_attr); 1066 if (ret) 1067 pr_err("Failed to add guest attr.\n"); 1068 1069 for (i = 0; i < evsel->core.ids; i++) { 1070 struct perf_sample_id *sid; 1071 u32 vcpu = vcpu_array[i]; 1072 1073 sid = evlist__id2sid(inject->session->evlist, id_array[i]); 1074 /* Guest event is per-thread from the host point of view */ 1075 sid->cpu.cpu = -1; 1076 sid->tid = gs->vcpu[vcpu].tid; 1077 sid->machine_pid = gs->machine_pid; 1078 sid->vcpu.cpu = vcpu; 1079 } 1080 out: 1081 free(vcpu_array); 1082 free(id_array); 1083 return ret; 1084 } 1085 1086 static int guest_session__add_attrs(struct guest_session *gs) 1087 { 1088 struct evlist *evlist = gs->session->evlist; 1089 struct evsel *evsel; 1090 int ret; 1091 1092 evlist__for_each_entry(evlist, evsel) { 1093 ret = guest_session__add_attr(gs, evsel); 1094 if (ret) 1095 return ret; 1096 } 1097 1098 return 0; 1099 } 1100 1101 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt) 1102 { 1103 struct perf_session *session = inject->session; 1104 struct evlist *evlist = session->evlist; 1105 struct machine *machine = &session->machines.host; 1106 size_t from = evlist->core.nr_entries - new_cnt; 1107 1108 return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe, 1109 evlist, machine, from); 1110 } 1111 1112 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid) 1113 { 1114 struct hlist_head *head; 1115 struct guest_tid *guest_tid; 1116 int hash; 1117 1118 hash = hash_32(tid, PERF_EVLIST__HLIST_BITS); 1119 head = &gs->tids[hash]; 1120 1121 hlist_for_each_entry(guest_tid, head, node) 1122 if (guest_tid->tid == tid) 1123 return guest_tid; 1124 1125 return NULL; 1126 } 1127 1128 static bool dso__is_in_kernel_space(struct dso *dso) 1129 { 1130 if (dso__is_vdso(dso)) 1131 return false; 1132 1133 return dso__is_kcore(dso) || 1134 dso__kernel(dso) || 1135 is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN); 1136 } 1137 1138 static u64 evlist__first_id(struct evlist *evlist) 1139 { 1140 struct evsel *evsel; 1141 1142 evlist__for_each_entry(evlist, evsel) { 1143 if (evsel->core.ids) 1144 return evsel->core.id[0]; 1145 } 1146 return 0; 1147 } 1148 1149 static int process_build_id(const struct perf_tool *tool, 1150 union perf_event *event, 1151 struct perf_sample *sample __maybe_unused, 1152 struct machine *machine __maybe_unused) 1153 { 1154 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1155 1156 return perf_event__process_build_id(inject->session, event); 1157 } 1158 1159 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid) 1160 { 1161 struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid); 1162 u8 cpumode = dso__is_in_kernel_space(dso) ? 1163 PERF_RECORD_MISC_GUEST_KERNEL : 1164 PERF_RECORD_MISC_GUEST_USER; 1165 1166 if (!machine) 1167 return -ENOMEM; 1168 1169 dso__set_hit(dso); 1170 1171 return perf_event__synthesize_build_id(&inject->tool, dso, cpumode, 1172 process_build_id, machine); 1173 } 1174 1175 static int guest_session__add_build_ids_cb(struct dso *dso, void *data) 1176 { 1177 struct guest_session *gs = data; 1178 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1179 1180 if (!dso__has_build_id(dso)) 1181 return 0; 1182 1183 return synthesize_build_id(inject, dso, gs->machine_pid); 1184 1185 } 1186 1187 static int guest_session__add_build_ids(struct guest_session *gs) 1188 { 1189 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1190 1191 /* Build IDs will be put in the Build ID feature section */ 1192 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID); 1193 1194 return dsos__for_each_dso(&gs->session->machines.host.dsos, 1195 guest_session__add_build_ids_cb, 1196 gs); 1197 } 1198 1199 static int guest_session__ksymbol_event(const struct perf_tool *tool, 1200 union perf_event *event, 1201 struct perf_sample *sample __maybe_unused, 1202 struct machine *machine __maybe_unused) 1203 { 1204 struct guest_session *gs = container_of(tool, struct guest_session, tool); 1205 1206 /* Only support out-of-line i.e. no BPF support */ 1207 if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL) 1208 return 0; 1209 1210 return guest_session__output_bytes(gs, event, event->header.size); 1211 } 1212 1213 static int guest_session__start(struct guest_session *gs, const char *name, bool force) 1214 { 1215 char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX"; 1216 struct perf_session *session; 1217 int ret; 1218 1219 /* Only these events will be injected */ 1220 gs->tool.mmap = guest_session__repipe; 1221 gs->tool.mmap2 = guest_session__repipe; 1222 gs->tool.comm = guest_session__repipe; 1223 gs->tool.fork = guest_session__repipe; 1224 gs->tool.exit = guest_session__repipe; 1225 gs->tool.lost = guest_session__repipe; 1226 gs->tool.context_switch = guest_session__repipe; 1227 gs->tool.ksymbol = guest_session__ksymbol_event; 1228 gs->tool.text_poke = guest_session__repipe; 1229 /* 1230 * Processing a build ID creates a struct dso with that build ID. Later, 1231 * all guest dsos are iterated and the build IDs processed into the host 1232 * session where they will be output to the Build ID feature section 1233 * when the perf.data file header is written. 1234 */ 1235 gs->tool.build_id = perf_event__process_build_id; 1236 /* Process the id index to know what VCPU an ID belongs to */ 1237 gs->tool.id_index = perf_event__process_id_index; 1238 1239 gs->tool.ordered_events = true; 1240 gs->tool.ordering_requires_timestamps = true; 1241 1242 gs->data.path = name; 1243 gs->data.force = force; 1244 gs->data.mode = PERF_DATA_MODE_READ; 1245 1246 session = perf_session__new(&gs->data, &gs->tool); 1247 if (IS_ERR(session)) 1248 return PTR_ERR(session); 1249 gs->session = session; 1250 1251 /* 1252 * Initial events have zero'd ID samples. Get default ID sample size 1253 * used for removing them. 1254 */ 1255 gs->dflt_id_hdr_size = session->machines.host.id_hdr_size; 1256 /* And default ID for adding back a host-compatible ID sample */ 1257 gs->dflt_id = evlist__first_id(session->evlist); 1258 if (!gs->dflt_id) { 1259 pr_err("Guest data has no sample IDs"); 1260 return -EINVAL; 1261 } 1262 1263 /* Temporary file for guest events */ 1264 gs->tmp_file_name = strdup(tmp_file_name); 1265 if (!gs->tmp_file_name) 1266 return -ENOMEM; 1267 gs->tmp_fd = mkstemp(gs->tmp_file_name); 1268 if (gs->tmp_fd < 0) 1269 return -errno; 1270 1271 if (zstd_init(&gs->session->zstd_data, 0) < 0) 1272 pr_warning("Guest session decompression initialization failed.\n"); 1273 1274 /* 1275 * perf does not support processing 2 sessions simultaneously, so output 1276 * guest events to a temporary file. 1277 */ 1278 ret = perf_session__process_events(gs->session); 1279 if (ret) 1280 return ret; 1281 1282 if (lseek(gs->tmp_fd, 0, SEEK_SET)) 1283 return -errno; 1284 1285 return 0; 1286 } 1287 1288 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */ 1289 static void free_hlist(struct hlist_head *heads, size_t hlist_sz) 1290 { 1291 struct hlist_node *pos, *n; 1292 size_t i; 1293 1294 for (i = 0; i < hlist_sz; ++i) { 1295 hlist_for_each_safe(pos, n, &heads[i]) { 1296 hlist_del(pos); 1297 free(pos); 1298 } 1299 } 1300 } 1301 1302 static void guest_session__exit(struct guest_session *gs) 1303 { 1304 if (gs->session) { 1305 perf_session__delete(gs->session); 1306 free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE); 1307 free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE); 1308 } 1309 if (gs->tmp_file_name) { 1310 if (gs->tmp_fd >= 0) 1311 close(gs->tmp_fd); 1312 unlink(gs->tmp_file_name); 1313 zfree(&gs->tmp_file_name); 1314 } 1315 zfree(&gs->vcpu); 1316 zfree(&gs->perf_data_file); 1317 } 1318 1319 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv) 1320 { 1321 tc->time_shift = time_conv->time_shift; 1322 tc->time_mult = time_conv->time_mult; 1323 tc->time_zero = time_conv->time_zero; 1324 tc->time_cycles = time_conv->time_cycles; 1325 tc->time_mask = time_conv->time_mask; 1326 tc->cap_user_time_zero = time_conv->cap_user_time_zero; 1327 tc->cap_user_time_short = time_conv->cap_user_time_short; 1328 } 1329 1330 static void guest_session__get_tc(struct guest_session *gs) 1331 { 1332 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1333 1334 get_tsc_conv(&gs->host_tc, &inject->session->time_conv); 1335 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv); 1336 } 1337 1338 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time) 1339 { 1340 u64 tsc; 1341 1342 if (!guest_time) { 1343 *host_time = 0; 1344 return; 1345 } 1346 1347 if (gs->guest_tc.cap_user_time_zero) 1348 tsc = perf_time_to_tsc(guest_time, &gs->guest_tc); 1349 else 1350 tsc = guest_time; 1351 1352 /* 1353 * This is the correct order of operations for x86 if the TSC Offset and 1354 * Multiplier values are used. 1355 */ 1356 tsc -= gs->time_offset; 1357 tsc /= gs->time_scale; 1358 1359 if (gs->host_tc.cap_user_time_zero) 1360 *host_time = tsc_to_perf_time(tsc, &gs->host_tc); 1361 else 1362 *host_time = tsc; 1363 } 1364 1365 static int guest_session__fetch(struct guest_session *gs) 1366 { 1367 void *buf; 1368 struct perf_event_header *hdr; 1369 size_t hdr_sz = sizeof(*hdr); 1370 ssize_t ret; 1371 1372 buf = gs->ev.event_buf; 1373 if (!buf) { 1374 buf = malloc(PERF_SAMPLE_MAX_SIZE); 1375 if (!buf) 1376 return -ENOMEM; 1377 gs->ev.event_buf = buf; 1378 } 1379 hdr = buf; 1380 ret = readn(gs->tmp_fd, buf, hdr_sz); 1381 if (ret < 0) 1382 return ret; 1383 1384 if (!ret) { 1385 /* Zero size means EOF */ 1386 hdr->size = 0; 1387 return 0; 1388 } 1389 1390 buf += hdr_sz; 1391 1392 ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz); 1393 if (ret < 0) 1394 return ret; 1395 1396 gs->ev.event = (union perf_event *)gs->ev.event_buf; 1397 gs->ev.sample.time = 0; 1398 1399 if (hdr->type >= PERF_RECORD_USER_TYPE_START) { 1400 pr_err("Unexpected type fetching guest event"); 1401 return 0; 1402 } 1403 1404 ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample); 1405 if (ret) { 1406 pr_err("Parse failed fetching guest event"); 1407 return ret; 1408 } 1409 1410 if (!gs->have_tc) { 1411 guest_session__get_tc(gs); 1412 gs->have_tc = true; 1413 } 1414 1415 guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time); 1416 1417 return 0; 1418 } 1419 1420 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev, 1421 const struct perf_sample *sample) 1422 { 1423 struct evsel *evsel; 1424 void *array; 1425 int ret; 1426 1427 evsel = evlist__id2evsel(evlist, sample->id); 1428 array = ev; 1429 1430 if (!evsel) { 1431 pr_err("No evsel for id %"PRIu64"\n", sample->id); 1432 return -EINVAL; 1433 } 1434 1435 array += ev->header.size; 1436 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); 1437 if (ret < 0) 1438 return ret; 1439 1440 if (ret & 7) { 1441 pr_err("Bad id sample size %d\n", ret); 1442 return -EINVAL; 1443 } 1444 1445 ev->header.size += ret; 1446 1447 return 0; 1448 } 1449 1450 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp) 1451 { 1452 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1453 int ret; 1454 1455 if (!gs->ready) 1456 return 0; 1457 1458 while (1) { 1459 struct perf_sample *sample; 1460 struct guest_id *guest_id; 1461 union perf_event *ev; 1462 u16 id_hdr_size; 1463 u8 cpumode; 1464 u64 id; 1465 1466 if (!gs->fetched) { 1467 ret = guest_session__fetch(gs); 1468 if (ret) 1469 return ret; 1470 gs->fetched = true; 1471 } 1472 1473 ev = gs->ev.event; 1474 sample = &gs->ev.sample; 1475 1476 if (!ev->header.size) 1477 return 0; /* EOF */ 1478 1479 if (sample->time > timestamp) 1480 return 0; 1481 1482 /* Change cpumode to guest */ 1483 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1484 if (cpumode & PERF_RECORD_MISC_USER) 1485 cpumode = PERF_RECORD_MISC_GUEST_USER; 1486 else 1487 cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 1488 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK; 1489 ev->header.misc |= cpumode; 1490 1491 id = sample->id; 1492 if (!id) { 1493 id = gs->dflt_id; 1494 id_hdr_size = gs->dflt_id_hdr_size; 1495 } else { 1496 struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id); 1497 1498 id_hdr_size = evsel__id_hdr_size(evsel); 1499 } 1500 1501 if (id_hdr_size & 7) { 1502 pr_err("Bad id_hdr_size %u\n", id_hdr_size); 1503 return -EINVAL; 1504 } 1505 1506 if (ev->header.size & 7) { 1507 pr_err("Bad event size %u\n", ev->header.size); 1508 return -EINVAL; 1509 } 1510 1511 /* Remove guest id sample */ 1512 ev->header.size -= id_hdr_size; 1513 1514 if (ev->header.size & 7) { 1515 pr_err("Bad raw event size %u\n", ev->header.size); 1516 return -EINVAL; 1517 } 1518 1519 guest_id = guest_session__lookup_id(gs, id); 1520 if (!guest_id) { 1521 pr_err("Guest event with unknown id %llu\n", 1522 (unsigned long long)id); 1523 return -EINVAL; 1524 } 1525 1526 /* Change to host ID to avoid conflicting ID values */ 1527 sample->id = guest_id->host_id; 1528 sample->stream_id = guest_id->host_id; 1529 1530 if (sample->cpu != (u32)-1) { 1531 if (sample->cpu >= gs->vcpu_cnt) { 1532 pr_err("Guest event with unknown VCPU %u\n", 1533 sample->cpu); 1534 return -EINVAL; 1535 } 1536 /* Change to host CPU instead of guest VCPU */ 1537 sample->cpu = gs->vcpu[sample->cpu].cpu; 1538 } 1539 1540 /* New id sample with new ID and CPU */ 1541 ret = evlist__append_id_sample(inject->session->evlist, ev, sample); 1542 if (ret) 1543 return ret; 1544 1545 if (ev->header.size & 7) { 1546 pr_err("Bad new event size %u\n", ev->header.size); 1547 return -EINVAL; 1548 } 1549 1550 gs->fetched = false; 1551 1552 ret = output_bytes(inject, ev, ev->header.size); 1553 if (ret) 1554 return ret; 1555 } 1556 } 1557 1558 static int guest_session__flush_events(struct guest_session *gs) 1559 { 1560 return guest_session__inject_events(gs, -1); 1561 } 1562 1563 static int host__repipe(const struct perf_tool *tool, 1564 union perf_event *event, 1565 struct perf_sample *sample, 1566 struct machine *machine) 1567 { 1568 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1569 int ret; 1570 1571 ret = guest_session__inject_events(&inject->guest_session, sample->time); 1572 if (ret) 1573 return ret; 1574 1575 return perf_event__repipe(tool, event, sample, machine); 1576 } 1577 1578 static int host__finished_init(struct perf_session *session, union perf_event *event) 1579 { 1580 struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool); 1581 struct guest_session *gs = &inject->guest_session; 1582 int ret; 1583 1584 /* 1585 * Peek through host COMM events to find QEMU threads and the VCPU they 1586 * are running. 1587 */ 1588 ret = host_peek_vm_comms(session, gs); 1589 if (ret) 1590 return ret; 1591 1592 if (!gs->vcpu_cnt) { 1593 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid); 1594 return -EINVAL; 1595 } 1596 1597 /* 1598 * Allocate new (unused) host sample IDs and map them to the guest IDs. 1599 */ 1600 gs->highest_id = evlist__find_highest_id(session->evlist); 1601 ret = guest_session__map_ids(gs, session->evlist); 1602 if (ret) 1603 return ret; 1604 1605 ret = guest_session__add_attrs(gs); 1606 if (ret) 1607 return ret; 1608 1609 ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries); 1610 if (ret) { 1611 pr_err("Failed to synthesize id_index\n"); 1612 return ret; 1613 } 1614 1615 ret = guest_session__add_build_ids(gs); 1616 if (ret) { 1617 pr_err("Failed to add guest build IDs\n"); 1618 return ret; 1619 } 1620 1621 gs->ready = true; 1622 1623 ret = guest_session__inject_events(gs, 0); 1624 if (ret) 1625 return ret; 1626 1627 return perf_event__repipe_op2_synth(session, event); 1628 } 1629 1630 /* 1631 * Obey finished-round ordering. The FINISHED_ROUND event is first processed 1632 * which flushes host events to file up until the last flush time. Then inject 1633 * guest events up to the same time. Finally write out the FINISHED_ROUND event 1634 * itself. 1635 */ 1636 static int host__finished_round(const struct perf_tool *tool, 1637 union perf_event *event, 1638 struct ordered_events *oe) 1639 { 1640 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1641 int ret = perf_event__process_finished_round(tool, event, oe); 1642 u64 timestamp = ordered_events__last_flush_time(oe); 1643 1644 if (ret) 1645 return ret; 1646 1647 ret = guest_session__inject_events(&inject->guest_session, timestamp); 1648 if (ret) 1649 return ret; 1650 1651 return perf_event__repipe_oe_synth(tool, event, oe); 1652 } 1653 1654 static int host__context_switch(const struct perf_tool *tool, 1655 union perf_event *event, 1656 struct perf_sample *sample, 1657 struct machine *machine) 1658 { 1659 struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 1660 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1661 struct guest_session *gs = &inject->guest_session; 1662 u32 pid = event->context_switch.next_prev_pid; 1663 u32 tid = event->context_switch.next_prev_tid; 1664 struct guest_tid *guest_tid; 1665 u32 vcpu; 1666 1667 if (out || pid != gs->machine_pid) 1668 goto out; 1669 1670 guest_tid = guest_session__lookup_tid(gs, tid); 1671 if (!guest_tid) 1672 goto out; 1673 1674 if (sample->cpu == (u32)-1) { 1675 pr_err("Switch event does not have CPU\n"); 1676 return -EINVAL; 1677 } 1678 1679 vcpu = guest_tid->vcpu; 1680 if (vcpu >= gs->vcpu_cnt) 1681 return -EINVAL; 1682 1683 /* Guest is switching in, record which CPU the VCPU is now running on */ 1684 gs->vcpu[vcpu].cpu = sample->cpu; 1685 out: 1686 return host__repipe(tool, event, sample, machine); 1687 } 1688 1689 static void sig_handler(int sig __maybe_unused) 1690 { 1691 session_done = 1; 1692 } 1693 1694 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) 1695 { 1696 struct perf_event_attr *attr = &evsel->core.attr; 1697 const char *name = evsel__name(evsel); 1698 1699 if (!(attr->sample_type & sample_type)) { 1700 pr_err("Samples for %s event do not have %s attribute set.", 1701 name, sample_msg); 1702 return -EINVAL; 1703 } 1704 1705 return 0; 1706 } 1707 1708 static int drop_sample(const struct perf_tool *tool __maybe_unused, 1709 union perf_event *event __maybe_unused, 1710 struct perf_sample *sample __maybe_unused, 1711 struct evsel *evsel __maybe_unused, 1712 struct machine *machine __maybe_unused) 1713 { 1714 return 0; 1715 } 1716 1717 static void strip_init(struct perf_inject *inject) 1718 { 1719 struct evlist *evlist = inject->session->evlist; 1720 struct evsel *evsel; 1721 1722 inject->tool.context_switch = perf_event__drop; 1723 1724 evlist__for_each_entry(evlist, evsel) 1725 evsel->handler = drop_sample; 1726 } 1727 1728 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) 1729 { 1730 struct perf_inject *inject = opt->value; 1731 const char *args; 1732 char *dry_run; 1733 1734 if (unset) 1735 return 0; 1736 1737 inject->itrace_synth_opts.set = true; 1738 inject->itrace_synth_opts.vm_time_correlation = true; 1739 inject->in_place_update = true; 1740 1741 if (!str) 1742 return 0; 1743 1744 dry_run = skip_spaces(str); 1745 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { 1746 inject->itrace_synth_opts.vm_tm_corr_dry_run = true; 1747 inject->in_place_update_dry_run = true; 1748 args = dry_run + strlen("dry-run"); 1749 } else { 1750 args = str; 1751 } 1752 1753 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); 1754 1755 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; 1756 } 1757 1758 static int parse_guest_data(const struct option *opt, const char *str, int unset) 1759 { 1760 struct perf_inject *inject = opt->value; 1761 struct guest_session *gs = &inject->guest_session; 1762 char *tok; 1763 char *s; 1764 1765 if (unset) 1766 return 0; 1767 1768 if (!str) 1769 goto bad_args; 1770 1771 s = strdup(str); 1772 if (!s) 1773 return -ENOMEM; 1774 1775 gs->perf_data_file = strsep(&s, ","); 1776 if (!gs->perf_data_file) 1777 goto bad_args; 1778 1779 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file); 1780 if (gs->copy_kcore_dir) 1781 inject->output.is_dir = true; 1782 1783 tok = strsep(&s, ","); 1784 if (!tok) 1785 goto bad_args; 1786 gs->machine_pid = strtoul(tok, NULL, 0); 1787 if (!inject->guest_session.machine_pid) 1788 goto bad_args; 1789 1790 gs->time_scale = 1; 1791 1792 tok = strsep(&s, ","); 1793 if (!tok) 1794 goto out; 1795 gs->time_offset = strtoull(tok, NULL, 0); 1796 1797 tok = strsep(&s, ","); 1798 if (!tok) 1799 goto out; 1800 gs->time_scale = strtod(tok, NULL); 1801 if (!gs->time_scale) 1802 goto bad_args; 1803 out: 1804 return 0; 1805 1806 bad_args: 1807 pr_err("--guest-data option requires guest perf.data file name, " 1808 "guest machine PID, and optionally guest timestamp offset, " 1809 "and guest timestamp scale factor, separated by commas.\n"); 1810 return -1; 1811 } 1812 1813 static int save_section_info_cb(struct perf_file_section *section, 1814 struct perf_header *ph __maybe_unused, 1815 int feat, int fd __maybe_unused, void *data) 1816 { 1817 struct perf_inject *inject = data; 1818 1819 inject->secs[feat] = *section; 1820 return 0; 1821 } 1822 1823 static int save_section_info(struct perf_inject *inject) 1824 { 1825 struct perf_header *header = &inject->session->header; 1826 int fd = perf_data__fd(inject->session->data); 1827 1828 return perf_header__process_sections(header, fd, inject, save_section_info_cb); 1829 } 1830 1831 static bool keep_feat(int feat) 1832 { 1833 switch (feat) { 1834 /* Keep original information that describes the machine or software */ 1835 case HEADER_TRACING_DATA: 1836 case HEADER_HOSTNAME: 1837 case HEADER_OSRELEASE: 1838 case HEADER_VERSION: 1839 case HEADER_ARCH: 1840 case HEADER_NRCPUS: 1841 case HEADER_CPUDESC: 1842 case HEADER_CPUID: 1843 case HEADER_TOTAL_MEM: 1844 case HEADER_CPU_TOPOLOGY: 1845 case HEADER_NUMA_TOPOLOGY: 1846 case HEADER_PMU_MAPPINGS: 1847 case HEADER_CACHE: 1848 case HEADER_MEM_TOPOLOGY: 1849 case HEADER_CLOCKID: 1850 case HEADER_BPF_PROG_INFO: 1851 case HEADER_BPF_BTF: 1852 case HEADER_CPU_PMU_CAPS: 1853 case HEADER_CLOCK_DATA: 1854 case HEADER_HYBRID_TOPOLOGY: 1855 case HEADER_PMU_CAPS: 1856 return true; 1857 /* Information that can be updated */ 1858 case HEADER_BUILD_ID: 1859 case HEADER_CMDLINE: 1860 case HEADER_EVENT_DESC: 1861 case HEADER_BRANCH_STACK: 1862 case HEADER_GROUP_DESC: 1863 case HEADER_AUXTRACE: 1864 case HEADER_STAT: 1865 case HEADER_SAMPLE_TIME: 1866 case HEADER_DIR_FORMAT: 1867 case HEADER_COMPRESSED: 1868 default: 1869 return false; 1870 }; 1871 } 1872 1873 static int read_file(int fd, u64 offs, void *buf, size_t sz) 1874 { 1875 ssize_t ret = preadn(fd, buf, sz, offs); 1876 1877 if (ret < 0) 1878 return -errno; 1879 if ((size_t)ret != sz) 1880 return -EINVAL; 1881 return 0; 1882 } 1883 1884 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) 1885 { 1886 int fd = perf_data__fd(inject->session->data); 1887 u64 offs = inject->secs[feat].offset; 1888 size_t sz = inject->secs[feat].size; 1889 void *buf = malloc(sz); 1890 int ret; 1891 1892 if (!buf) 1893 return -ENOMEM; 1894 1895 ret = read_file(fd, offs, buf, sz); 1896 if (ret) 1897 goto out_free; 1898 1899 ret = fw->write(fw, buf, sz); 1900 out_free: 1901 free(buf); 1902 return ret; 1903 } 1904 1905 struct inject_fc { 1906 struct feat_copier fc; 1907 struct perf_inject *inject; 1908 }; 1909 1910 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) 1911 { 1912 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); 1913 struct perf_inject *inject = inj_fc->inject; 1914 int ret; 1915 1916 if (!inject->secs[feat].offset || 1917 !keep_feat(feat)) 1918 return 0; 1919 1920 ret = feat_copy(inject, feat, fw); 1921 if (ret < 0) 1922 return ret; 1923 1924 return 1; /* Feature section copied */ 1925 } 1926 1927 static int copy_kcore_dir(struct perf_inject *inject) 1928 { 1929 char *cmd; 1930 int ret; 1931 1932 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1", 1933 inject->input_name, inject->output.path); 1934 if (ret < 0) 1935 return ret; 1936 pr_debug("%s\n", cmd); 1937 ret = system(cmd); 1938 free(cmd); 1939 return ret; 1940 } 1941 1942 static int guest_session__copy_kcore_dir(struct guest_session *gs) 1943 { 1944 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session); 1945 char *cmd; 1946 int ret; 1947 1948 ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1", 1949 gs->perf_data_file, inject->output.path, gs->machine_pid); 1950 if (ret < 0) 1951 return ret; 1952 pr_debug("%s\n", cmd); 1953 ret = system(cmd); 1954 free(cmd); 1955 return ret; 1956 } 1957 1958 static int output_fd(struct perf_inject *inject) 1959 { 1960 return inject->in_place_update ? -1 : perf_data__fd(&inject->output); 1961 } 1962 1963 static int __cmd_inject(struct perf_inject *inject) 1964 { 1965 int ret = -EINVAL; 1966 struct guest_session *gs = &inject->guest_session; 1967 struct perf_session *session = inject->session; 1968 int fd = output_fd(inject); 1969 u64 output_data_offset; 1970 1971 signal(SIGINT, sig_handler); 1972 1973 if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat || 1974 inject->itrace_synth_opts.set) { 1975 inject->tool.mmap = perf_event__repipe_mmap; 1976 inject->tool.mmap2 = perf_event__repipe_mmap2; 1977 inject->tool.fork = perf_event__repipe_fork; 1978 #ifdef HAVE_LIBTRACEEVENT 1979 inject->tool.tracing_data = perf_event__repipe_tracing_data; 1980 #endif 1981 } 1982 1983 output_data_offset = perf_session__data_offset(session->evlist); 1984 1985 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 1986 inject->tool.sample = perf_event__inject_buildid; 1987 } else if (inject->sched_stat) { 1988 struct evsel *evsel; 1989 1990 evlist__for_each_entry(session->evlist, evsel) { 1991 const char *name = evsel__name(evsel); 1992 1993 if (!strcmp(name, "sched:sched_switch")) { 1994 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) 1995 return -EINVAL; 1996 1997 evsel->handler = perf_inject__sched_switch; 1998 } else if (!strcmp(name, "sched:sched_process_exit")) 1999 evsel->handler = perf_inject__sched_process_exit; 2000 #ifdef HAVE_LIBTRACEEVENT 2001 else if (!strncmp(name, "sched:sched_stat_", 17)) 2002 evsel->handler = perf_inject__sched_stat; 2003 #endif 2004 } 2005 } else if (inject->itrace_synth_opts.vm_time_correlation) { 2006 session->itrace_synth_opts = &inject->itrace_synth_opts; 2007 memset(&inject->tool, 0, sizeof(inject->tool)); 2008 inject->tool.id_index = perf_event__process_id_index; 2009 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2010 inject->tool.auxtrace = perf_event__process_auxtrace; 2011 inject->tool.auxtrace_error = perf_event__process_auxtrace_error; 2012 inject->tool.ordered_events = true; 2013 inject->tool.ordering_requires_timestamps = true; 2014 } else if (inject->itrace_synth_opts.set) { 2015 session->itrace_synth_opts = &inject->itrace_synth_opts; 2016 inject->itrace_synth_opts.inject = true; 2017 inject->tool.comm = perf_event__repipe_comm; 2018 inject->tool.namespaces = perf_event__repipe_namespaces; 2019 inject->tool.exit = perf_event__repipe_exit; 2020 inject->tool.id_index = perf_event__process_id_index; 2021 inject->tool.auxtrace_info = perf_event__process_auxtrace_info; 2022 inject->tool.auxtrace = perf_event__process_auxtrace; 2023 inject->tool.aux = perf_event__drop_aux; 2024 inject->tool.itrace_start = perf_event__drop_aux; 2025 inject->tool.aux_output_hw_id = perf_event__drop_aux; 2026 inject->tool.ordered_events = true; 2027 inject->tool.ordering_requires_timestamps = true; 2028 /* Allow space in the header for new attributes */ 2029 output_data_offset = roundup(8192 + session->header.data_offset, 4096); 2030 if (inject->strip) 2031 strip_init(inject); 2032 } else if (gs->perf_data_file) { 2033 char *name = gs->perf_data_file; 2034 2035 /* 2036 * Not strictly necessary, but keep these events in order wrt 2037 * guest events. 2038 */ 2039 inject->tool.mmap = host__repipe; 2040 inject->tool.mmap2 = host__repipe; 2041 inject->tool.comm = host__repipe; 2042 inject->tool.fork = host__repipe; 2043 inject->tool.exit = host__repipe; 2044 inject->tool.lost = host__repipe; 2045 inject->tool.context_switch = host__repipe; 2046 inject->tool.ksymbol = host__repipe; 2047 inject->tool.text_poke = host__repipe; 2048 /* 2049 * Once the host session has initialized, set up sample ID 2050 * mapping and feed in guest attrs, build IDs and initial 2051 * events. 2052 */ 2053 inject->tool.finished_init = host__finished_init; 2054 /* Obey finished round ordering */ 2055 inject->tool.finished_round = host__finished_round; 2056 /* Keep track of which CPU a VCPU is runnng on */ 2057 inject->tool.context_switch = host__context_switch; 2058 /* 2059 * Must order events to be able to obey finished round 2060 * ordering. 2061 */ 2062 inject->tool.ordered_events = true; 2063 inject->tool.ordering_requires_timestamps = true; 2064 /* Set up a separate session to process guest perf.data file */ 2065 ret = guest_session__start(gs, name, session->data->force); 2066 if (ret) { 2067 pr_err("Failed to process %s, error %d\n", name, ret); 2068 return ret; 2069 } 2070 /* Allow space in the header for guest attributes */ 2071 output_data_offset += gs->session->header.data_offset; 2072 output_data_offset = roundup(output_data_offset, 4096); 2073 } 2074 2075 if (!inject->itrace_synth_opts.set) 2076 auxtrace_index__free(&session->auxtrace_index); 2077 2078 if (!inject->is_pipe && !inject->in_place_update) 2079 lseek(fd, output_data_offset, SEEK_SET); 2080 2081 ret = perf_session__process_events(session); 2082 if (ret) 2083 return ret; 2084 2085 if (gs->session) { 2086 /* 2087 * Remaining guest events have later timestamps. Flush them 2088 * out to file. 2089 */ 2090 ret = guest_session__flush_events(gs); 2091 if (ret) { 2092 pr_err("Failed to flush guest events\n"); 2093 return ret; 2094 } 2095 } 2096 2097 if (!inject->is_pipe && !inject->in_place_update) { 2098 struct inject_fc inj_fc = { 2099 .fc.copy = feat_copy_cb, 2100 .inject = inject, 2101 }; 2102 2103 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY || 2104 inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) 2105 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 2106 /* 2107 * Keep all buildids when there is unprocessed AUX data because 2108 * it is not known which ones the AUX trace hits. 2109 */ 2110 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && 2111 inject->have_auxtrace && !inject->itrace_synth_opts.set) 2112 perf_session__dsos_hit_all(session); 2113 /* 2114 * The AUX areas have been removed and replaced with 2115 * synthesized hardware events, so clear the feature flag. 2116 */ 2117 if (inject->itrace_synth_opts.set) { 2118 perf_header__clear_feat(&session->header, 2119 HEADER_AUXTRACE); 2120 if (inject->itrace_synth_opts.last_branch || 2121 inject->itrace_synth_opts.add_last_branch) 2122 perf_header__set_feat(&session->header, 2123 HEADER_BRANCH_STACK); 2124 } 2125 session->header.data_offset = output_data_offset; 2126 session->header.data_size = inject->bytes_written; 2127 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc); 2128 2129 if (inject->copy_kcore_dir) { 2130 ret = copy_kcore_dir(inject); 2131 if (ret) { 2132 pr_err("Failed to copy kcore\n"); 2133 return ret; 2134 } 2135 } 2136 if (gs->copy_kcore_dir) { 2137 ret = guest_session__copy_kcore_dir(gs); 2138 if (ret) { 2139 pr_err("Failed to copy guest kcore\n"); 2140 return ret; 2141 } 2142 } 2143 } 2144 2145 return ret; 2146 } 2147 2148 int cmd_inject(int argc, const char **argv) 2149 { 2150 struct perf_inject inject = { 2151 .input_name = "-", 2152 .samples = LIST_HEAD_INIT(inject.samples), 2153 .output = { 2154 .path = "-", 2155 .mode = PERF_DATA_MODE_WRITE, 2156 .use_stdio = true, 2157 }, 2158 }; 2159 struct perf_data data = { 2160 .mode = PERF_DATA_MODE_READ, 2161 .use_stdio = true, 2162 }; 2163 int ret; 2164 bool repipe = true; 2165 const char *known_build_ids = NULL; 2166 bool build_ids; 2167 bool build_id_all; 2168 2169 struct option options[] = { 2170 OPT_BOOLEAN('b', "build-ids", &build_ids, 2171 "Inject build-ids into the output stream"), 2172 OPT_BOOLEAN(0, "buildid-all", &build_id_all, 2173 "Inject build-ids of all DSOs into the output stream"), 2174 OPT_STRING(0, "known-build-ids", &known_build_ids, 2175 "buildid path [,buildid path...]", 2176 "build-ids to use for given paths"), 2177 OPT_STRING('i', "input", &inject.input_name, "file", 2178 "input file name"), 2179 OPT_STRING('o', "output", &inject.output.path, "file", 2180 "output file name"), 2181 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 2182 "Merge sched-stat and sched-switch for getting events " 2183 "where and how long tasks slept"), 2184 #ifdef HAVE_JITDUMP 2185 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), 2186 #endif 2187 OPT_INCR('v', "verbose", &verbose, 2188 "be more verbose (show build ids, etc)"), 2189 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2190 "file", "vmlinux pathname"), 2191 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 2192 "don't load vmlinux even if found"), 2193 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", 2194 "kallsyms pathname"), 2195 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 2196 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, 2197 NULL, "opts", "Instruction Tracing options\n" 2198 ITRACE_HELP, 2199 itrace_parse_synth_opts), 2200 OPT_BOOLEAN(0, "strip", &inject.strip, 2201 "strip non-synthesized events (use with --itrace)"), 2202 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", 2203 "correlate time between VM guests and the host", 2204 parse_vm_time_correlation), 2205 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts", 2206 "inject events from a guest perf.data file", 2207 parse_guest_data), 2208 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2209 "guest mount directory under which every guest os" 2210 " instance has a subdir"), 2211 OPT_END() 2212 }; 2213 const char * const inject_usage[] = { 2214 "perf inject [<options>]", 2215 NULL 2216 }; 2217 bool ordered_events; 2218 2219 if (!inject.itrace_synth_opts.set) { 2220 /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ 2221 symbol_conf.lazy_load_kernel_maps = true; 2222 } 2223 2224 #ifndef HAVE_JITDUMP 2225 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2226 #endif 2227 argc = parse_options(argc, argv, options, inject_usage, 0); 2228 2229 /* 2230 * Any (unrecognized) arguments left? 2231 */ 2232 if (argc) 2233 usage_with_options(inject_usage, options); 2234 2235 if (inject.strip && !inject.itrace_synth_opts.set) { 2236 pr_err("--strip option requires --itrace option\n"); 2237 return -1; 2238 } 2239 2240 if (symbol__validate_sym_arguments()) 2241 return -1; 2242 2243 if (inject.in_place_update) { 2244 if (!strcmp(inject.input_name, "-")) { 2245 pr_err("Input file name required for in-place updating\n"); 2246 return -1; 2247 } 2248 if (strcmp(inject.output.path, "-")) { 2249 pr_err("Output file name must not be specified for in-place updating\n"); 2250 return -1; 2251 } 2252 if (!data.force && !inject.in_place_update_dry_run) { 2253 pr_err("The input file would be updated in place, " 2254 "the --force option is required.\n"); 2255 return -1; 2256 } 2257 if (!inject.in_place_update_dry_run) 2258 data.in_place_update = true; 2259 } else { 2260 if (strcmp(inject.output.path, "-") && !inject.strip && 2261 has_kcore_dir(inject.input_name)) { 2262 inject.output.is_dir = true; 2263 inject.copy_kcore_dir = true; 2264 } 2265 if (perf_data__open(&inject.output)) { 2266 perror("failed to create output file"); 2267 return -1; 2268 } 2269 } 2270 if (build_ids) 2271 inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY; 2272 if (build_id_all) 2273 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL; 2274 2275 data.path = inject.input_name; 2276 if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) { 2277 inject.is_pipe = true; 2278 /* 2279 * Do not repipe header when input is a regular file 2280 * since either it can rewrite the header at the end 2281 * or write a new pipe header. 2282 */ 2283 if (strcmp(inject.input_name, "-")) 2284 repipe = false; 2285 } 2286 ordered_events = inject.jit_mode || inject.sched_stat || 2287 (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY); 2288 perf_tool__init(&inject.tool, ordered_events); 2289 inject.tool.sample = perf_event__repipe_sample; 2290 inject.tool.read = perf_event__repipe_sample; 2291 inject.tool.mmap = perf_event__repipe; 2292 inject.tool.mmap2 = perf_event__repipe; 2293 inject.tool.comm = perf_event__repipe; 2294 inject.tool.namespaces = perf_event__repipe; 2295 inject.tool.cgroup = perf_event__repipe; 2296 inject.tool.fork = perf_event__repipe; 2297 inject.tool.exit = perf_event__repipe; 2298 inject.tool.lost = perf_event__repipe; 2299 inject.tool.lost_samples = perf_event__repipe; 2300 inject.tool.aux = perf_event__repipe; 2301 inject.tool.itrace_start = perf_event__repipe; 2302 inject.tool.aux_output_hw_id = perf_event__repipe; 2303 inject.tool.context_switch = perf_event__repipe; 2304 inject.tool.throttle = perf_event__repipe; 2305 inject.tool.unthrottle = perf_event__repipe; 2306 inject.tool.ksymbol = perf_event__repipe; 2307 inject.tool.bpf = perf_event__repipe; 2308 inject.tool.text_poke = perf_event__repipe; 2309 inject.tool.attr = perf_event__repipe_attr; 2310 inject.tool.event_update = perf_event__repipe_event_update; 2311 inject.tool.tracing_data = perf_event__repipe_op2_synth; 2312 inject.tool.finished_round = perf_event__repipe_oe_synth; 2313 inject.tool.build_id = perf_event__repipe_op2_synth; 2314 inject.tool.id_index = perf_event__repipe_op2_synth; 2315 inject.tool.auxtrace_info = perf_event__repipe_op2_synth; 2316 inject.tool.auxtrace_error = perf_event__repipe_op2_synth; 2317 inject.tool.time_conv = perf_event__repipe_op2_synth; 2318 inject.tool.thread_map = perf_event__repipe_op2_synth; 2319 inject.tool.cpu_map = perf_event__repipe_op2_synth; 2320 inject.tool.stat_config = perf_event__repipe_op2_synth; 2321 inject.tool.stat = perf_event__repipe_op2_synth; 2322 inject.tool.stat_round = perf_event__repipe_op2_synth; 2323 inject.tool.feature = perf_event__repipe_op2_synth; 2324 inject.tool.finished_init = perf_event__repipe_op2_synth; 2325 inject.tool.compressed = perf_event__repipe_op4_synth; 2326 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2327 inject.tool.dont_split_sample_group = true; 2328 inject.session = __perf_session__new(&data, repipe, 2329 output_fd(&inject), 2330 &inject.tool); 2331 if (IS_ERR(inject.session)) { 2332 ret = PTR_ERR(inject.session); 2333 goto out_close_output; 2334 } 2335 2336 if (zstd_init(&(inject.session->zstd_data), 0) < 0) 2337 pr_warning("Decompression initialization failed.\n"); 2338 2339 /* Save original section info before feature bits change */ 2340 ret = save_section_info(&inject); 2341 if (ret) 2342 goto out_delete; 2343 2344 if (!data.is_pipe && inject.output.is_pipe) { 2345 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2346 if (ret < 0) { 2347 pr_err("Couldn't write a new pipe header.\n"); 2348 goto out_delete; 2349 } 2350 2351 ret = perf_event__synthesize_for_pipe(&inject.tool, 2352 inject.session, 2353 &inject.output, 2354 perf_event__repipe); 2355 if (ret < 0) 2356 goto out_delete; 2357 } 2358 2359 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 2360 /* 2361 * to make sure the mmap records are ordered correctly 2362 * and so that the correct especially due to jitted code 2363 * mmaps. We cannot generate the buildid hit list and 2364 * inject the jit mmaps at the same time for now. 2365 */ 2366 inject.tool.ordering_requires_timestamps = true; 2367 } 2368 if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) { 2369 inject.known_build_ids = 2370 perf_inject__parse_known_build_ids(known_build_ids); 2371 2372 if (inject.known_build_ids == NULL) { 2373 pr_err("Couldn't parse known build ids.\n"); 2374 goto out_delete; 2375 } 2376 } 2377 2378 #ifdef HAVE_JITDUMP 2379 if (inject.jit_mode) { 2380 inject.tool.mmap2 = perf_event__repipe_mmap2; 2381 inject.tool.mmap = perf_event__repipe_mmap; 2382 inject.tool.ordering_requires_timestamps = true; 2383 /* 2384 * JIT MMAP injection injects all MMAP events in one go, so it 2385 * does not obey finished_round semantics. 2386 */ 2387 inject.tool.finished_round = perf_event__drop_oe; 2388 } 2389 #endif 2390 ret = symbol__init(&inject.session->header.env); 2391 if (ret < 0) 2392 goto out_delete; 2393 2394 ret = __cmd_inject(&inject); 2395 2396 guest_session__exit(&inject.guest_session); 2397 2398 out_delete: 2399 strlist__delete(inject.known_build_ids); 2400 zstd_fini(&(inject.session->zstd_data)); 2401 perf_session__delete(inject.session); 2402 out_close_output: 2403 if (!inject.in_place_update) 2404 perf_data__close(&inject.output); 2405 free(inject.itrace_synth_opts.vm_tm_corr_args); 2406 free(inject.event_copy); 2407 free(inject.guest_session.ev.event_buf); 2408 return ret; 2409 } 2410