1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * builtin-inject.c
4 *
5 * Builtin inject command: Examine the live mode (stdin) event stream
6 * and repipe it to stdout while optionally injecting additional
7 * events into it.
8 */
9 #include "builtin.h"
10
11 #include "util/color.h"
12 #include "util/dso.h"
13 #include "util/vdso.h"
14 #include "util/evlist.h"
15 #include "util/evsel.h"
16 #include "util/map.h"
17 #include "util/session.h"
18 #include "util/tool.h"
19 #include "util/debug.h"
20 #include "util/build-id.h"
21 #include "util/data.h"
22 #include "util/auxtrace.h"
23 #include "util/jit.h"
24 #include "util/string2.h"
25 #include "util/symbol.h"
26 #include "util/synthetic-events.h"
27 #include "util/thread.h"
28 #include "util/namespaces.h"
29 #include "util/util.h"
30 #include "util/tsc.h"
31
32 #include <internal/lib.h>
33
34 #include <linux/err.h>
35 #include <subcmd/parse-options.h>
36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
37
38 #include <linux/list.h>
39 #include <linux/string.h>
40 #include <linux/zalloc.h>
41 #include <linux/hash.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <signal.h>
45 #include <inttypes.h>
46
/* A guest event stashed until it can be merged into the host event stream. */
struct guest_event {
	struct perf_sample sample;
	union perf_event *event;
	/* Buffer backing 'event' */
	char *event_buf;
};
52
/* Associates a guest sample ID with the corresponding host sample ID. */
struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	/* Guest event stream ID */
	u64 id;
	/* Host event stream ID used when injecting */
	u64 host_id;
	u32 vcpu;
};
60
/* Associates a QEMU vCPU thread's TID with its vCPU number. */
struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	/* Thread ID of QEMU thread */
	u32 tid;
	u32 vcpu;
};
68
/* Per-vCPU state tracked for a guest machine. */
struct guest_vcpu {
	/* Current host CPU */
	u32 cpu;
	/* Thread ID of QEMU thread */
	u32 tid;
};
75
/*
 * State for merging a separately-recorded guest perf.data file into the
 * host event stream (guest-side session, TSC conversion, ID remapping).
 */
struct guest_session {
	/* Guest perf.data file name */
	char *perf_data_file;
	/* pid representing the guest machine — presumably assigned by the user; verify against option parsing */
	u32 machine_pid;
	/* Guest-to-host time conversion: offset and scale */
	u64 time_offset;
	double time_scale;
	struct perf_tool tool;
	struct perf_data data;
	struct perf_session *session;
	/* Temporary file the guest events are staged into */
	char *tmp_file_name;
	int tmp_fd;
	/* TSC conversion parameters for host and guest */
	struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool copy_kcore_dir;
	/* Both TSC conversions are known */
	bool have_tc;
	/* A guest event has been fetched into 'ev' */
	bool fetched;
	bool ready;
	/* Default sample ID header size / ID for events lacking one */
	u16 dflt_id_hdr_size;
	u64 dflt_id;
	u64 highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu *vcpu;
	size_t vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event ev;
};
105
/* How build IDs are represented in the injected output. */
enum build_id_rewrite_style {
	/* No build-id rewriting */
	BID_RWS__NONE = 0,
	/* Inject HEADER build-id events only for DSOs hit by samples */
	BID_RWS__INJECT_HEADER_LAZY,
	/* Inject HEADER build-id events for every mmap'd DSO */
	BID_RWS__INJECT_HEADER_ALL,
	/* Rewrite every MMAP2 event to carry the build ID */
	BID_RWS__MMAP2_BUILDID_ALL,
	/* Synthesize build-id MMAP2 events only for maps hit by samples */
	BID_RWS__MMAP2_BUILDID_LAZY,
};
113
/* Top-level state for the 'perf inject' command. */
struct perf_inject {
	struct perf_tool tool;
	struct perf_session *session;
	/* Selected build-id rewriting mode */
	enum build_id_rewrite_style build_id_style;
	bool sched_stat;
	/* Input contained AUXTRACE events */
	bool have_auxtrace;
	bool strip;
	bool jit_mode;
	bool in_place_update;
	bool in_place_update_dry_run;
	bool copy_kcore_dir;
	bool convert_callchain;
	const char *input_name;
	struct perf_data output;
	/* Bytes written to the output so far */
	u64 bytes_written;
	u64 aux_id;
	struct list_head samples;
	struct itrace_synth_opts itrace_synth_opts;
	/* Scratch buffer (PERF_SAMPLE_MAX_SIZE) for rewritten events, lazily allocated */
	char *event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session guest_session;
	/* Parsed --known-build-ids entries: "<hex build-id> <dso name>" */
	struct strlist *known_build_ids;
	/* Cached evsel used for the sample ID of synthesized mmap events */
	const struct evsel *mmap_evsel;
	struct ip_callchain *raw_callchain;
};
139
/*
 * A deferred event with its owning TID; trailing flexible array holds the
 * event bytes. Presumably queued on perf_inject->samples — confirm in the
 * sched_stat handling later in the file.
 */
struct event_entry {
	struct list_head node;
	u32 tid;
	union perf_event event[];
};
145
146 static int tool__inject_build_id(const struct perf_tool *tool,
147 struct perf_sample *sample,
148 struct machine *machine,
149 const struct evsel *evsel,
150 __u16 misc,
151 const char *filename,
152 struct dso *dso, u32 flags);
153 static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
154 struct perf_sample *sample,
155 struct machine *machine,
156 const struct evsel *evsel,
157 __u16 misc,
158 __u32 pid, __u32 tid,
159 __u64 start, __u64 len, __u64 pgoff,
160 struct dso *dso,
161 __u32 prot, __u32 flags,
162 const char *filename);
163
output_bytes(struct perf_inject * inject,void * buf,size_t sz)164 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
165 {
166 ssize_t size;
167
168 size = perf_data__write(&inject->output, buf, sz);
169 if (size < 0)
170 return -errno;
171
172 inject->bytes_written += size;
173 return 0;
174 }
175
perf_event__repipe_synth(const struct perf_tool * tool,union perf_event * event)176 static int perf_event__repipe_synth(const struct perf_tool *tool,
177 union perf_event *event)
178
179 {
180 struct perf_inject *inject = container_of(tool, struct perf_inject,
181 tool);
182
183 return output_bytes(inject, event, event->header.size);
184 }
185
/*
 * Repipe callback for events delivered with an ordered_events context;
 * ordering state is not needed here, so simply repipe.
 */
static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
				       union perf_event *event,
				       struct ordered_events *oe __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}
192
193 #ifdef HAVE_JITDUMP
perf_event__drop_oe(const struct perf_tool * tool __maybe_unused,union perf_event * event __maybe_unused,struct ordered_events * oe __maybe_unused)194 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
195 union perf_event *event __maybe_unused,
196 struct ordered_events *oe __maybe_unused)
197 {
198 return 0;
199 }
200 #endif
201
/* Repipe callback for session-level (op2) events; session is unused. */
static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event)
{
	return perf_event__repipe_synth(tool, event);
}
208
/* Repipe callback for op4 events; the extra data/string are unused. */
static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event,
					u64 data __maybe_unused,
					const char *str __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}
217
perf_event__repipe_attr(const struct perf_tool * tool,union perf_event * event,struct evlist ** pevlist)218 static int perf_event__repipe_attr(const struct perf_tool *tool,
219 union perf_event *event,
220 struct evlist **pevlist)
221 {
222 struct perf_inject *inject = container_of(tool, struct perf_inject,
223 tool);
224 int ret;
225
226 ret = perf_event__process_attr(tool, event, pevlist);
227 if (ret)
228 return ret;
229
230 /* If the output isn't a pipe then the attributes will be written as part of the header. */
231 if (!inject->output.is_pipe)
232 return 0;
233
234 return perf_event__repipe_synth(tool, event);
235 }
236
/* Repipe an event-update event without modifying the evlist. */
static int perf_event__repipe_event_update(const struct perf_tool *tool,
					   union perf_event *event,
					   struct evlist **pevlist __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}
243
/*
 * Copy 'size' bytes from the input data file to the inject output, chunked
 * through a fixed-size stack buffer. Returns 0 or a negative error.
 */
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char chunk[4096];

	while (size > 0) {
		ssize_t nread = perf_data__read(data, chunk,
						min(size, (off_t)sizeof(chunk)));
		int err;

		if (nread < 0)
			return -errno;
		err = output_bytes(inject, chunk, nread);
		if (err)
			return err;
		size -= nread;
	}

	return 0;
}
262
/*
 * Pass an AUXTRACE event and its trailing data through to the output. For
 * file output, the event's output offset is recorded in the auxtrace index
 * first. Returns the number of auxtrace data bytes consumed, or a negative
 * error.
 */
static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
				       struct perf_session *session,
				       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		/*
		 * The auxtrace data is not available in a single mapping:
		 * write the event, then stream its data from the input file.
		 */
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		/* Event and auxtrace data are contiguous in the one mmap. */
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}
299
/* Default handler: repipe the event, ignoring sample and machine. */
static int perf_event__repipe(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}
307
/* Silently discard the event. */
static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
315
/*
 * Discard an AUX event, remembering the first sample ID seen so related
 * events can be identified later.
 */
static int perf_event__drop_aux(const struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	/* Only the first ID is stashed; subsequent AUX events keep it. */
	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}
328
/*
 * Return a copy of a sample event with its AUX area data (and the u64 size
 * field immediately preceding it) removed. On inconsistent sizes the
 * original event is returned unchanged; returns ERR_PTR(-ENOMEM) if the
 * scratch buffer cannot be allocated.
 */
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	/* Bytes before the aux data */
	size_t sz1 = sample->aux_sample.data - (void *)event;
	/* Bytes after the aux data */
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	/* Lazily allocate the scratch buffer used for the trimmed copy */
	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	/* Sanity check the computed split against the event size */
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	/* The aux data size field sits just before the data: zero it */
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}
356
/*
 * Per-evsel sample handler stashed in evsel->handler; used by
 * perf_event__repipe_sample() to divert samples from the default path.
 */
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);
362
/*
 * Default PERF_RECORD_SAMPLE handler: divert to a per-evsel handler if one
 * is set; otherwise mark the sample's DSO as hit (for lazy build-id
 * injection), optionally strip AUX sample data, and repipe the event.
 */
static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	/* When synthesizing from AUX data, drop the embedded AUX sample. */
	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}
387
/*
 * Convert a sample that carries user stack/registers into one carrying a
 * resolved callchain instead: unwind the user stack (DWARF), merge the
 * resolved user frames after the sample's existing kernel/context entries,
 * then re-synthesize the sample without STACK_USER/REGS_USER and repipe it.
 *
 * NOTE(review): assumes inject->raw_callchain was sized elsewhere for
 * PERF_MAX_STACK_DEPTH entries — confirm at its allocation site.
 */
static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
						union perf_event *event,
						struct perf_sample *sample,
						struct evsel *evsel,
						struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct callchain_cursor *cursor = get_tls_callchain_cursor();
	union perf_event *event_copy = (void *)inject->event_copy;
	struct callchain_cursor_node *node;
	struct thread *thread;
	u64 sample_type = evsel->core.attr.sample_type;
	u32 sample_size = event->header.size;
	u64 i, k;
	int ret;

	/* Lazily allocate the scratch buffer for the rewritten event */
	if (event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return -ENOMEM;

		event_copy = (void *)inject->event_copy;
	}

	if (cursor == NULL)
		return -ENOMEM;

	callchain_cursor_reset(cursor);

	thread = machine__find_thread(machine, sample->tid, sample->pid);
	if (thread == NULL)
		goto out;

	/* this will parse DWARF using stack and register data */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					/*parent=*/NULL, /*root_al=*/NULL,
					PERF_MAX_STACK_DEPTH);
	thread__put(thread);
	if (ret != 0)
		goto out;

	/* copy kernel callchain and context entries */
	for (i = 0; i < sample->callchain->nr; i++) {
		inject->raw_callchain->ips[i] = sample->callchain->ips[i];
		/* Stop after the user-context marker; user IPs come from unwinding */
		if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
			i++;
			break;
		}
	}
	/* Ensure a PERF_CONTEXT_USER marker precedes the unwound frames */
	if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
		inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;

	node = cursor->first;
	for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
		if (machine__kernel_ip(machine, node->ip))
			/* kernel IPs were added already */;
		else if (node->ms.sym && node->ms.sym->inlined)
			/* we can't handle inlined callchains */;
		else
			inject->raw_callchain->ips[i++] = node->ip;

		node = node->next;
	}

	inject->raw_callchain->nr = i;
	sample->callchain = inject->raw_callchain;

out:
	memcpy(event_copy, event, sizeof(event->header));

	/* adjust sample size for stack and regs */
	sample_size -= sample->user_stack.size;
	sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
	sample_size += (sample->callchain->nr + 1) * sizeof(u64);
	event_copy->header.size = sample_size;

	/* remove sample_type {STACK,REGS}_USER for synthesize */
	sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);

	perf_event__synthesize_sample(event_copy, sample_type,
				      evsel->core.attr.read_format, sample);
	return perf_event__repipe_synth(tool, event_copy);
}
471
/*
 * Find or create the DSO for 'filename' in the given machine, attaching the
 * thread's namespace info to it. Returns a reference the caller must drop
 * with dso__put(), or NULL on failure.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container. Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		/* dso takes over the nsi reference under its lock */
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}
516
517 /*
518 * The evsel used for the sample ID for mmap events. Typically stashed when
519 * processing mmap events. If not stashed, search the evlist for the first mmap
520 * gathering event.
521 */
inject__mmap_evsel(struct perf_inject * inject)522 static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
523 {
524 struct evsel *pos;
525
526 if (inject->mmap_evsel)
527 return inject->mmap_evsel;
528
529 evlist__for_each_entry(inject->session->evlist, pos) {
530 if (pos->core.attr.mmap) {
531 inject->mmap_evsel = pos;
532 return pos;
533 }
534 }
535 pr_err("No mmap events found\n");
536 return NULL;
537 }
538
/*
 * Common handling for MMAP and MMAP2 events. Depending on the build-id
 * rewrite style this may: process JIT mmap markers, inject HEADER build-id
 * events, synthesize build-id-carrying MMAP2 events, or simply repipe the
 * event after normal processing. 'perf_event_process' is the regular
 * processing callback (creates threads/maps).
 */
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			/* Event consumed by jit_process(); account its output. */
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

			if (evsel) {
				dso__set_hit(dso);
				tool__inject_build_id(tool, sample, machine, evsel,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

		if (evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			dso__put(dso);
			return 0;
		}
	}
	dso__put(dso);
	/* In the lazy mmap2 style, events are synthesized on sample hit instead. */
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}
643
/* MMAP handler: delegate to the common mmap path with MMAP-event fields. */
static int perf_event__repipe_mmap(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap.pid, event->mmap.tid,
		event->mmap.start, event->mmap.len, event->mmap.pgoff,
		/*flags=*/0, PROT_EXEC,
		event->mmap.filename, /*dso_id=*/NULL,
		perf_event__process_mmap);
}
657
/*
 * MMAP2 handler: build the dso_id from either the carried build ID or the
 * device/inode fields, then delegate to the common mmap path.
 */
static int perf_event__repipe_mmap2(const struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct dso_id id = dso_id_empty;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		/* The event carries the build ID directly */
		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
	} else {
		/* Identify the DSO by device/inode instead */
		id.maj = event->mmap2.maj;
		id.min = event->mmap2.min;
		id.ino = event->mmap2.ino;
		id.ino_generation = event->mmap2.ino_generation;
		id.mmap2_valid = true;
		id.mmap2_ino_generation_valid = true;
	}

	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap2.pid, event->mmap2.tid,
		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
		event->mmap2.flags, event->mmap2.prot,
		event->mmap2.filename, &id,
		perf_event__process_mmap2);
}
684
/* Process a FORK event, then repipe it; the processing error wins. */
static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_fork(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
697
/* Process a COMM event, then repipe it; the processing error wins. */
static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_comm(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
710
/* Process a NAMESPACES event, then repipe it; the processing error wins. */
static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err;

	err = perf_event__process_namespaces(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}
722
/* Process an EXIT event, then repipe it; the processing error wins. */
static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_exit(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
735
#ifdef HAVE_LIBTRACEEVENT
/* Repipe the tracing-data event, then process it to load the tracepoint data. */
static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
					   struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(tool, event);

	return perf_event__process_tracing_data(tool, session, event);
}
#endif
746
/*
 * Ensure the dso has a build ID, reading it from the file on disk if
 * necessary (entering the dso's mount namespace, and retrying with a
 * chroot-prefixed path on failure). Returns 0 when a build ID is
 * available, -1 otherwise.
 */
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;
	struct build_id bid = { .size = 0, };

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		/* Retry with the path as seen from outside the container */
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}
771
/*
 * Parse the --known-build-ids option string into a strlist of
 * "<hex build-id> <dso name>" entries, dropping entries that are malformed
 * (no dso name, odd/oversized build-id, non-hex digits). Returns NULL on
 * allocation failure.
 */
static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			/* No dso name after the build id: drop the entry */
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		/* Build id must be an even number of hex digits that fits */
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}
807
/*
 * Look up the dso's long name in the --known-build-ids list; on a match,
 * decode the hex build id into the dso and return true. Entries were
 * validated by perf_inject__parse_known_build_ids(), so each has a space
 * separating the build id from the dso name.
 */
static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		/* Clamp to the build_id buffer size */
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		/* Decode hex digit pairs into the build id bytes */
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}
836
/*
 * Synthesize and repipe a HEADER build-id event for the dso, preferring a
 * user-supplied known build id. Returns 0 on success, 1 when a known build
 * id was applied instead, -1 on failure; anonymous/no-dso maps are skipped
 * with 0.
 */
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	/* Anonymous and huge-page maps have no backing file to read */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}
873
/*
 * Synthesize and repipe an MMAP2 event carrying the dso's build id in place
 * of the original mmap event. Returns 0 on success, 1 when the caller
 * should repipe the original event instead (anonymous/no-dso maps), and -1
 * on failure.
 */
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps. */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 1;
	if (is_no_dso_memory(filename))
		return 1;

	if (dso__read_build_id(dso)) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
						    perf_event__repipe,
						    evsel,
						    misc, pid, tid,
						    start, len, pgoff,
						    dso__bid(dso),
						    prot, flags,
						    filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}
	return 0;
}
912
/*
 * Lazily emit build-id information for the dso/map a sample landed in,
 * depending on the rewrite style: inject a HEADER build-id event
 * (INJECT_HEADER_LAZY) or synthesize a build-id MMAP2 event
 * (MMAP2_BUILDID_LAZY), at most once per dso/map. 'sample_in_dso' means
 * the sample's cpumode already reflects this map, so misc need not be
 * recomputed.
 */
static int mark_dso_hit(const struct perf_inject *inject,
			const struct perf_tool *tool,
			struct perf_sample *sample,
			struct machine *machine,
			const struct evsel *mmap_evsel,
			struct map *map, bool sample_in_dso)
{
	struct dso *dso;
	u16 misc = sample->cpumode;

	if (!map)
		return 0;

	if (!sample_in_dso) {
		/*
		 * Recompute the kernel/user bit from the map itself,
		 * preserving the guest and hypervisor distinctions.
		 */
		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
			PERF_RECORD_MISC_GUEST_USER;

		if ((misc & guest_mask) != 0) {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_GUEST_KERNEL
				: PERF_RECORD_MISC_GUEST_USER;
		} else {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_KERNEL
				: PERF_RECORD_MISC_USER;
		}
	}
	dso = map__dso(map);
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
		if (dso && !dso__hit(dso)) {
			/* First hit on this dso: inject its build id once */
			dso__set_hit(dso);
			tool__inject_build_id(tool, sample, machine,
					      mmap_evsel, misc, dso__long_name(dso), dso,
					      map__flags(map));
		}
	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		if (!map__hit(map)) {
			const struct build_id null_bid = { .size = 0 };
			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
			const char *filename = dso ? dso__long_name(dso) : "";

			/* First hit on this map: synthesize its mmap2 once */
			map__set_hit(map);
			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
							      perf_event__repipe,
							      mmap_evsel,
							      misc,
							      sample->pid, sample->tid,
							      map__start(map),
							      map__end(map) - map__start(map),
							      map__pgoff(map),
							      bid,
							      map__prot(map),
							      map__flags(map),
							      filename);
		}
	}
	return 0;
}
973
/* Context passed through the callchain walk to mark_dso_hit_callback(). */
struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	struct perf_sample *sample;
	struct machine *machine;
	/* evsel used for the sample ID of synthesized mmap events */
	const struct evsel *mmap_evsel;
};
981
/* Callchain-walk callback: mark the dso/map of each callchain node as hit. */
static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
{
	struct mark_dso_hit_args *args = data;
	struct map *map = node->ms.map;

	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
			    args->mmap_evsel, map, /*sample_in_dso=*/false);
}
990
/*
 * Process one sample event for build ID injection: mark the DSO containing
 * the sample IP and the DSOs of all callchain entries as "hit", then repipe
 * the original event unchanged.
 */
int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct mark_dso_hit_args args = {
		.inject = inject,
		.tool = tool,
		/*
		 * Use the parsed sample data of the sample event, which will
		 * have a later timestamp than the mmap event.
		 */
		.sample = sample,
		.machine = machine,
		.mmap_evsel = inject__mmap_evsel(inject),
	};

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		/* Even without a thread, the event itself is still repiped. */
		goto repipe;
	}

	/* Mark the DSO mapped at the sample IP itself. */
	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
			     /*sample_in_dso=*/true);
	}

	/*
	 * Walk the callchain (no symbol resolution needed) marking each
	 * frame's DSO. NOTE(review): evsel is used here although the
	 * parameter is annotated __maybe_unused — the annotation looks stale.
	 */
	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
					/*symbols=*/false, mark_dso_hit_callback, &args);

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}
1033
/* Drop the saved sched_switch event, if any, for the exiting thread. */
static int perf_inject__sched_process_exit(const struct perf_tool *tool,
					   union perf_event *event __maybe_unused,
					   struct perf_sample *sample,
					   struct evsel *evsel __maybe_unused,
					   struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *entry;

	list_for_each_entry(entry, &inject->samples, node) {
		if (entry->tid != sample->tid)
			continue;
		/* At most one entry per thread is kept, so stop after it. */
		list_del_init(&entry->node);
		free(entry);
		break;
	}

	return 0;
}
1053
perf_inject__sched_switch(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct evsel * evsel,struct machine * machine)1054 static int perf_inject__sched_switch(const struct perf_tool *tool,
1055 union perf_event *event,
1056 struct perf_sample *sample,
1057 struct evsel *evsel,
1058 struct machine *machine)
1059 {
1060 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1061 struct event_entry *ent;
1062
1063 perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
1064
1065 ent = malloc(event->header.size + sizeof(struct event_entry));
1066 if (ent == NULL) {
1067 color_fprintf(stderr, PERF_COLOR_RED,
1068 "Not enough memory to process sched switch event!");
1069 return -1;
1070 }
1071
1072 ent->tid = sample->tid;
1073 memcpy(&ent->event, event, event->header.size);
1074 list_add(&ent->node, &inject->samples);
1075 return 0;
1076 }
1077
1078 #ifdef HAVE_LIBTRACEEVENT
/*
 * Handle a sched_stat tracepoint: replay the saved sched_switch event for the
 * same thread, carrying over the sched_stat sample's time and period, so the
 * accounted delay is attributed at the switch location.
 */
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");
	int ret;

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	/* No saved switch event for this thread: nothing to replay. */
	return 0;
found:
	event_sw = &ent->event[0];
	/*
	 * Bug fix: the return value of evsel__parse_sample() was previously
	 * ignored, so a malformed saved event could be re-emitted with
	 * uninitialized sample data.
	 */
	ret = evsel__parse_sample(evsel, event_sw, &sample_sw);
	if (ret)
		return ret;

	/* Take timing/weight from the sched_stat sample. */
	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
1108 #endif
1109
/* Return the guest_vcpu slot for @vcpu, growing the array on demand. */
static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	struct guest_vcpu *slot = NULL;

	if (!realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		slot = &gs->vcpu[vcpu];

	return slot;
}
1116
/* Append @sz bytes to the guest session's temporary event file. */
static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t written = writen(gs->tmp_fd, buf, sz);

	if (written < 0)
		return written;

	return 0;
}
1123
/* Tool callback: copy the raw event bytes into the temporary guest file. */
static int guest_session__repipe(const struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);
	size_t sz = event->header.size;

	return guest_session__output_bytes(gs, event, sz);
}
1133
/* Record that QEMU thread @tid services guest VCPU @vcpu. */
static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
	struct guest_tid *entry;
	int bucket;

	entry = zalloc(sizeof(*entry));
	if (!entry)
		return -ENOMEM;

	entry->tid = tid;
	entry->vcpu = vcpu;

	bucket = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&entry->node, &gs->tids[bucket]);

	return 0;
}
1149
/*
 * perf_session__peek_events() callback that discovers QEMU VCPU threads from
 * host COMM events and records the TID <-> VCPU mapping.
 */
static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	/* Only COMM events belonging to the guest machine (QEMU) PID matter. */
	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * QEMU option -name debug-threads=on, causes thread names formatted as
	 * below, although it is not an ABI. Also libvirt seems to use this by
	 * default. Here we rely on it to tell us which thread is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	/* vcpu is used as an array index below, so reject absurd values. */
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	/* A VCPU must only ever be serviced by a single thread. */
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}
1188
host_peek_vm_comms(struct perf_session * session,struct guest_session * gs)1189 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
1190 {
1191 return perf_session__peek_events(session, session->header.data_offset,
1192 session->header.data_size,
1193 host_peek_vm_comms_cb, gs);
1194 }
1195
/* True if @id already identifies a sample stream in @evlist. */
static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id) != NULL;
}
1200
guest_session__allocate_new_id(struct guest_session * gs,struct evlist * host_evlist)1201 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
1202 {
1203 do {
1204 gs->highest_id += 1;
1205 } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
1206
1207 return gs->highest_id;
1208 }
1209
/* Record the guest sample ID @id -> (@host_id, @vcpu) mapping. */
static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *entry;
	int bucket;

	entry = zalloc(sizeof(*entry));
	if (!entry)
		return -ENOMEM;

	entry->id = id;
	entry->host_id = host_id;
	entry->vcpu = vcpu;

	bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&entry->node, &gs->heads[bucket]);

	return 0;
}
1226
evlist__find_highest_id(struct evlist * evlist)1227 static u64 evlist__find_highest_id(struct evlist *evlist)
1228 {
1229 struct evsel *evsel;
1230 u64 highest_id = 1;
1231
1232 evlist__for_each_entry(evlist, evsel) {
1233 u32 j;
1234
1235 for (j = 0; j < evsel->core.ids; j++) {
1236 u64 id = evsel->core.id[j];
1237
1238 if (id > highest_id)
1239 highest_id = id;
1240 }
1241 }
1242
1243 return highest_id;
1244 }
1245
/*
 * For every guest sample ID that has an associated CPU (which, for guest
 * data, is the VCPU), allocate an unused host sample ID and record the
 * guest-ID -> (host-ID, VCPU) mapping.
 */
static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			/* Skip IDs without an associated CPU — nothing to map. */
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			/* sid->cpu.cpu holds the VCPU number for guest data. */
			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}
1273
/* Find the guest_id entry for sample ID @id, or NULL if unmapped. */
static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
	struct guest_id *entry;
	int bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);

	hlist_for_each_entry(entry, &gs->heads[bucket], node) {
		if (entry->id == id)
			return entry;
	}

	return NULL;
}
1289
/* Tool callback: merge an incoming attr event into the host session evlist. */
static int process_attr(const struct perf_tool *tool, union perf_event *event,
			struct perf_sample *sample __maybe_unused,
			struct machine *machine __maybe_unused)
{
	struct perf_inject *inject;

	inject = container_of(tool, struct perf_inject, tool);
	return perf_event__process_attr(tool, event, &inject->session->evlist);
}
1298
guest_session__add_attr(struct guest_session * gs,struct evsel * evsel)1299 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
1300 {
1301 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1302 struct perf_event_attr attr = evsel->core.attr;
1303 u64 *id_array;
1304 u32 *vcpu_array;
1305 int ret = -ENOMEM;
1306 u32 i;
1307
1308 id_array = calloc(evsel->core.ids, sizeof(*id_array));
1309 if (!id_array)
1310 return -ENOMEM;
1311
1312 vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
1313 if (!vcpu_array)
1314 goto out;
1315
1316 for (i = 0; i < evsel->core.ids; i++) {
1317 u64 id = evsel->core.id[i];
1318 struct guest_id *guest_id = guest_session__lookup_id(gs, id);
1319
1320 if (!guest_id) {
1321 pr_err("Failed to find guest id %"PRIu64"\n", id);
1322 ret = -EINVAL;
1323 goto out;
1324 }
1325 id_array[i] = guest_id->host_id;
1326 vcpu_array[i] = guest_id->vcpu;
1327 }
1328
1329 attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
1330 attr.exclude_host = 1;
1331 attr.exclude_guest = 0;
1332
1333 ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
1334 id_array, process_attr);
1335 if (ret)
1336 pr_err("Failed to add guest attr.\n");
1337
1338 for (i = 0; i < evsel->core.ids; i++) {
1339 struct perf_sample_id *sid;
1340 u32 vcpu = vcpu_array[i];
1341
1342 sid = evlist__id2sid(inject->session->evlist, id_array[i]);
1343 /* Guest event is per-thread from the host point of view */
1344 sid->cpu.cpu = -1;
1345 sid->tid = gs->vcpu[vcpu].tid;
1346 sid->machine_pid = gs->machine_pid;
1347 sid->vcpu.cpu = vcpu;
1348 }
1349 out:
1350 free(vcpu_array);
1351 free(id_array);
1352 return ret;
1353 }
1354
guest_session__add_attrs(struct guest_session * gs)1355 static int guest_session__add_attrs(struct guest_session *gs)
1356 {
1357 struct evlist *evlist = gs->session->evlist;
1358 struct evsel *evsel;
1359 int ret;
1360
1361 evlist__for_each_entry(evlist, evsel) {
1362 ret = guest_session__add_attr(gs, evsel);
1363 if (ret)
1364 return ret;
1365 }
1366
1367 return 0;
1368 }
1369
/* Synthesize an id_index covering only the @new_cnt most recently added evsels. */
static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
	struct perf_session *session = inject->session;
	size_t from = session->evlist->core.nr_entries - new_cnt;

	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
						 session->evlist,
						 &session->machines.host, from);
}
1380
/* Find the guest_tid entry for QEMU thread @tid, or NULL if unknown. */
static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct guest_tid *entry;
	int bucket = hash_32(tid, PERF_EVLIST__HLIST_BITS);

	hlist_for_each_entry(entry, &gs->tids[bucket], node) {
		if (entry->tid == tid)
			return entry;
	}

	return NULL;
}
1396
dso__is_in_kernel_space(struct dso * dso)1397 static bool dso__is_in_kernel_space(struct dso *dso)
1398 {
1399 if (dso__is_vdso(dso))
1400 return false;
1401
1402 return dso__is_kcore(dso) ||
1403 dso__kernel(dso) ||
1404 is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1405 }
1406
evlist__first_id(struct evlist * evlist)1407 static u64 evlist__first_id(struct evlist *evlist)
1408 {
1409 struct evsel *evsel;
1410
1411 evlist__for_each_entry(evlist, evsel) {
1412 if (evsel->core.ids)
1413 return evsel->core.id[0];
1414 }
1415 return 0;
1416 }
1417
/* Tool callback: process a build ID event against the host session. */
static int process_build_id(const struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject;

	inject = container_of(tool, struct perf_inject, tool);
	return perf_event__process_build_id(tool, inject->session, event);
}
1427
/*
 * Synthesize a build ID event for @dso, attributed to the guest machine
 * @machine_pid, and mark the dso hit so its build ID is kept.
 */
static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	/* Synthetic sample: no real task/time/CPU is associated. */
	struct perf_sample synth_sample = {
		.pid = -1,
		.tid = -1,
		.time = -1,
		.stream_id = -1,
		.cpu = -1,
		.period = 1,
		.cpumode = dso__is_in_kernel_space(dso)
		? PERF_RECORD_MISC_GUEST_KERNEL
		: PERF_RECORD_MISC_GUEST_USER,
	};

	if (!machine)
		return -ENOMEM;

	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
					       process_build_id, inject__mmap_evsel(inject),
					       /*misc=*/synth_sample.cpumode,
					       dso__bid(dso), dso__long_name(dso));
}
1453
guest_session__add_build_ids_cb(struct dso * dso,void * data)1454 static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
1455 {
1456 struct guest_session *gs = data;
1457 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1458
1459 if (!dso__has_build_id(dso))
1460 return 0;
1461
1462 return synthesize_build_id(inject, dso, gs->machine_pid);
1463
1464 }
1465
guest_session__add_build_ids(struct guest_session * gs)1466 static int guest_session__add_build_ids(struct guest_session *gs)
1467 {
1468 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1469
1470 /* Build IDs will be put in the Build ID feature section */
1471 perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1472
1473 return dsos__for_each_dso(&gs->session->machines.host.dsos,
1474 guest_session__add_build_ids_cb,
1475 gs);
1476 }
1477
/* Repipe ksymbol events to the temporary guest file. */
static int guest_session__ksymbol_event(const struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line i.e. no BPF support */
	if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL)
		return guest_session__output_bytes(gs, event, event->header.size);

	return 0;
}
1491
/*
 * Open and fully process the guest perf.data file @name, writing the guest
 * events selected for injection to a temporary file (gs->tmp_fd), which is
 * rewound ready for reading.
 *
 * NOTE(review): on the error paths after perf_session__new() succeeds, the
 * session and temporary file are not released here — presumably the caller
 * invokes guest_session__exit(); verify.
 */
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap = guest_session__repipe;
	gs->tool.mmap2 = guest_session__repipe;
	gs->tool.comm = guest_session__repipe;
	gs->tool.fork = guest_session__repipe;
	gs->tool.exit = guest_session__repipe;
	gs->tool.lost = guest_session__repipe;
	gs->tool.context_switch = guest_session__repipe;
	gs->tool.ksymbol = guest_session__ksymbol_event;
	gs->tool.text_poke = guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id = perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index = perf_event__process_id_index;

	/* Timestamp ordering is needed to merge with host events later. */
	gs->tool.ordered_events = true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path = name;
	gs->data.force = force;
	gs->data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	/* Compressed guest data needs zstd; failure is non-fatal here. */
	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	/* Rewind so injection can read the guest events back. */
	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}
1566
1567 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
free_hlist(struct hlist_head * heads,size_t hlist_sz)1568 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1569 {
1570 struct hlist_node *pos, *n;
1571 size_t i;
1572
1573 for (i = 0; i < hlist_sz; ++i) {
1574 hlist_for_each_safe(pos, n, &heads[i]) {
1575 hlist_del(pos);
1576 free(pos);
1577 }
1578 }
1579 }
1580
/* Release all resources acquired while setting up the guest session. */
static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		/* ID and TID hash tables are only populated with a session. */
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		/* Remove the temporary guest event file from /tmp. */
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}
1597
/* Copy TSC conversion parameters from a TIME_CONV record into @tc. */
static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift = time_conv->time_shift;
	tc->time_mult = time_conv->time_mult;
	tc->time_zero = time_conv->time_zero;
	tc->time_cycles = time_conv->time_cycles;
	tc->time_mask = time_conv->time_mask;
	tc->cap_user_time_zero = time_conv->cap_user_time_zero;
	tc->cap_user_time_short = time_conv->cap_user_time_short;
}
1608
guest_session__get_tc(struct guest_session * gs)1609 static void guest_session__get_tc(struct guest_session *gs)
1610 {
1611 struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1612
1613 get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1614 get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
1615 }
1616
/* Convert a guest perf timestamp into the host clock domain via TSC. */
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	/* Zero means "no timestamp": pass it through unchanged. */
	if (!guest_time) {
		*host_time = 0;
		return;
	}

	tsc = gs->guest_tc.cap_user_time_zero ?
		perf_time_to_tsc(guest_time, &gs->guest_tc) : guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	*host_time = gs->host_tc.cap_user_time_zero ?
		tsc_to_perf_time(tsc, &gs->host_tc) : tsc;
}
1643
/*
 * Read the next event from the guest temporary file into gs->ev, parse its
 * sample and convert its timestamp to host time. At EOF, gs->ev.event's
 * header size is set to 0.
 */
static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	/* The event buffer is allocated once and reused for every fetch. */
	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	/*
	 * Read the payload that follows the header.
	 * NOTE(review): assumes hdr->size >= hdr_sz; a corrupt size smaller
	 * than the header would underflow the length here — confirm the
	 * writer (guest_session__output_bytes) guarantees this.
	 */
	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event");
		return ret;
	}

	/* TSC conversion parameters are captured lazily on first use. */
	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	/* Rewrite the guest timestamp into the host clock domain. */
	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}
1698
evlist__append_id_sample(struct evlist * evlist,union perf_event * ev,const struct perf_sample * sample)1699 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1700 const struct perf_sample *sample)
1701 {
1702 struct evsel *evsel;
1703 void *array;
1704 int ret;
1705
1706 evsel = evlist__id2evsel(evlist, sample->id);
1707 array = ev;
1708
1709 if (!evsel) {
1710 pr_err("No evsel for id %"PRIu64"\n", sample->id);
1711 return -EINVAL;
1712 }
1713
1714 array += ev->header.size;
1715 ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1716 if (ret < 0)
1717 return ret;
1718
1719 if (ret & 7) {
1720 pr_err("Bad id sample size %d\n", ret);
1721 return -EINVAL;
1722 }
1723
1724 ev->header.size += ret;
1725
1726 return 0;
1727 }
1728
/*
 * Inject guest events with timestamps up to and including @timestamp into the
 * output stream, rewriting each event so it appears as a guest event from the
 * host's point of view (cpumode, sample ID, CPU).
 */
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	/* Nothing can be injected until host-side setup has completed. */
	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		/* gs->ev holds one look-ahead event between calls. */
		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		/* Leave events newer than @timestamp for a later round. */
		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			/* No sampled ID: use the guest session's default. */
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		/* Event sizes must stay 8-byte aligned throughout. */
		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Look-ahead consumed; fetch a new event next iteration. */
		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}
1836
guest_session__flush_events(struct guest_session * gs)1837 static int guest_session__flush_events(struct guest_session *gs)
1838 {
1839 return guest_session__inject_events(gs, -1);
1840 }
1841
host__repipe(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)1842 static int host__repipe(const struct perf_tool *tool,
1843 union perf_event *event,
1844 struct perf_sample *sample,
1845 struct machine *machine)
1846 {
1847 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1848 int ret;
1849
1850 ret = guest_session__inject_events(&inject->guest_session, sample->time);
1851 if (ret)
1852 return ret;
1853
1854 return perf_event__repipe(tool, event, sample, machine);
1855 }
1856
/*
 * FINISHED_INIT handler: complete guest session setup now that host attrs are
 * available — discover QEMU VCPU threads, allocate host sample IDs for guest
 * IDs, synthesize guest attrs and an id_index, add guest build IDs, and
 * inject any zero-timestamp guest events.
 */
static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
			       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	/* Synthesize attr events for the guest evsels with the new host IDs. */
	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	/* Cover only the newly added (guest) evsels in the id_index. */
	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	/* From here on, guest_session__inject_events() will emit events. */
	gs->ready = true;

	/* Flush any guest events stamped before host processing began. */
	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(tool, session, event);
}
1909
1910 /*
1911 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1912 * which flushes host events to file up until the last flush time. Then inject
1913 * guest events up to the same time. Finally write out the FINISHED_ROUND event
1914 * itself.
1915 */
host__finished_round(const struct perf_tool * tool,union perf_event * event,struct ordered_events * oe)1916 static int host__finished_round(const struct perf_tool *tool,
1917 union perf_event *event,
1918 struct ordered_events *oe)
1919 {
1920 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1921 int ret = perf_event__process_finished_round(tool, event, oe);
1922 u64 timestamp = ordered_events__last_flush_time(oe);
1923
1924 if (ret)
1925 return ret;
1926
1927 ret = guest_session__inject_events(&inject->guest_session, timestamp);
1928 if (ret)
1929 return ret;
1930
1931 return perf_event__repipe_oe_synth(tool, event, oe);
1932 }
1933
/*
 * Track which host CPU each VCPU runs on: when a QEMU VCPU thread switches
 * in, record the switch event's CPU for that VCPU. Every event, matching or
 * not, is then repiped via host__repipe().
 */
static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	/* Only switch-in events of QEMU (machine_pid) threads are of interest. */
	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}
1968
/* SIGINT handler: request that the session event loop stop gracefully. */
static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}
1973
/*
 * Check that samples for @evsel carry the @sample_type bit(s). On failure,
 * print an error naming the event and the missing attribute (@sample_msg).
 * Returns 0 when present, -EINVAL otherwise.
 */
static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		/* Terminate the error message with a newline */
		pr_err("Samples for %s event do not have %s attribute set.\n",
		       name, sample_msg);
		return -EINVAL;
	}

	return 0;
}
1987
/* Sample handler that silently discards the event (used by --strip). */
static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}
1996
strip_init(struct perf_inject * inject)1997 static void strip_init(struct perf_inject *inject)
1998 {
1999 struct evlist *evlist = inject->session->evlist;
2000 struct evsel *evsel;
2001
2002 inject->tool.context_switch = perf_event__drop;
2003
2004 evlist__for_each_entry(evlist, evsel)
2005 evsel->handler = drop_sample;
2006 }
2007
parse_vm_time_correlation(const struct option * opt,const char * str,int unset)2008 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
2009 {
2010 struct perf_inject *inject = opt->value;
2011 const char *args;
2012 char *dry_run;
2013
2014 if (unset)
2015 return 0;
2016
2017 inject->itrace_synth_opts.set = true;
2018 inject->itrace_synth_opts.vm_time_correlation = true;
2019 inject->in_place_update = true;
2020
2021 if (!str)
2022 return 0;
2023
2024 dry_run = skip_spaces(str);
2025 if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
2026 inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
2027 inject->in_place_update_dry_run = true;
2028 args = dry_run + strlen("dry-run");
2029 } else {
2030 args = str;
2031 }
2032
2033 inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
2034
2035 return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
2036 }
2037
parse_guest_data(const struct option * opt,const char * str,int unset)2038 static int parse_guest_data(const struct option *opt, const char *str, int unset)
2039 {
2040 struct perf_inject *inject = opt->value;
2041 struct guest_session *gs = &inject->guest_session;
2042 char *tok;
2043 char *s;
2044
2045 if (unset)
2046 return 0;
2047
2048 if (!str)
2049 goto bad_args;
2050
2051 s = strdup(str);
2052 if (!s)
2053 return -ENOMEM;
2054
2055 gs->perf_data_file = strsep(&s, ",");
2056 if (!gs->perf_data_file)
2057 goto bad_args;
2058
2059 gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
2060 if (gs->copy_kcore_dir)
2061 inject->output.is_dir = true;
2062
2063 tok = strsep(&s, ",");
2064 if (!tok)
2065 goto bad_args;
2066 gs->machine_pid = strtoul(tok, NULL, 0);
2067 if (!inject->guest_session.machine_pid)
2068 goto bad_args;
2069
2070 gs->time_scale = 1;
2071
2072 tok = strsep(&s, ",");
2073 if (!tok)
2074 goto out;
2075 gs->time_offset = strtoull(tok, NULL, 0);
2076
2077 tok = strsep(&s, ",");
2078 if (!tok)
2079 goto out;
2080 gs->time_scale = strtod(tok, NULL);
2081 if (!gs->time_scale)
2082 goto bad_args;
2083 out:
2084 return 0;
2085
2086 bad_args:
2087 pr_err("--guest-data option requires guest perf.data file name, "
2088 "guest machine PID, and optionally guest timestamp offset, "
2089 "and guest timestamp scale factor, separated by commas.\n");
2090 return -1;
2091 }
2092
/*
 * perf_header__process_sections() callback: remember each feature
 * section's file offset/size so it can be copied to the output later.
 */
static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}
2102
save_section_info(struct perf_inject * inject)2103 static int save_section_info(struct perf_inject *inject)
2104 {
2105 struct perf_header *header = &inject->session->header;
2106 int fd = perf_data__fd(inject->session->data);
2107
2108 return perf_header__process_sections(header, fd, inject, save_section_info_cb);
2109 }
2110
keep_feat(struct perf_inject * inject,int feat)2111 static bool keep_feat(struct perf_inject *inject, int feat)
2112 {
2113 switch (feat) {
2114 /* Keep original information that describes the machine or software */
2115 case HEADER_TRACING_DATA:
2116 case HEADER_HOSTNAME:
2117 case HEADER_OSRELEASE:
2118 case HEADER_VERSION:
2119 case HEADER_ARCH:
2120 case HEADER_NRCPUS:
2121 case HEADER_CPUDESC:
2122 case HEADER_CPUID:
2123 case HEADER_TOTAL_MEM:
2124 case HEADER_CPU_TOPOLOGY:
2125 case HEADER_NUMA_TOPOLOGY:
2126 case HEADER_PMU_MAPPINGS:
2127 case HEADER_CACHE:
2128 case HEADER_MEM_TOPOLOGY:
2129 case HEADER_CLOCKID:
2130 case HEADER_BPF_PROG_INFO:
2131 case HEADER_BPF_BTF:
2132 case HEADER_CPU_PMU_CAPS:
2133 case HEADER_CLOCK_DATA:
2134 case HEADER_HYBRID_TOPOLOGY:
2135 case HEADER_PMU_CAPS:
2136 case HEADER_CPU_DOMAIN_INFO:
2137 return true;
2138 /* Information that can be updated */
2139 case HEADER_BUILD_ID:
2140 return inject->build_id_style == BID_RWS__NONE;
2141 case HEADER_CMDLINE:
2142 case HEADER_EVENT_DESC:
2143 case HEADER_BRANCH_STACK:
2144 case HEADER_GROUP_DESC:
2145 case HEADER_AUXTRACE:
2146 case HEADER_STAT:
2147 case HEADER_SAMPLE_TIME:
2148 case HEADER_DIR_FORMAT:
2149 case HEADER_COMPRESSED:
2150 default:
2151 return false;
2152 };
2153 }
2154
/*
 * Read exactly @sz bytes at offset @offs from @fd into @buf.
 * Returns 0 on success, -errno on read error, -EINVAL on a short read.
 */
static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t n = preadn(fd, buf, sz, offs);

	if (n < 0)
		return -errno;

	return (size_t)n == sz ? 0 : -EINVAL;
}
2165
feat_copy(struct perf_inject * inject,int feat,struct feat_writer * fw)2166 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
2167 {
2168 int fd = perf_data__fd(inject->session->data);
2169 u64 offs = inject->secs[feat].offset;
2170 size_t sz = inject->secs[feat].size;
2171 void *buf = malloc(sz);
2172 int ret;
2173
2174 if (!buf)
2175 return -ENOMEM;
2176
2177 ret = read_file(fd, offs, buf, sz);
2178 if (ret)
2179 goto out_free;
2180
2181 ret = fw->write(fw, buf, sz);
2182 out_free:
2183 free(buf);
2184 return ret;
2185 }
2186
/* Binds the feature-copying callback to its perf_inject instance. */
struct inject_fc {
	struct feat_copier fc;		/* embedded; recovered via container_of() */
	struct perf_inject *inject;	/* owning inject context */
};
2191
feat_copy_cb(struct feat_copier * fc,int feat,struct feat_writer * fw)2192 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
2193 {
2194 struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
2195 struct perf_inject *inject = inj_fc->inject;
2196 int ret;
2197
2198 if (!inject->secs[feat].offset ||
2199 !keep_feat(inject, feat))
2200 return 0;
2201
2202 ret = feat_copy(inject, feat, fw);
2203 if (ret < 0)
2204 return ret;
2205
2206 return 1; /* Feature section copied */
2207 }
2208
/*
 * Copy the kcore_dir* subdirectories from the input perf.data directory to
 * the output directory by shelling out to cp(1) ("-n": don't overwrite).
 *
 * NOTE(review): the paths are interpolated unquoted into a shell command,
 * so names containing spaces or shell metacharacters will misbehave —
 * confirm whether input/output paths are constrained elsewhere.
 */
static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}
2223
/*
 * Copy the guest's kcore_dir into the output directory as
 * kcore_dir__<machine_pid>, so host and guest kcore dirs can coexist.
 *
 * NOTE(review): like copy_kcore_dir(), the paths are interpolated unquoted
 * into a shell command — names with spaces/metacharacters will misbehave.
 */
static int guest_session__copy_kcore_dir(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
		       gs->perf_data_file, inject->output.path, gs->machine_pid);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}
2239
output_fd(struct perf_inject * inject)2240 static int output_fd(struct perf_inject *inject)
2241 {
2242 return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
2243 }
2244
/*
 * Core of 'perf inject': select tool callbacks according to the requested
 * mode (build-id injection, sched_stat merging, itrace synthesis, VM time
 * correlation, guest-data injection, or callchain conversion), process the
 * input session, then rewrite the output header/features.
 */
static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	/* Modes that rewrite events need side-effecting repipe handlers. */
	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		/* Route sched tracepoints to the sched_stat merge handlers. */
		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		/* Only auxtrace-related events are of interest in this mode. */
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round = host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	} else if (inject->convert_callchain) {
		inject->tool.sample = perf_event__convert_sample_callchain;
		inject->tool.fork = perf_event__repipe_fork;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	/* Reserve room for the (possibly enlarged) header before the data. */
	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}

		/*
		 * The converted data file won't have stack and registers.
		 * Update the perf_event_attr to remove them before writing.
		 */
		if (inject->convert_callchain) {
			struct evsel *evsel;

			evlist__for_each_entry(session->evlist, evsel) {
				evsel__reset_sample_bit(evsel, REGS_USER);
				evsel__reset_sample_bit(evsel, STACK_USER);
				evsel->core.attr.sample_regs_user = 0;
				evsel->core.attr.sample_stack_user = 0;
				evsel->core.attr.exclude_callchain_user = 0;
			}
		}

		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}
2461
evsel__has_dwarf_callchain(struct evsel * evsel)2462 static bool evsel__has_dwarf_callchain(struct evsel *evsel)
2463 {
2464 struct perf_event_attr *attr = &evsel->core.attr;
2465 const u64 dwarf_callchain_flags =
2466 PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
2467
2468 if (!attr->exclude_callchain_user)
2469 return false;
2470
2471 return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
2472 }
2473
cmd_inject(int argc,const char ** argv)2474 int cmd_inject(int argc, const char **argv)
2475 {
2476 struct perf_inject inject = {
2477 .input_name = "-",
2478 .samples = LIST_HEAD_INIT(inject.samples),
2479 .output = {
2480 .path = "-",
2481 .mode = PERF_DATA_MODE_WRITE,
2482 .use_stdio = true,
2483 },
2484 };
2485 struct perf_data data = {
2486 .mode = PERF_DATA_MODE_READ,
2487 .use_stdio = true,
2488 };
2489 int ret;
2490 const char *known_build_ids = NULL;
2491 bool build_ids = false;
2492 bool build_id_all = false;
2493 bool mmap2_build_ids = false;
2494 bool mmap2_build_id_all = false;
2495
2496 struct option options[] = {
2497 OPT_BOOLEAN('b', "build-ids", &build_ids,
2498 "Inject build-ids into the output stream"),
2499 OPT_BOOLEAN(0, "buildid-all", &build_id_all,
2500 "Inject build-ids of all DSOs into the output stream"),
2501 OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
2502 "Drop unused mmap events, make others mmap2 with build IDs"),
2503 OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
2504 "Rewrite all mmap events as mmap2 events with build IDs"),
2505 OPT_STRING(0, "known-build-ids", &known_build_ids,
2506 "buildid path [,buildid path...]",
2507 "build-ids to use for given paths"),
2508 OPT_STRING('i', "input", &inject.input_name, "file",
2509 "input file name"),
2510 OPT_STRING('o', "output", &inject.output.path, "file",
2511 "output file name"),
2512 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2513 "Merge sched-stat and sched-switch for getting events "
2514 "where and how long tasks slept"),
2515 #ifdef HAVE_JITDUMP
2516 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2517 #endif
2518 OPT_INCR('v', "verbose", &verbose,
2519 "be more verbose (show build ids, etc)"),
2520 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2521 "file", "vmlinux pathname"),
2522 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2523 "don't load vmlinux even if found"),
2524 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2525 "kallsyms pathname"),
2526 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2527 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2528 NULL, "opts", "Instruction Tracing options\n"
2529 ITRACE_HELP,
2530 itrace_parse_synth_opts),
2531 OPT_BOOLEAN(0, "strip", &inject.strip,
2532 "strip non-synthesized events (use with --itrace)"),
2533 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2534 "correlate time between VM guests and the host",
2535 parse_vm_time_correlation),
2536 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2537 "inject events from a guest perf.data file",
2538 parse_guest_data),
2539 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2540 "guest mount directory under which every guest os"
2541 " instance has a subdir"),
2542 OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
2543 "Generate callchains using DWARF and drop register/stack data"),
2544 OPT_END()
2545 };
2546 const char * const inject_usage[] = {
2547 "perf inject [<options>]",
2548 NULL
2549 };
2550 bool ordered_events;
2551
2552 if (!inject.itrace_synth_opts.set) {
2553 /* Disable eager loading of kernel symbols that adds overhead to perf inject. */
2554 symbol_conf.lazy_load_kernel_maps = true;
2555 }
2556
2557 #ifndef HAVE_JITDUMP
2558 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2559 #endif
2560 #ifndef HAVE_LIBDW_SUPPORT
2561 set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
2562 #endif
2563 argc = parse_options(argc, argv, options, inject_usage, 0);
2564
2565 /*
2566 * Any (unrecognized) arguments left?
2567 */
2568 if (argc)
2569 usage_with_options(inject_usage, options);
2570
2571 if (inject.strip && !inject.itrace_synth_opts.set) {
2572 pr_err("--strip option requires --itrace option\n");
2573 return -1;
2574 }
2575
2576 if (symbol__validate_sym_arguments())
2577 return -1;
2578
2579 if (inject.in_place_update) {
2580 if (!strcmp(inject.input_name, "-")) {
2581 pr_err("Input file name required for in-place updating\n");
2582 return -1;
2583 }
2584 if (strcmp(inject.output.path, "-")) {
2585 pr_err("Output file name must not be specified for in-place updating\n");
2586 return -1;
2587 }
2588 if (!data.force && !inject.in_place_update_dry_run) {
2589 pr_err("The input file would be updated in place, "
2590 "the --force option is required.\n");
2591 return -1;
2592 }
2593 if (!inject.in_place_update_dry_run)
2594 data.in_place_update = true;
2595 } else {
2596 if (strcmp(inject.output.path, "-") && !inject.strip &&
2597 has_kcore_dir(inject.input_name)) {
2598 inject.output.is_dir = true;
2599 inject.copy_kcore_dir = true;
2600 }
2601 if (perf_data__open(&inject.output)) {
2602 perror("failed to create output file");
2603 return -1;
2604 }
2605 }
2606 if (mmap2_build_ids)
2607 inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
2608 if (mmap2_build_id_all)
2609 inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
2610 if (build_ids)
2611 inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
2612 if (build_id_all)
2613 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;
2614
2615 data.path = inject.input_name;
2616
2617 ordered_events = inject.jit_mode || inject.sched_stat ||
2618 inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2619 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
2620 perf_tool__init(&inject.tool, ordered_events);
2621 inject.tool.sample = perf_event__repipe_sample;
2622 inject.tool.read = perf_event__repipe_sample;
2623 inject.tool.mmap = perf_event__repipe;
2624 inject.tool.mmap2 = perf_event__repipe;
2625 inject.tool.comm = perf_event__repipe;
2626 inject.tool.namespaces = perf_event__repipe;
2627 inject.tool.cgroup = perf_event__repipe;
2628 inject.tool.fork = perf_event__repipe;
2629 inject.tool.exit = perf_event__repipe;
2630 inject.tool.lost = perf_event__repipe;
2631 inject.tool.lost_samples = perf_event__repipe;
2632 inject.tool.aux = perf_event__repipe;
2633 inject.tool.itrace_start = perf_event__repipe;
2634 inject.tool.aux_output_hw_id = perf_event__repipe;
2635 inject.tool.context_switch = perf_event__repipe;
2636 inject.tool.throttle = perf_event__repipe;
2637 inject.tool.unthrottle = perf_event__repipe;
2638 inject.tool.ksymbol = perf_event__repipe;
2639 inject.tool.bpf = perf_event__repipe;
2640 inject.tool.text_poke = perf_event__repipe;
2641 inject.tool.attr = perf_event__repipe_attr;
2642 inject.tool.event_update = perf_event__repipe_event_update;
2643 inject.tool.tracing_data = perf_event__repipe_op2_synth;
2644 inject.tool.finished_round = perf_event__repipe_oe_synth;
2645 inject.tool.build_id = perf_event__repipe_op2_synth;
2646 inject.tool.id_index = perf_event__repipe_op2_synth;
2647 inject.tool.auxtrace_info = perf_event__repipe_op2_synth;
2648 inject.tool.auxtrace_error = perf_event__repipe_op2_synth;
2649 inject.tool.time_conv = perf_event__repipe_op2_synth;
2650 inject.tool.thread_map = perf_event__repipe_op2_synth;
2651 inject.tool.cpu_map = perf_event__repipe_op2_synth;
2652 inject.tool.stat_config = perf_event__repipe_op2_synth;
2653 inject.tool.stat = perf_event__repipe_op2_synth;
2654 inject.tool.stat_round = perf_event__repipe_op2_synth;
2655 inject.tool.feature = perf_event__repipe_op2_synth;
2656 inject.tool.finished_init = perf_event__repipe_op2_synth;
2657 inject.tool.compressed = perf_event__repipe_op4_synth;
2658 inject.tool.auxtrace = perf_event__repipe_auxtrace;
2659 inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
2660 inject.tool.schedstat_cpu = perf_event__repipe_op2_synth;
2661 inject.tool.schedstat_domain = perf_event__repipe_op2_synth;
2662 inject.tool.dont_split_sample_group = true;
2663 inject.tool.merge_deferred_callchains = false;
2664 inject.session = __perf_session__new(&data, &inject.tool,
2665 /*trace_event_repipe=*/inject.output.is_pipe,
2666 /*host_env=*/NULL);
2667
2668 if (IS_ERR(inject.session)) {
2669 ret = PTR_ERR(inject.session);
2670 goto out_close_output;
2671 }
2672
2673 if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2674 pr_warning("Decompression initialization failed.\n");
2675
2676 /* Save original section info before feature bits change */
2677 ret = save_section_info(&inject);
2678 if (ret)
2679 goto out_delete;
2680
2681 if (inject.output.is_pipe) {
2682 ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2683 if (ret < 0) {
2684 pr_err("Couldn't write a new pipe header.\n");
2685 goto out_delete;
2686 }
2687
2688 /*
2689 * If the input is already a pipe then the features and
2690 * attributes don't need synthesizing, they will be present in
2691 * the input.
2692 */
2693 if (!data.is_pipe) {
2694 ret = perf_event__synthesize_for_pipe(&inject.tool,
2695 inject.session,
2696 &inject.output,
2697 perf_event__repipe);
2698 if (ret < 0)
2699 goto out_delete;
2700 }
2701 }
2702
2703 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2704 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2705 /*
2706 * to make sure the mmap records are ordered correctly
2707 * and so that the correct especially due to jitted code
2708 * mmaps. We cannot generate the buildid hit list and
2709 * inject the jit mmaps at the same time for now.
2710 */
2711 inject.tool.ordering_requires_timestamps = true;
2712 }
2713 if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
2714 inject.known_build_ids =
2715 perf_inject__parse_known_build_ids(known_build_ids);
2716
2717 if (inject.known_build_ids == NULL) {
2718 pr_err("Couldn't parse known build ids.\n");
2719 goto out_delete;
2720 }
2721 }
2722
2723 if (inject.convert_callchain) {
2724 struct evsel *evsel;
2725
2726 if (inject.output.is_pipe || inject.session->data->is_pipe) {
2727 pr_err("--convert-callchain cannot work with pipe\n");
2728 goto out_delete;
2729 }
2730
2731 evlist__for_each_entry(inject.session->evlist, evsel) {
2732 if (!evsel__has_dwarf_callchain(evsel) && !evsel__is_dummy_event(evsel)) {
2733 pr_err("--convert-callchain requires DWARF call graph.\n");
2734 goto out_delete;
2735 }
2736 }
2737
2738 inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
2739 if (inject.raw_callchain == NULL) {
2740 pr_err("callchain allocation failed\n");
2741 goto out_delete;
2742 }
2743 }
2744
2745 #ifdef HAVE_JITDUMP
2746 if (inject.jit_mode) {
2747 inject.tool.mmap2 = perf_event__repipe_mmap2;
2748 inject.tool.mmap = perf_event__repipe_mmap;
2749 inject.tool.ordering_requires_timestamps = true;
2750 /*
2751 * JIT MMAP injection injects all MMAP events in one go, so it
2752 * does not obey finished_round semantics.
2753 */
2754 inject.tool.finished_round = perf_event__drop_oe;
2755 }
2756 #endif
2757 ret = symbol__init(perf_session__env(inject.session));
2758 if (ret < 0)
2759 goto out_delete;
2760
2761 ret = __cmd_inject(&inject);
2762
2763 guest_session__exit(&inject.guest_session);
2764
2765 out_delete:
2766 strlist__delete(inject.known_build_ids);
2767 zstd_fini(&(inject.session->zstd_data));
2768 perf_session__delete(inject.session);
2769 out_close_output:
2770 if (!inject.in_place_update)
2771 perf_data__close(&inject.output);
2772 free(inject.itrace_synth_opts.vm_tm_corr_args);
2773 free(inject.event_copy);
2774 free(inject.guest_session.ev.event_buf);
2775 free(inject.raw_callchain);
2776 return ret;
2777 }
2778