xref: /linux/tools/perf/builtin-inject.c (revision d6dbf2d4a9009a9cec1b33325308fa5b3a7b6ba9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-inject.c
4  *
5  * Builtin inject command: Examine the live mode (stdin) event stream
6  * and repipe it to stdout while optionally injecting additional
7  * events into it.
8  */
9 #include "builtin.h"
10 
11 #include "util/aslr.h"
12 #include "util/color.h"
13 #include "util/dso.h"
14 #include "util/vdso.h"
15 #include "util/evlist.h"
16 #include "util/evsel.h"
17 #include "util/map.h"
18 #include "util/session.h"
19 #include "util/tool.h"
20 #include "util/debug.h"
21 #include "util/build-id.h"
22 #include "util/data.h"
23 #include "util/auxtrace.h"
24 #include "util/jit.h"
25 #include "util/string2.h"
26 #include "util/symbol.h"
27 #include "util/synthetic-events.h"
28 #include "util/pmus.h"
29 #include "util/thread.h"
30 #include "util/namespaces.h"
31 #include "util/unwind.h"
32 #include "util/util.h"
33 #include "util/tsc.h"
34 
35 #include <internal/lib.h>
36 
37 #include <linux/err.h>
38 #include <subcmd/parse-options.h>
39 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
40 
41 #include <linux/list.h>
42 #include <linux/string.h>
43 #include <linux/zalloc.h>
44 #include <linux/hash.h>
45 #include <ctype.h>
46 #include <errno.h>
47 #include <signal.h>
48 #include <inttypes.h>
49 
/*
 * One guest event held for later processing: the parsed sample, a pointer to
 * the event record and a separately tracked byte buffer.
 * NOTE(review): event presumably points into event_buf - confirm against the
 * code that fills this structure.
 */
struct guest_event {
	struct perf_sample		sample;
	union perf_event		*event;
	char				*event_buf;
};
55 
/*
 * Hash table entry (see guest_session.heads) describing one guest sample ID,
 * the host-side ID associated with it and the VCPU it belongs to.
 */
struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	u64				id;
	u64				host_id;
	u32				vcpu;
};
63 
/*
 * Hash table entry (see guest_session.tids) associating a QEMU thread ID
 * with a VCPU number.
 */
struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	/* Thread ID of QEMU thread */
	u32				tid;
	u32				vcpu;
};
71 
/* Per-VCPU state; guest_session.vcpu is an array of these. */
struct guest_vcpu {
	/* Current host CPU */
	u32				cpu;
	/* Thread ID of QEMU thread */
	u32				tid;
};
78 
/*
 * State for a guest perf data session; one instance is embedded in
 * struct perf_inject as 'guest_session'. It carries its own tool, data and
 * session objects in addition to lookup tables for guest IDs and QEMU
 * thread IDs.
 */
struct guest_session {
	char				*perf_data_file;
	u32				machine_pid;
	u64				time_offset;
	double				time_scale;
	struct perf_tool		tool;
	struct perf_data		data;
	struct perf_session		*session;
	char				*tmp_file_name;
	int				tmp_fd;
	struct perf_tsc_conversion	host_tc;
	struct perf_tsc_conversion	guest_tc;
	bool				copy_kcore_dir;
	bool				have_tc;
	bool				fetched;
	bool				ready;
	u16				dflt_id_hdr_size;
	u64				dflt_id;
	u64				highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu		*vcpu;
	/* Number of entries in the vcpu array */
	size_t				vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head		heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head		tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event		ev;
};
108 
/*
 * How build IDs are added to the output stream. Selected via
 * perf_inject.build_id_style and consulted by the mmap and sample handlers.
 */
enum build_id_rewrite_style {
	/* No build ID rewriting. */
	BID_RWS__NONE = 0,
	/* Build ID events are injected from mark_dso_hit() for DSOs not yet hit. */
	BID_RWS__INJECT_HEADER_LAZY,
	/* Build ID events are injected while handling each mmap event. */
	BID_RWS__INJECT_HEADER_ALL,
	/* mmap events without a build ID are replaced by mmap2 events carrying one. */
	BID_RWS__MMAP2_BUILDID_ALL,
	/* Original mmap events are not repiped; handled later from mark_dso_hit(). */
	BID_RWS__MMAP2_BUILDID_LAZY,
};
116 
/*
 * Top level state of the inject command. The embedded 'tool' is what the
 * event callbacks receive; they recover this structure with container_of().
 */
struct perf_inject {
	struct perf_tool	tool;
	struct perf_session	*session;
	enum build_id_rewrite_style build_id_style;
	bool			sched_stat;
	/* Set once an auxtrace event has been repiped. */
	bool			have_auxtrace;
	bool			strip;
	/* When set (and HAVE_JITDUMP), mmap events are first offered to jit_process(). */
	bool			jit_mode;
	bool			in_place_update;
	bool			in_place_update_dry_run;
	bool			copy_kcore_dir;
	bool			convert_callchain;
	/* When set, attr events are copied and passed to aslr_tool__strip_attr_event(). */
	bool			aslr;
	const char		*input_name;
	struct perf_data	output;
	/* Running total of bytes emitted through output_bytes(). */
	u64			bytes_written;
	/* First sample ID seen by perf_event__drop_aux(). */
	u64			aux_id;
	struct list_head	samples;
	struct itrace_synth_opts itrace_synth_opts;
	/* Lazily allocated scratch buffer of PERF_SAMPLE_MAX_SIZE bytes. */
	char			*event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session	guest_session;
	/* Entries of the form "<hex build id> <dso name>"; may be NULL. */
	struct strlist		*known_build_ids;
	/* Cached evsel used for mmap events, see inject__mmap_evsel(). */
	struct evsel		*mmap_evsel;
	/* Scratch callchain filled by perf_event__convert_sample_callchain(). */
	struct ip_callchain	*raw_callchain;
};
143 
/*
 * List node holding a thread ID followed by a variable sized copy of an
 * event (flexible array member, so the event storage is allocated together
 * with the entry).
 */
struct event_entry {
	struct list_head node;
	u32		 tid;
	union perf_event event[];
};
149 
/*
 * Forward declarations: both helpers are defined further down but are
 * already needed by perf_event__repipe_common_mmap().
 */
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags);
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				      struct perf_sample *sample,
				      struct machine *machine,
				      __u16 misc,
				      __u32 pid, __u32 tid,
				      __u64 start, __u64 len, __u64 pgoff,
				      struct dso *dso,
				      __u32 prot, __u32 flags,
				      const char *filename);
165 
166 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
167 {
168 	ssize_t size;
169 
170 	size = perf_data__write(&inject->output, buf, sz);
171 	if (size < 0)
172 		return -errno;
173 
174 	inject->bytes_written += size;
175 	return 0;
176 }
177 
178 static int perf_event__repipe_synth(const struct perf_tool *tool,
179 				    union perf_event *event)
180 
181 {
182 	struct perf_inject *inject = container_of(tool, struct perf_inject,
183 						  tool);
184 
185 	return output_bytes(inject, event, event->header.size);
186 }
187 
188 static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
189 				       union perf_event *event,
190 				       struct ordered_events *oe __maybe_unused)
191 {
192 	return perf_event__repipe_synth(tool, event);
193 }
194 
#ifdef HAVE_JITDUMP
/*
 * Handler that discards the event: nothing is written to the output and
 * success is reported. Only built when HAVE_JITDUMP is defined.
 */
static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	const int success = 0;

	return success;
}
#endif
203 
204 static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
205 					struct perf_session *session __maybe_unused,
206 					union perf_event *event)
207 {
208 	return perf_event__repipe_synth(tool, event);
209 }
210 
211 static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
212 					struct perf_session *session __maybe_unused,
213 					union perf_event *event,
214 					u64 data __maybe_unused,
215 					const char *str __maybe_unused)
216 {
217 	return perf_event__repipe_synth(tool, event);
218 }
219 
220 static int perf_event__repipe_synth_cb(const struct perf_tool *tool,
221 				       union perf_event *event,
222 				       struct perf_sample *sample __maybe_unused,
223 				       struct machine *machine __maybe_unused)
224 {
225 	return perf_event__repipe_synth(tool, event);
226 }
227 
/*
 * Handle an attr event: let perf_event__process_attr() process it first and
 * then, for pipe output only, forward it. When itrace synthesis options are
 * set the attribute is re-synthesized with PERF_SAMPLE_AUX removed (and
 * branch stack bits added for add_last_branch); otherwise the event is
 * repiped as is.
 *
 * Returns 0 on success or a negative error (-ENOMEM, -EINVAL, or whatever
 * the called helpers return).
 */
static int perf_event__repipe_attr(const struct perf_tool *tool,
				   union perf_event *event,
				   struct evlist **pevlist)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	struct perf_event_attr attr;
	u32 raw_attr_size, attr_size;
	size_t n_ids;
	u64 *ids;
	int ret;

	/* Private copy of the event, only allocated in ASLR mode; freed at 'out'. */
	union perf_event *aslr_event = NULL;

	ret = perf_event__process_attr(tool, event, pevlist);
	if (ret)
		return ret;

	if (inject->aslr) {
		/* Work on a copy so the caller's event is left untouched. */
		aslr_event = malloc(event->header.size);
		if (!aslr_event)
			return -ENOMEM;
		memcpy(aslr_event, event, event->header.size);
		aslr_tool__strip_attr_event(aslr_event, *pevlist);
		event = aslr_event;
	}

	/* If the output isn't a pipe then the attributes will be written as part of the header. */
	if (!inject->output.is_pipe) {
		ret = 0;
		goto out;
	}

	/* No itrace synthesis requested: forward the (possibly stripped) event unchanged. */
	if (!inject->itrace_synth_opts.set) {
		ret = perf_event__repipe_synth(tool, event);
		goto out;
	}

	/* The event must at least hold a header plus the smallest attr layout. */
	if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) {
		pr_err("Attribute event size %u is too small\n", event->header.size);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * ABI0 pipe/inject events have attr.size == 0; default to
	 * PERF_ATTR_SIZE_VER0 (the ABI0 footprint) for the bounded
	 * copy and ID array position.  Same pattern as
	 * perf_event__process_attr() in header.c.
	 */
	raw_attr_size = event->attr.attr.size;
	attr_size = raw_attr_size ?: PERF_ATTR_SIZE_VER0;

	/* A non-zero attr.size must lie between VER0 and the payload size. */
	if (raw_attr_size && (raw_attr_size < PERF_ATTR_SIZE_VER0 ||
			      raw_attr_size > event->header.size - sizeof(event->header))) {
		pr_err("Attribute event size %u is too small for attr.size %u\n",
		       event->header.size, raw_attr_size);
		ret = -EINVAL;
		goto out;
	}

	/* Zero-fill first so fields beyond attr_size read as 0. */
	memset(&attr, 0, sizeof(attr));
	memcpy(&attr, &event->attr.attr,
	       min_t(size_t, sizeof(attr), attr_size));

	/* The u64 IDs occupy whatever follows the attr inside the event. */
	n_ids = event->header.size - sizeof(event->header) - attr_size;
	n_ids /= sizeof(u64);
	ids = (void *)&event->attr.attr + attr_size;

	attr.size = sizeof(struct perf_event_attr);
	attr.sample_type &= ~PERF_SAMPLE_AUX;


	if (inject->itrace_synth_opts.add_last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}
	ret = perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids,
					   perf_event__repipe_synth_cb);
out:
	/* free(NULL) is a no-op, so this is safe when ASLR mode is off. */
	free(aslr_event);
	return ret;
}
311 
312 static int perf_event__repipe_event_update(const struct perf_tool *tool,
313 					   union perf_event *event,
314 					   struct evlist **pevlist __maybe_unused)
315 {
316 	return perf_event__repipe_synth(tool, event);
317 }
318 
/*
 * Copy @size bytes from @data to the inject output in chunks of at most
 * 4096 bytes.
 *
 * Returns 0 once everything was copied, -errno when reading fails, the
 * error from output_bytes() when writing fails, or -EIO when the input ends
 * before @size bytes were available.
 */
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		/*
		 * A zero length read makes no progress; without this check a
		 * truncated input would keep the loop spinning forever.
		 */
		if (ssz == 0) {
			pr_err("Unexpected end of input while copying data\n");
			return -EIO;
		}
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}
337 
338 static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
339 				       struct perf_session *session,
340 				       union perf_event *event)
341 {
342 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
343 	int ret;
344 
345 	inject->have_auxtrace = true;
346 
347 	if (!inject->output.is_pipe) {
348 		off_t offset = perf_data__seek(&inject->output, 0, SEEK_CUR);
349 
350 		if (offset == -1)
351 			return -errno;
352 		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
353 						     event, offset);
354 		if (ret < 0)
355 			return ret;
356 	}
357 
358 	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
359 		ret = output_bytes(inject, event, event->header.size);
360 		if (ret < 0)
361 			return ret;
362 		ret = copy_bytes(inject, session->data,
363 				 event->auxtrace.size);
364 	} else {
365 		ret = output_bytes(inject, event,
366 				   event->header.size + event->auxtrace.size);
367 	}
368 	if (ret < 0)
369 		return ret;
370 
371 	return event->auxtrace.size;
372 }
373 
374 static int perf_event__repipe(const struct perf_tool *tool,
375 			      union perf_event *event,
376 			      struct perf_sample *sample __maybe_unused,
377 			      struct machine *machine __maybe_unused)
378 {
379 	return perf_event__repipe_synth(tool, event);
380 }
381 
382 static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
383 			    union perf_event *event __maybe_unused,
384 			    struct perf_sample *sample __maybe_unused,
385 			    struct machine *machine __maybe_unused)
386 {
387 	return 0;
388 }
389 
390 static int perf_event__drop_aux(const struct perf_tool *tool,
391 				union perf_event *event __maybe_unused,
392 				struct perf_sample *sample,
393 				struct machine *machine __maybe_unused)
394 {
395 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
396 
397 	if (!inject->aux_id)
398 		inject->aux_id = sample->id;
399 
400 	return 0;
401 }
402 
403 static union perf_event *
404 perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
405 				 union perf_event *event,
406 				 struct perf_sample *sample)
407 {
408 	size_t sz1 = sample->aux_sample.data - (void *)event - sizeof(u64);
409 	size_t sz2 = event->header.size - sample->aux_sample.size - (sz1 + sizeof(u64));
410 	union perf_event *ev;
411 
412 	if (inject->event_copy == NULL) {
413 		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
414 		if (!inject->event_copy)
415 			return ERR_PTR(-ENOMEM);
416 	}
417 	ev = (union perf_event *)inject->event_copy;
418 	if (sz1 > event->header.size || sz2 > event->header.size ||
419 	    sz1 + sz2 > event->header.size ||
420 	    sz1 < sizeof(struct perf_event_header))
421 		return event;
422 
423 	memcpy(ev, event, sz1);
424 	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
425 	ev->header.size = sz1 + sz2;
426 
427 	return ev;
428 }
429 
/*
 * Signature of a per-evsel sample handler. perf_event__repipe_sample()
 * converts evsel->handler to this type and calls it instead of the default
 * processing when a handler is set.
 */
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct machine *machine);
434 
/*
 * Sample handler. Samples without an evsel are repiped unchanged; samples
 * whose evsel has a handler are delegated to it. Otherwise the DSO is marked
 * as hit and the sample is forwarded, possibly rewritten:
 *  - with itrace last_branch/add_last_branch, the sample is re-synthesized
 *    into inject->event_copy without PERF_SAMPLE_AUX (and with branch stack
 *    bits when add_last_branch is set);
 *  - else, with itrace options set and PERF_SAMPLE_AUX in the sample type,
 *    the AUX data is cut out of the event.
 *
 * Returns 0 on success or a negative error.
 */
static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct evsel *evsel = sample->evsel;

	if (evsel == NULL)
		return perf_event__repipe_synth(tool, event);

	if (evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, machine);

	if (inject->itrace_synth_opts.set &&
	    (inject->itrace_synth_opts.last_branch ||
	     inject->itrace_synth_opts.add_last_branch)) {
		union perf_event *event_copy = (void *)inject->event_copy;
		/* Empty branch stack used when the sample carries none. */
		struct branch_stack dummy_bs = { .nr = 0, .hw_idx = 0 };
		int err;
		size_t sz;
		/* Saved so the temporary changes below can be undone on every path. */
		u64 orig_type = evsel->core.attr.sample_type;
		u64 orig_branch_type = evsel->core.attr.branch_sample_type;

		struct branch_stack *orig_bs = sample->branch_stack;

		/* The scratch buffer is allocated on first use and then reused. */
		if (event_copy == NULL) {
			inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
			if (!inject->event_copy)
				return -ENOMEM;

			event_copy = (void *)inject->event_copy;
		}

		if (!sample->branch_stack)
			sample->branch_stack = &dummy_bs;

		if (inject->itrace_synth_opts.add_last_branch) {
			/* Temporarily add in type bits for synthesis. */
			evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
			evsel->core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
		}
		evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX;

		sz = perf_event__sample_event_size(sample, evsel->core.attr.sample_type,
						   evsel->core.attr.read_format,
						   evsel->core.attr.branch_sample_type);

		/* Refuse samples that would not fit the scratch buffer. */
		if (sz >= PERF_SAMPLE_MAX_SIZE) {
			pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
			evsel->core.attr.sample_type = orig_type;
			evsel->core.attr.branch_sample_type = orig_branch_type;
			sample->branch_stack = orig_bs;
			return -EFAULT;
		}

		event_copy->header.type = PERF_RECORD_SAMPLE;
		event_copy->header.misc = event->header.misc;
		event_copy->header.size = sz;

		err = perf_event__synthesize_sample(event_copy, evsel->core.attr.sample_type,
						    evsel->core.attr.read_format,
						    evsel->core.attr.branch_sample_type, sample);

		/* Undo the temporary evsel/sample modifications. */
		evsel->core.attr.sample_type = orig_type;
		evsel->core.attr.branch_sample_type = orig_branch_type;
		sample->branch_stack = orig_bs;

		if (err) {
			pr_err("Failed to synthesize sample\n");
			return err;
		}
		event = event_copy;
	} else if (inject->itrace_synth_opts.set &&
		   (evsel->core.attr.sample_type & PERF_SAMPLE_AUX)) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}
521 
522 static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
523 						union perf_event *event,
524 						struct perf_sample *sample,
525 						struct machine *machine)
526 {
527 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
528 	struct evsel *evsel = sample->evsel;
529 	struct callchain_cursor *cursor = get_tls_callchain_cursor();
530 	union perf_event *event_copy = (void *)inject->event_copy;
531 	struct callchain_cursor_node *node;
532 	struct thread *thread;
533 	u64 sample_type = evsel->core.attr.sample_type;
534 	size_t sz;
535 	u64 i, k;
536 	int ret;
537 
538 	if (event_copy == NULL) {
539 		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
540 		if (!inject->event_copy)
541 			return -ENOMEM;
542 
543 		event_copy = (void *)inject->event_copy;
544 	}
545 
546 	if (cursor == NULL)
547 		return -ENOMEM;
548 
549 	callchain_cursor_reset(cursor);
550 
551 	thread = machine__find_thread(machine, sample->tid, sample->pid);
552 	if (thread == NULL)
553 		goto out;
554 
555 	/* this will parse DWARF using stack and register data */
556 	ret = thread__resolve_callchain(thread, cursor, sample,
557 					/*parent=*/NULL, /*root_al=*/NULL,
558 					PERF_MAX_STACK_DEPTH);
559 	thread__put(thread);
560 	if (ret != 0)
561 		goto out;
562 
563 	/* copy kernel callchain and context entries */
564 	for (i = 0; i < sample->callchain->nr; i++) {
565 		inject->raw_callchain->ips[i] = sample->callchain->ips[i];
566 		if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
567 			i++;
568 			break;
569 		}
570 	}
571 	if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
572 		inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
573 
574 	node = cursor->first;
575 	for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
576 		if (!(machine->single_address_space &&
577 		      machine__kernel_ip(machine, node->ip)) &&
578 		    !(node->ms.sym && symbol__inlined(node->ms.sym))) {
579 			inject->raw_callchain->ips[i++] = node->ip;
580 		}
581 
582 		node = node->next;
583 	}
584 
585 	inject->raw_callchain->nr = i;
586 	sample->callchain = inject->raw_callchain;
587 
588 out:
589 	memcpy(event_copy, event, sizeof(event->header));
590 
591 	/* remove sample_type {STACK,REGS}_USER for synthesize */
592 	sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
593 
594 	sz = perf_event__sample_event_size(sample, sample_type,
595 					   evsel->core.attr.read_format,
596 					   evsel->core.attr.branch_sample_type);
597 	if (sz >= PERF_SAMPLE_MAX_SIZE) {
598 		pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE);
599 		return -EFAULT;
600 	}
601 	event_copy->header.size = sz;
602 
603 	ret = perf_event__synthesize_sample(event_copy, sample_type,
604 					    evsel->core.attr.read_format,
605 					    evsel->core.attr.branch_sample_type, sample);
606 	if (ret) {
607 		pr_err("Failed to synthesize sample\n");
608 		return ret;
609 	}
610 	return perf_event__repipe_synth(tool, event_copy);
611 }
612 
/*
 * Find or create the DSO for @filename as mapped by thread @pid/@tid and
 * attach the thread's namespace info to it.
 *
 * Returns the DSO (as returned by the machine__findnew_*() helpers), or NULL
 * if the thread or the DSO could not be found or created.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);
	struct nsinfo *nsi;
	struct dso *dso;
	bool is_vdso;

	if (!thread) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	is_vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (!is_vdso) {
		dso = machine__findnew_dso_id(machine, filename, id);
	} else {
		/* The vdso maps are always on the host and not the
		 * container.  Ensure that we don't use setns to look
		 * them up.
		 */
		struct nsinfo *host_nsi = nsinfo__copy(nsi);

		if (host_nsi != NULL) {
			/* Swap the thread's nsinfo reference for the private copy. */
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(host_nsi);
			nsi = host_nsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	}

	if (dso == NULL) {
		/* Nobody took the namespace reference; drop it. */
		nsinfo__put(nsi);
	} else {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	}

	thread__put(thread);
	return dso;
}
657 
658 /*
659  * The evsel used for the sample ID for mmap events. Typically stashed when
660  * processing mmap events. If not stashed, search the evlist for the first mmap
661  * gathering event.
662  */
663 static struct evsel *inject__mmap_evsel(struct perf_inject *inject)
664 {
665 	struct evsel *pos;
666 
667 	if (inject->mmap_evsel)
668 		return inject->mmap_evsel;
669 
670 	evlist__for_each_entry(inject->session->evlist, pos) {
671 		if (pos->core.attr.mmap) {
672 			inject->mmap_evsel = pos;
673 			return pos;
674 		}
675 	}
676 	pr_err("No mmap events found\n");
677 	return NULL;
678 }
679 
/*
 * Common handling for mmap and mmap2 events.
 *
 * Depending on inject->build_id_style the event is processed with
 * @perf_event_process, accompanied by an injected build ID event, replaced
 * by a synthesized mmap2 event carrying a build ID, or dropped (lazy mmap2
 * mode). In JIT mode (HAVE_JITDUMP) the map is first offered to
 * jit_process(), which may consume it entirely.
 *
 * The DSO reference obtained from findnew_dso() is dropped on every return
 * path after it was taken.
 *
 * Returns 0 on success or a negative error.
 */
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	/* Set once findnew_dso() was tried, so it is attempted at most once. */
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			/* jit_process() handled the event; account for what it wrote. */
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			if (!sample->evsel)
				sample->evsel = evlist__event2evsel(inject->session->evlist, event);

			if (sample->evsel) {
				dso__set_hit(dso);
				/* The result is deliberately not checked here. */
				tool__inject_build_id(tool, sample, machine,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		/* sample->evsel is replaced only for the synthesis below. */
		struct evsel *saved_evsel = sample->evsel;

		sample->evsel = evlist__event2evsel(inject->session->evlist, event);
		if (sample->evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (sample->evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			sample->evsel = saved_evsel;
			dso__put(dso);
			return 0;
		}
		sample->evsel = saved_evsel;
	}
	dso__put(dso);
	/* In lazy mmap2 mode the original event is not forwarded here. */
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}
788 
789 static int perf_event__repipe_mmap(const struct perf_tool *tool,
790 				union perf_event *event,
791 				struct perf_sample *sample,
792 				struct machine *machine)
793 {
794 	return perf_event__repipe_common_mmap(
795 		tool, event, sample, machine,
796 		event->mmap.pid, event->mmap.tid,
797 		event->mmap.start, event->mmap.len, event->mmap.pgoff,
798 		/*flags=*/0, PROT_EXEC,
799 		event->mmap.filename, /*dso_id=*/NULL,
800 		perf_event__process_mmap);
801 }
802 
803 static int perf_event__repipe_mmap2(const struct perf_tool *tool,
804 				union perf_event *event,
805 				struct perf_sample *sample,
806 				struct machine *machine)
807 {
808 	struct dso_id id = dso_id_empty;
809 
810 	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
811 		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
812 	} else {
813 		id.maj = event->mmap2.maj;
814 		id.min = event->mmap2.min;
815 		id.ino = event->mmap2.ino;
816 		id.ino_generation = event->mmap2.ino_generation;
817 		id.mmap2_valid = true;
818 		id.mmap2_ino_generation_valid = true;
819 	}
820 
821 	return perf_event__repipe_common_mmap(
822 		tool, event, sample, machine,
823 		event->mmap2.pid, event->mmap2.tid,
824 		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
825 		event->mmap2.flags, event->mmap2.prot,
826 		event->mmap2.filename, &id,
827 		perf_event__process_mmap2);
828 }
829 
/*
 * Handler for fork events: update the machine state through
 * perf_event__process_fork() and forward the event to the output.
 *
 * The event is forwarded even if processing failed. Returns the processing
 * error if there is one, otherwise the result of writing the event, so that
 * an output failure is no longer lost.
 */
static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_fork(tool, event, sample, machine);
	int repipe_err = perf_event__repipe(tool, event, sample, machine);

	return err ? err : repipe_err;
}
842 
/*
 * Handler for comm events: update the machine state through
 * perf_event__process_comm() and forward the event to the output.
 *
 * The event is forwarded even if processing failed. Returns the processing
 * error if there is one, otherwise the result of writing the event, so that
 * an output failure is no longer lost.
 */
static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_comm(tool, event, sample, machine);
	int repipe_err = perf_event__repipe(tool, event, sample, machine);

	return err ? err : repipe_err;
}
855 
/*
 * Handler for namespaces events: update the machine state through
 * perf_event__process_namespaces() and forward the event to the output.
 *
 * The event is forwarded even if processing failed. Returns the processing
 * error if there is one, otherwise the result of writing the event, so that
 * an output failure is no longer lost.
 */
static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);
	int repipe_err = perf_event__repipe(tool, event, sample, machine);

	return err ? err : repipe_err;
}
867 
/*
 * Handler for exit events: update the machine state through
 * perf_event__process_exit() and forward the event to the output.
 *
 * The event is forwarded even if processing failed. Returns the processing
 * error if there is one, otherwise the result of writing the event, so that
 * an output failure is no longer lost.
 */
static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_exit(tool, event, sample, machine);
	int repipe_err = perf_event__repipe(tool, event, sample, machine);

	return err ? err : repipe_err;
}
880 
#ifdef HAVE_LIBTRACEEVENT
/*
 * Handler for tracing data events: forward the event to the output and then
 * let perf_event__process_tracing_data() consume it.
 *
 * Processing is attempted even if the write failed. Returns the processing
 * result when it is an error or when the write succeeded; otherwise the
 * write error is returned so that an output failure is no longer lost.
 */
static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
					   struct perf_session *session,
					   union perf_event *event)
{
	int repipe_err = perf_event__repipe_synth(tool, event);
	int ret = perf_event__process_tracing_data(tool, session, event);

	if (ret < 0 || !repipe_err)
		return ret;

	return repipe_err;
}
#endif
891 
892 static int dso__read_build_id(struct dso *dso)
893 {
894 	struct nscookie nsc;
895 	struct build_id bid = { .size = 0, };
896 
897 	if (dso__has_build_id(dso))
898 		return 0;
899 
900 	mutex_lock(dso__lock(dso));
901 	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
902 	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
903 		dso__set_build_id(dso, &bid);
904 	else if (dso__nsinfo(dso)) {
905 		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));
906 
907 		if (new_name && filename__read_build_id(new_name, &bid) > 0)
908 			dso__set_build_id(dso, &bid);
909 		free(new_name);
910 	}
911 	nsinfo__mountns_exit(&nsc);
912 	mutex_unlock(dso__lock(dso));
913 
914 	return dso__has_build_id(dso) ? 0 : -1;
915 }
916 
917 static struct strlist *perf_inject__parse_known_build_ids(
918 	const char *known_build_ids_string)
919 {
920 	struct str_node *pos, *tmp;
921 	struct strlist *known_build_ids;
922 	int bid_len;
923 
924 	known_build_ids = strlist__new(known_build_ids_string, NULL);
925 	if (known_build_ids == NULL)
926 		return NULL;
927 	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
928 		const char *build_id, *dso_name;
929 
930 		build_id = skip_spaces(pos->s);
931 		dso_name = strchr(build_id, ' ');
932 		if (dso_name == NULL) {
933 			strlist__remove(known_build_ids, pos);
934 			continue;
935 		}
936 		bid_len = dso_name - pos->s;
937 		dso_name = skip_spaces(dso_name);
938 		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
939 			strlist__remove(known_build_ids, pos);
940 			continue;
941 		}
942 		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
943 			if (!isxdigit(build_id[2 * ix]) ||
944 			    !isxdigit(build_id[2 * ix + 1])) {
945 				strlist__remove(known_build_ids, pos);
946 				break;
947 			}
948 		}
949 	}
950 	return known_build_ids;
951 }
952 
953 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
954 					       struct dso *dso)
955 {
956 	struct str_node *pos;
957 
958 	strlist__for_each_entry(pos, inject->known_build_ids) {
959 		struct build_id bid;
960 		const char *build_id, *dso_name;
961 		size_t bid_len;
962 
963 		build_id = skip_spaces(pos->s);
964 		dso_name = strchr(build_id, ' ');
965 		bid_len = dso_name - pos->s;
966 		if (bid_len > sizeof(bid.data))
967 			bid_len = sizeof(bid.data);
968 		dso_name = skip_spaces(dso_name);
969 		if (strcmp(dso__long_name(dso), dso_name))
970 			continue;
971 		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
972 			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
973 					hex(build_id[2 * ix + 1]));
974 		}
975 		bid.size = bid_len / 2;
976 		dso__set_build_id(dso, &bid);
977 		return true;
978 	}
979 	return false;
980 }
981 
982 static int tool__inject_build_id(const struct perf_tool *tool,
983 				 struct perf_sample *sample,
984 				 struct machine *machine,
985 				 __u16 misc,
986 				 const char *filename,
987 				 struct dso *dso, u32 flags)
988 {
989 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
990 	int err;
991 
992 	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
993 		return 0;
994 	if (is_no_dso_memory(filename))
995 		return 0;
996 
997 	if (inject->known_build_ids != NULL &&
998 	    perf_inject__lookup_known_build_id(inject, dso))
999 		return 1;
1000 
1001 	if (dso__read_build_id(dso) < 0) {
1002 		pr_debug("no build_id found for %s\n", filename);
1003 		return -1;
1004 	}
1005 
1006 	err = perf_event__synthesize_build_id(tool, sample, machine,
1007 					      perf_event__repipe,
1008 					      misc, dso__bid(dso),
1009 					      filename);
1010 	if (err) {
1011 		pr_err("Can't synthesize build_id event for %s\n", filename);
1012 		return -1;
1013 	}
1014 
1015 	return 0;
1016 }
1017 
1018 static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
1019 				       struct perf_sample *sample,
1020 				       struct machine *machine,
1021 				       __u16 misc,
1022 				       __u32 pid, __u32 tid,
1023 				       __u64 start, __u64 len, __u64 pgoff,
1024 				       struct dso *dso,
1025 				       __u32 prot, __u32 flags,
1026 				       const char *filename)
1027 {
1028 	int err;
1029 
1030 	/* Return to repipe anonymous maps. */
1031 	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
1032 		return 1;
1033 	if (is_no_dso_memory(filename))
1034 		return 1;
1035 
1036 	if (dso__read_build_id(dso)) {
1037 		pr_debug("no build_id found for %s\n", filename);
1038 		return -1;
1039 	}
1040 
1041 	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
1042 						    perf_event__repipe,
1043 						    misc, pid, tid,
1044 						    start, len, pgoff,
1045 						    dso__bid(dso),
1046 						    prot, flags,
1047 						    filename);
1048 	if (err) {
1049 		pr_err("Can't synthesize build_id event for %s\n", filename);
1050 		return -1;
1051 	}
1052 	return 0;
1053 }
1054 
1055 static int mark_dso_hit(const struct perf_inject *inject,
1056 			const struct perf_tool *tool,
1057 			struct perf_sample *sample,
1058 			struct machine *machine,
1059 			struct evsel *mmap_evsel,
1060 			struct map *map, bool sample_in_dso)
1061 {
1062 	struct dso *dso;
1063 	u16 misc = sample->cpumode;
1064 
1065 	if (!map)
1066 		return 0;
1067 
1068 	if (!sample_in_dso) {
1069 		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
1070 			PERF_RECORD_MISC_GUEST_USER;
1071 
1072 		if ((misc & guest_mask) != 0) {
1073 			misc &= PERF_RECORD_MISC_HYPERVISOR;
1074 			misc |= __map__is_kernel(map)
1075 				? PERF_RECORD_MISC_GUEST_KERNEL
1076 				: PERF_RECORD_MISC_GUEST_USER;
1077 		} else {
1078 			misc &= PERF_RECORD_MISC_HYPERVISOR;
1079 			misc |= __map__is_kernel(map)
1080 				? PERF_RECORD_MISC_KERNEL
1081 				: PERF_RECORD_MISC_USER;
1082 		}
1083 	}
1084 	dso = map__dso(map);
1085 	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
1086 		if (dso && !dso__hit(dso)) {
1087 			/*
1088 			 * The sample is just read for identifiers which we want
1089 			 * to match the for the event of the sample.
1090 			 */
1091 			dso__set_hit(dso);
1092 			tool__inject_build_id(tool, sample, machine,
1093 					     misc, dso__long_name(dso), dso,
1094 					     map__flags(map));
1095 		}
1096 	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
1097 		if (!map__hit(map)) {
1098 			const struct build_id null_bid = { .size = 0 };
1099 			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
1100 			const char *filename = dso ? dso__long_name(dso) : "";
1101 			struct evsel *saved_evsel = sample->evsel;
1102 
1103 			map__set_hit(map);
1104 			/* Creating a new mmap2 event which has an evsel for the mmap event. */
1105 			sample->evsel = mmap_evsel;
1106 			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
1107 								perf_event__repipe,
1108 								misc,
1109 								sample->pid, sample->tid,
1110 								map__start(map),
1111 								map__end(map) - map__start(map),
1112 								map__pgoff(map),
1113 								bid,
1114 								map__prot(map),
1115 								map__flags(map),
1116 								filename);
1117 			sample->evsel = saved_evsel;
1118 		}
1119 	}
1120 	return 0;
1121 }
1122 
/*
 * Arguments forwarded by mark_dso_hit_callback() to mark_dso_hit() for every
 * callchain node visited.
 */
struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	/* Sample whose callchain is being walked */
	struct perf_sample *sample;
	struct machine *machine;
	/* Passed through as the mmap_evsel argument of mark_dso_hit() */
	struct evsel *mmap_evsel;
};
1130 
1131 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
1132 {
1133 	struct mark_dso_hit_args *args = data;
1134 	struct map *map = node->ms.map;
1135 
1136 	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
1137 			    args->mmap_evsel, map, /*sample_in_dso=*/false);
1138 }
1139 
1140 static int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
1141 				      struct perf_sample *sample, struct machine *machine)
1142 {
1143 	struct addr_location al;
1144 	struct thread *thread;
1145 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1146 	struct mark_dso_hit_args args = {
1147 		.inject = inject,
1148 		.tool = tool,
1149 		/*
1150 		 * Use the parsed sample data of the sample event, which will
1151 		 * have a later timestamp than the mmap event.
1152 		 */
1153 		.sample = sample,
1154 		.machine = machine,
1155 		.mmap_evsel = inject__mmap_evsel(inject),
1156 	};
1157 
1158 	addr_location__init(&al);
1159 	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
1160 	if (thread == NULL) {
1161 		pr_err("problem processing %d event, skipping it.\n",
1162 		       event->header.type);
1163 		goto repipe;
1164 	}
1165 
1166 	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
1167 		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
1168 			     /*sample_in_dso=*/true);
1169 	}
1170 
1171 	sample__for_each_callchain_node(thread, sample, PERF_MAX_STACK_DEPTH,
1172 					/*symbols=*/false, mark_dso_hit_callback, &args);
1173 	thread__put(thread);
1174 repipe:
1175 	perf_event__repipe(tool, event, sample, machine);
1176 	addr_location__exit(&al);
1177 	return 0;
1178 }
1179 
1180 static int perf_inject__sched_process_exit(const struct perf_tool *tool,
1181 					   union perf_event *event __maybe_unused,
1182 					   struct perf_sample *sample,
1183 					   struct machine *machine __maybe_unused)
1184 {
1185 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1186 	struct event_entry *ent;
1187 
1188 	list_for_each_entry(ent, &inject->samples, node) {
1189 		if (sample->tid == ent->tid) {
1190 			list_del_init(&ent->node);
1191 			free(ent);
1192 			break;
1193 		}
1194 	}
1195 
1196 	return 0;
1197 }
1198 
1199 static int perf_inject__sched_switch(const struct perf_tool *tool,
1200 				     union perf_event *event,
1201 				     struct perf_sample *sample,
1202 				     struct machine *machine)
1203 {
1204 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1205 	struct event_entry *ent;
1206 
1207 	perf_inject__sched_process_exit(tool, event, sample, machine);
1208 
1209 	ent = malloc(event->header.size + sizeof(struct event_entry));
1210 	if (ent == NULL) {
1211 		color_fprintf(stderr, PERF_COLOR_RED,
1212 			     "Not enough memory to process sched switch event!");
1213 		return -1;
1214 	}
1215 
1216 	ent->tid = sample->tid;
1217 	memcpy(&ent->event, event, event->header.size);
1218 	list_add(&ent->node, &inject->samples);
1219 	return 0;
1220 }
1221 
#ifdef HAVE_LIBTRACEEVENT
/*
 * sched_stat handler: find the event saved by perf_inject__sched_switch() for
 * the task named by this sample's "pid" field, stamp it with this sample's
 * period and time, and repipe it.
 *
 * Returns 0 when no event is saved for that task, otherwise the result of
 * repiping the rewritten event.
 */
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct evsel *evsel = sample->evsel;
	u32 pid = perf_sample__intval(sample, "pid");
	struct event_entry *ent, *saved = NULL;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	int ret;

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid) {
			saved = ent;
			break;
		}
	}
	if (!saved)
		return 0;

	event_sw = &saved->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	/* Re-stamp the saved event with the values of the sched_stat sample. */
	sample_sw.period = sample->period;
	sample_sw.time	 = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format,
				      evsel->core.attr.branch_sample_type, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, machine);
	ret = perf_event__repipe(tool, event_sw, &sample_sw, machine);
	perf_sample__exit(&sample_sw);
	return ret;
}
#endif
1257 
1258 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
1259 {
1260 	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
1261 		return NULL;
1262 	return &gs->vcpu[vcpu];
1263 }
1264 
1265 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
1266 {
1267 	ssize_t ret = writen(gs->tmp_fd, buf, sz);
1268 
1269 	return ret < 0 ? ret : 0;
1270 }
1271 
1272 static int guest_session__repipe(const struct perf_tool *tool,
1273 				 union perf_event *event,
1274 				 struct perf_sample *sample __maybe_unused,
1275 				 struct machine *machine __maybe_unused)
1276 {
1277 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1278 
1279 	return guest_session__output_bytes(gs, event, event->header.size);
1280 }
1281 
1282 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
1283 {
1284 	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
1285 	int hash;
1286 
1287 	if (!guest_tid)
1288 		return -ENOMEM;
1289 
1290 	guest_tid->tid = tid;
1291 	guest_tid->vcpu = vcpu;
1292 	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
1293 	hlist_add_head(&guest_tid->node, &gs->tids[hash]);
1294 
1295 	return 0;
1296 }
1297 
1298 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
1299 				 union perf_event *event,
1300 				 u64 offset __maybe_unused, void *data)
1301 {
1302 	struct guest_session *gs = data;
1303 	unsigned int vcpu;
1304 	struct guest_vcpu *guest_vcpu;
1305 	int ret;
1306 
1307 	if (event->header.type != PERF_RECORD_COMM ||
1308 	    event->comm.pid != gs->machine_pid)
1309 		return 0;
1310 
1311 	/*
1312 	 * QEMU option -name debug-threads=on, causes thread names formatted as
1313 	 * below, although it is not an ABI. Also libvirt seems to use this by
1314 	 * default. Here we rely on it to tell us which thread is which VCPU.
1315 	 */
1316 	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
1317 	if (ret <= 0)
1318 		return ret;
1319 	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
1320 		 event->comm.tid, event->comm.comm, vcpu);
1321 	if (vcpu > INT_MAX) {
1322 		pr_err("Invalid VCPU %u\n", vcpu);
1323 		return -EINVAL;
1324 	}
1325 	guest_vcpu = guest_session__vcpu(gs, vcpu);
1326 	if (!guest_vcpu)
1327 		return -ENOMEM;
1328 	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
1329 		pr_err("Fatal error: Two threads found with the same VCPU\n");
1330 		return -EINVAL;
1331 	}
1332 	guest_vcpu->tid = event->comm.tid;
1333 
1334 	return guest_session__map_tid(gs, event->comm.tid, vcpu);
1335 }
1336 
1337 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
1338 {
1339 	return perf_session__peek_events(session, session->header.data_offset,
1340 					 session->header.data_size,
1341 					 host_peek_vm_comms_cb, gs);
1342 }
1343 
1344 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
1345 {
1346 	return evlist__id2sid(evlist, id);
1347 }
1348 
1349 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
1350 {
1351 	do {
1352 		gs->highest_id += 1;
1353 	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
1354 
1355 	return gs->highest_id;
1356 }
1357 
1358 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
1359 {
1360 	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
1361 	int hash;
1362 
1363 	if (!guest_id)
1364 		return -ENOMEM;
1365 
1366 	guest_id->id = id;
1367 	guest_id->host_id = host_id;
1368 	guest_id->vcpu = vcpu;
1369 	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
1370 	hlist_add_head(&guest_id->node, &gs->heads[hash]);
1371 
1372 	return 0;
1373 }
1374 
1375 static u64 evlist__find_highest_id(struct evlist *evlist)
1376 {
1377 	struct evsel *evsel;
1378 	u64 highest_id = 1;
1379 
1380 	evlist__for_each_entry(evlist, evsel) {
1381 		u32 j;
1382 
1383 		for (j = 0; j < evsel->core.ids; j++) {
1384 			u64 id = evsel->core.id[j];
1385 
1386 			if (id > highest_id)
1387 				highest_id = id;
1388 		}
1389 	}
1390 
1391 	return highest_id;
1392 }
1393 
1394 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
1395 {
1396 	struct evlist *evlist = gs->session->evlist;
1397 	struct evsel *evsel;
1398 	int ret;
1399 
1400 	evlist__for_each_entry(evlist, evsel) {
1401 		u32 j;
1402 
1403 		for (j = 0; j < evsel->core.ids; j++) {
1404 			struct perf_sample_id *sid;
1405 			u64 host_id;
1406 			u64 id;
1407 
1408 			id = evsel->core.id[j];
1409 			sid = evlist__id2sid(evlist, id);
1410 			if (!sid || sid->cpu.cpu == -1)
1411 				continue;
1412 			host_id = guest_session__allocate_new_id(gs, host_evlist);
1413 			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
1414 			if (ret)
1415 				return ret;
1416 		}
1417 	}
1418 
1419 	return 0;
1420 }
1421 
1422 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
1423 {
1424 	struct hlist_head *head;
1425 	struct guest_id *guest_id;
1426 	int hash;
1427 
1428 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
1429 	head = &gs->heads[hash];
1430 
1431 	hlist_for_each_entry(guest_id, head, node)
1432 		if (guest_id->id == id)
1433 			return guest_id;
1434 
1435 	return NULL;
1436 }
1437 
1438 static int process_attr(const struct perf_tool *tool, union perf_event *event,
1439 			struct perf_sample *sample __maybe_unused,
1440 			struct machine *machine __maybe_unused)
1441 {
1442 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1443 
1444 	return perf_event__process_attr(tool, event, &inject->session->evlist);
1445 }
1446 
1447 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
1448 {
1449 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1450 	struct perf_event_attr attr = evsel->core.attr;
1451 	u64 *id_array;
1452 	u32 *vcpu_array;
1453 	int ret = -ENOMEM;
1454 	u32 i;
1455 
1456 	id_array = calloc(evsel->core.ids, sizeof(*id_array));
1457 	if (!id_array)
1458 		return -ENOMEM;
1459 
1460 	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
1461 	if (!vcpu_array)
1462 		goto out;
1463 
1464 	for (i = 0; i < evsel->core.ids; i++) {
1465 		u64 id = evsel->core.id[i];
1466 		struct guest_id *guest_id = guest_session__lookup_id(gs, id);
1467 
1468 		if (!guest_id) {
1469 			pr_err("Failed to find guest id %"PRIu64"\n", id);
1470 			ret = -EINVAL;
1471 			goto out;
1472 		}
1473 		id_array[i] = guest_id->host_id;
1474 		vcpu_array[i] = guest_id->vcpu;
1475 	}
1476 
1477 	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
1478 	attr.exclude_host = 1;
1479 	attr.exclude_guest = 0;
1480 
1481 	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
1482 					  id_array, process_attr);
1483 	if (ret)
1484 		pr_err("Failed to add guest attr.\n");
1485 
1486 	for (i = 0; i < evsel->core.ids; i++) {
1487 		struct perf_sample_id *sid;
1488 		u32 vcpu = vcpu_array[i];
1489 
1490 		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
1491 		/* Guest event is per-thread from the host point of view */
1492 		sid->cpu.cpu = -1;
1493 		sid->tid = gs->vcpu[vcpu].tid;
1494 		sid->machine_pid = gs->machine_pid;
1495 		sid->vcpu.cpu = vcpu;
1496 	}
1497 out:
1498 	free(vcpu_array);
1499 	free(id_array);
1500 	return ret;
1501 }
1502 
1503 static int guest_session__add_attrs(struct guest_session *gs)
1504 {
1505 	struct evlist *evlist = gs->session->evlist;
1506 	struct evsel *evsel;
1507 	int ret;
1508 
1509 	evlist__for_each_entry(evlist, evsel) {
1510 		ret = guest_session__add_attr(gs, evsel);
1511 		if (ret)
1512 			return ret;
1513 	}
1514 
1515 	return 0;
1516 }
1517 
1518 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1519 {
1520 	struct perf_session *session = inject->session;
1521 	struct evlist *evlist = session->evlist;
1522 	struct machine *machine = &session->machines.host;
1523 	size_t from = evlist->core.nr_entries - new_cnt;
1524 
1525 	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1526 						 evlist, machine, from);
1527 }
1528 
1529 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1530 {
1531 	struct hlist_head *head;
1532 	struct guest_tid *guest_tid;
1533 	int hash;
1534 
1535 	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1536 	head = &gs->tids[hash];
1537 
1538 	hlist_for_each_entry(guest_tid, head, node)
1539 		if (guest_tid->tid == tid)
1540 			return guest_tid;
1541 
1542 	return NULL;
1543 }
1544 
1545 static bool dso__is_in_kernel_space(struct dso *dso)
1546 {
1547 	if (dso__is_vdso(dso))
1548 		return false;
1549 
1550 	return dso__is_kcore(dso) ||
1551 	       dso__kernel(dso) ||
1552 	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1553 }
1554 
1555 static u64 evlist__first_id(struct evlist *evlist)
1556 {
1557 	struct evsel *evsel;
1558 
1559 	evlist__for_each_entry(evlist, evsel) {
1560 		if (evsel->core.ids)
1561 			return evsel->core.id[0];
1562 	}
1563 	return 0;
1564 }
1565 
1566 static int process_build_id(const struct perf_tool *tool,
1567 			    union perf_event *event,
1568 			    struct perf_sample *sample __maybe_unused,
1569 			    struct machine *machine __maybe_unused)
1570 {
1571 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1572 
1573 	return perf_event__process_build_id(tool, inject->session, event);
1574 }
1575 
1576 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1577 {
1578 	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1579 	struct perf_sample synth_sample = {
1580 		.evsel	   = inject__mmap_evsel(inject),
1581 		.pid	   = -1,
1582 		.tid	   = -1,
1583 		.time	   = -1,
1584 		.stream_id = -1,
1585 		.cpu	   = -1,
1586 		.period	   = 1,
1587 		.cpumode   = dso__is_in_kernel_space(dso)
1588 		? PERF_RECORD_MISC_GUEST_KERNEL
1589 		: PERF_RECORD_MISC_GUEST_USER,
1590 	};
1591 
1592 	if (!machine)
1593 		return -ENOMEM;
1594 
1595 	dso__set_hit(dso);
1596 
1597 	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
1598 					       process_build_id,
1599 					       /*misc=*/synth_sample.cpumode,
1600 					       dso__bid(dso), dso__long_name(dso));
1601 }
1602 
1603 static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
1604 {
1605 	struct guest_session *gs = data;
1606 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1607 
1608 	if (!dso__has_build_id(dso))
1609 		return 0;
1610 
1611 	return synthesize_build_id(inject, dso, gs->machine_pid);
1612 
1613 }
1614 
1615 static int guest_session__add_build_ids(struct guest_session *gs)
1616 {
1617 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1618 
1619 	/* Build IDs will be put in the Build ID feature section */
1620 	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1621 
1622 	return dsos__for_each_dso(&gs->session->machines.host.dsos,
1623 				  guest_session__add_build_ids_cb,
1624 				  gs);
1625 }
1626 
1627 static int guest_session__ksymbol_event(const struct perf_tool *tool,
1628 					union perf_event *event,
1629 					struct perf_sample *sample __maybe_unused,
1630 					struct machine *machine __maybe_unused)
1631 {
1632 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1633 
1634 	/* Only support out-of-line i.e. no BPF support */
1635 	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1636 		return 0;
1637 
1638 	return guest_session__output_bytes(gs, event, event->header.size);
1639 }
1640 
1641 static int guest_session__start(struct guest_session *gs, const char *name, bool force)
1642 {
1643 	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
1644 	struct perf_session *session;
1645 	int ret;
1646 
1647 	/* Only these events will be injected */
1648 	gs->tool.mmap		= guest_session__repipe;
1649 	gs->tool.mmap2		= guest_session__repipe;
1650 	gs->tool.comm		= guest_session__repipe;
1651 	gs->tool.fork		= guest_session__repipe;
1652 	gs->tool.exit		= guest_session__repipe;
1653 	gs->tool.lost		= guest_session__repipe;
1654 	gs->tool.context_switch	= guest_session__repipe;
1655 	gs->tool.ksymbol	= guest_session__ksymbol_event;
1656 	gs->tool.text_poke	= guest_session__repipe;
1657 	/*
1658 	 * Processing a build ID creates a struct dso with that build ID. Later,
1659 	 * all guest dsos are iterated and the build IDs processed into the host
1660 	 * session where they will be output to the Build ID feature section
1661 	 * when the perf.data file header is written.
1662 	 */
1663 	gs->tool.build_id	= perf_event__process_build_id;
1664 	/* Process the id index to know what VCPU an ID belongs to */
1665 	gs->tool.id_index	= perf_event__process_id_index;
1666 
1667 	gs->tool.ordered_events	= true;
1668 	gs->tool.ordering_requires_timestamps = true;
1669 
1670 	gs->data.path	= name;
1671 	gs->data.force	= force;
1672 	gs->data.mode	= PERF_DATA_MODE_READ;
1673 
1674 	session = perf_session__new(&gs->data, &gs->tool);
1675 	if (IS_ERR(session))
1676 		return PTR_ERR(session);
1677 	gs->session = session;
1678 
1679 	/*
1680 	 * Initial events have zero'd ID samples. Get default ID sample size
1681 	 * used for removing them.
1682 	 */
1683 	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
1684 	/* And default ID for adding back a host-compatible ID sample */
1685 	gs->dflt_id = evlist__first_id(session->evlist);
1686 	if (!gs->dflt_id) {
1687 		pr_err("Guest data has no sample IDs");
1688 		return -EINVAL;
1689 	}
1690 
1691 	/* Temporary file for guest events */
1692 	gs->tmp_file_name = strdup(tmp_file_name);
1693 	if (!gs->tmp_file_name)
1694 		return -ENOMEM;
1695 	gs->tmp_fd = mkstemp(gs->tmp_file_name);
1696 	if (gs->tmp_fd < 0)
1697 		return -errno;
1698 
1699 	if (zstd_init(&gs->session->zstd_data, 0) < 0)
1700 		pr_warning("Guest session decompression initialization failed.\n");
1701 
1702 	/*
1703 	 * perf does not support processing 2 sessions simultaneously, so output
1704 	 * guest events to a temporary file.
1705 	 */
1706 	ret = perf_session__process_events(gs->session);
1707 	if (ret)
1708 		return ret;
1709 
1710 	if (lseek(gs->tmp_fd, 0, SEEK_SET))
1711 		return -errno;
1712 
1713 	return 0;
1714 }
1715 
1716 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1717 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1718 {
1719 	struct hlist_node *pos, *n;
1720 	size_t i;
1721 
1722 	for (i = 0; i < hlist_sz; ++i) {
1723 		hlist_for_each_safe(pos, n, &heads[i]) {
1724 			hlist_del(pos);
1725 			free(pos);
1726 		}
1727 	}
1728 }
1729 
1730 static void guest_session__exit(struct guest_session *gs)
1731 {
1732 	if (gs->session) {
1733 		perf_session__delete(gs->session);
1734 		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
1735 		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
1736 	}
1737 	if (gs->tmp_file_name) {
1738 		if (gs->tmp_fd >= 0)
1739 			close(gs->tmp_fd);
1740 		unlink(gs->tmp_file_name);
1741 		zfree(&gs->tmp_file_name);
1742 	}
1743 	zfree(&gs->vcpu);
1744 	zfree(&gs->perf_data_file);
1745 }
1746 
1747 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
1748 {
1749 	tc->time_shift		= time_conv->time_shift;
1750 	tc->time_mult		= time_conv->time_mult;
1751 	tc->time_zero		= time_conv->time_zero;
1752 	tc->time_cycles		= time_conv->time_cycles;
1753 	tc->time_mask		= time_conv->time_mask;
1754 	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
1755 	tc->cap_user_time_short	= time_conv->cap_user_time_short;
1756 }
1757 
1758 static void guest_session__get_tc(struct guest_session *gs)
1759 {
1760 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1761 
1762 	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1763 	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
1764 }
1765 
1766 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
1767 {
1768 	u64 tsc;
1769 
1770 	if (!guest_time) {
1771 		*host_time = 0;
1772 		return;
1773 	}
1774 
1775 	if (gs->guest_tc.cap_user_time_zero)
1776 		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
1777 	else
1778 		tsc = guest_time;
1779 
1780 	/*
1781 	 * This is the correct order of operations for x86 if the TSC Offset and
1782 	 * Multiplier values are used.
1783 	 */
1784 	tsc -= gs->time_offset;
1785 	tsc /= gs->time_scale;
1786 
1787 	if (gs->host_tc.cap_user_time_zero)
1788 		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
1789 	else
1790 		*host_time = tsc;
1791 }
1792 
1793 static int guest_session__fetch(struct guest_session *gs)
1794 {
1795 	void *buf;
1796 	struct perf_event_header *hdr;
1797 	size_t hdr_sz = sizeof(*hdr);
1798 	ssize_t ret;
1799 
1800 	perf_sample__init(&gs->ev.sample, /*all=*/false);
1801 	buf = gs->ev.event_buf;
1802 	if (!buf) {
1803 		buf = malloc(PERF_SAMPLE_MAX_SIZE);
1804 		if (!buf)
1805 			return -ENOMEM;
1806 		gs->ev.event_buf = buf;
1807 	}
1808 	hdr = buf;
1809 	ret = readn(gs->tmp_fd, buf, hdr_sz);
1810 	if (ret < 0)
1811 		return ret;
1812 
1813 	if (!ret) {
1814 		/* Zero size means EOF */
1815 		hdr->size = 0;
1816 		return 0;
1817 	}
1818 
1819 	buf += hdr_sz;
1820 
1821 	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
1822 	if (ret < 0)
1823 		return ret;
1824 
1825 	gs->ev.event = (union perf_event *)gs->ev.event_buf;
1826 	gs->ev.sample.time = 0;
1827 
1828 	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
1829 		pr_err("Unexpected type fetching guest event");
1830 		return 0;
1831 	}
1832 
1833 	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
1834 	if (ret) {
1835 		pr_err("Parse failed fetching guest event");
1836 		return ret;
1837 	}
1838 
1839 	if (!gs->have_tc) {
1840 		guest_session__get_tc(gs);
1841 		gs->have_tc = true;
1842 	}
1843 
1844 	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
1845 
1846 	return 0;
1847 }
1848 
1849 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1850 				    const struct perf_sample *sample)
1851 {
1852 	struct evsel *evsel;
1853 	void *array;
1854 	int ret;
1855 
1856 	evsel = evlist__id2evsel(evlist, sample->id);
1857 	array = ev;
1858 
1859 	if (!evsel) {
1860 		pr_err("No evsel for id %"PRIu64"\n", sample->id);
1861 		return -EINVAL;
1862 	}
1863 
1864 	array += ev->header.size;
1865 	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1866 	if (ret < 0)
1867 		return ret;
1868 
1869 	if (ret & 7) {
1870 		pr_err("Bad id sample size %d\n", ret);
1871 		return -EINVAL;
1872 	}
1873 
1874 	ev->header.size += ret;
1875 
1876 	return 0;
1877 }
1878 
1879 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
1880 {
1881 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1882 	int ret;
1883 
1884 	if (!gs->ready)
1885 		return 0;
1886 
1887 	while (1) {
1888 		struct perf_sample *sample;
1889 		struct guest_id *guest_id;
1890 		union perf_event *ev;
1891 		u16 id_hdr_size;
1892 		u8 cpumode;
1893 		u64 id;
1894 
1895 		if (!gs->fetched) {
1896 			ret = guest_session__fetch(gs);
1897 			if (ret)
1898 				break;
1899 			gs->fetched = true;
1900 		}
1901 
1902 		ev = gs->ev.event;
1903 		sample = &gs->ev.sample;
1904 
1905 		if (!ev->header.size) {
1906 			/* EOF */
1907 			perf_sample__exit(&gs->ev.sample);
1908 			gs->fetched = false;
1909 			ret = 0;
1910 			break;
1911 		}
1912 		if (sample->time > timestamp) {
1913 			ret = 0;
1914 			break;
1915 		}
1916 
1917 		/* Change cpumode to guest */
1918 		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1919 		if (cpumode & PERF_RECORD_MISC_USER)
1920 			cpumode = PERF_RECORD_MISC_GUEST_USER;
1921 		else
1922 			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
1923 		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
1924 		ev->header.misc |= cpumode;
1925 
1926 		id = sample->id;
1927 		if (!id) {
1928 			id = gs->dflt_id;
1929 			id_hdr_size = gs->dflt_id_hdr_size;
1930 		} else {
1931 			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
1932 
1933 			id_hdr_size = evsel__id_hdr_size(evsel);
1934 		}
1935 
1936 		if (id_hdr_size & 7) {
1937 			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
1938 			ret = -EINVAL;
1939 			break;
1940 		}
1941 
1942 		if (ev->header.size & 7) {
1943 			pr_err("Bad event size %u\n", ev->header.size);
1944 			ret = -EINVAL;
1945 			break;
1946 		}
1947 
1948 		/* Remove guest id sample */
1949 		ev->header.size -= id_hdr_size;
1950 
1951 		if (ev->header.size & 7) {
1952 			pr_err("Bad raw event size %u\n", ev->header.size);
1953 			ret = -EINVAL;
1954 			break;
1955 		}
1956 
1957 		guest_id = guest_session__lookup_id(gs, id);
1958 		if (!guest_id) {
1959 			pr_err("Guest event with unknown id %llu\n",
1960 			       (unsigned long long)id);
1961 			ret = -EINVAL;
1962 			break;
1963 		}
1964 
1965 		/* Change to host ID to avoid conflicting ID values */
1966 		sample->id = guest_id->host_id;
1967 		sample->stream_id = guest_id->host_id;
1968 
1969 		if (sample->cpu != (u32)-1) {
1970 			if (sample->cpu >= gs->vcpu_cnt) {
1971 				pr_err("Guest event with unknown VCPU %u\n",
1972 				       sample->cpu);
1973 				return -EINVAL;
1974 			}
1975 			/* Change to host CPU instead of guest VCPU */
1976 			sample->cpu = gs->vcpu[sample->cpu].cpu;
1977 		}
1978 
1979 		/* New id sample with new ID and CPU */
1980 		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
1981 		if (ret)
1982 			break;
1983 
1984 		if (ev->header.size & 7) {
1985 			pr_err("Bad new event size %u\n", ev->header.size);
1986 			ret = -EINVAL;
1987 			break;
1988 		}
1989 
1990 		ret = output_bytes(inject, ev, ev->header.size);
1991 		if (ret)
1992 			break;
1993 
1994 		/* Reset for next guest session event fetch. */
1995 		perf_sample__exit(sample);
1996 		gs->fetched = false;
1997 	}
1998 	if (ret && gs->fetched) {
1999 		/* Clear saved sample state on error. */
2000 		perf_sample__exit(&gs->ev.sample);
2001 		gs->fetched = false;
2002 	}
2003 	return ret;
2004 }
2005 
2006 static int guest_session__flush_events(struct guest_session *gs)
2007 {
2008 	return guest_session__inject_events(gs, -1);
2009 }
2010 
2011 static int host__repipe(const struct perf_tool *tool,
2012 			union perf_event *event,
2013 			struct perf_sample *sample,
2014 			struct machine *machine)
2015 {
2016 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
2017 	int ret;
2018 
2019 	ret = guest_session__inject_events(&inject->guest_session, sample->time);
2020 	if (ret)
2021 		return ret;
2022 
2023 	return perf_event__repipe(tool, event, sample, machine);
2024 }
2025 
2026 static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
2027 			       union perf_event *event)
2028 {
2029 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
2030 	struct guest_session *gs = &inject->guest_session;
2031 	int ret;
2032 
2033 	/*
2034 	 * Peek through host COMM events to find QEMU threads and the VCPU they
2035 	 * are running.
2036 	 */
2037 	ret = host_peek_vm_comms(session, gs);
2038 	if (ret)
2039 		return ret;
2040 
2041 	if (!gs->vcpu_cnt) {
2042 		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
2043 		return -EINVAL;
2044 	}
2045 
2046 	/*
2047 	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
2048 	 */
2049 	gs->highest_id = evlist__find_highest_id(session->evlist);
2050 	ret = guest_session__map_ids(gs, session->evlist);
2051 	if (ret)
2052 		return ret;
2053 
2054 	ret = guest_session__add_attrs(gs);
2055 	if (ret)
2056 		return ret;
2057 
2058 	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
2059 	if (ret) {
2060 		pr_err("Failed to synthesize id_index\n");
2061 		return ret;
2062 	}
2063 
2064 	ret = guest_session__add_build_ids(gs);
2065 	if (ret) {
2066 		pr_err("Failed to add guest build IDs\n");
2067 		return ret;
2068 	}
2069 
2070 	gs->ready = true;
2071 
2072 	ret = guest_session__inject_events(gs, 0);
2073 	if (ret)
2074 		return ret;
2075 
2076 	return perf_event__repipe_op2_synth(tool, session, event);
2077 }
2078 
2079 /*
2080  * Obey finished-round ordering. The FINISHED_ROUND event is first processed
2081  * which flushes host events to file up until the last flush time. Then inject
2082  * guest events up to the same time. Finally write out the FINISHED_ROUND event
2083  * itself.
2084  */
2085 static int host__finished_round(const struct perf_tool *tool,
2086 				union perf_event *event,
2087 				struct ordered_events *oe)
2088 {
2089 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
2090 	int ret = perf_event__process_finished_round(tool, event, oe);
2091 	u64 timestamp = ordered_events__last_flush_time(oe);
2092 
2093 	if (ret)
2094 		return ret;
2095 
2096 	ret = guest_session__inject_events(&inject->guest_session, timestamp);
2097 	if (ret)
2098 		return ret;
2099 
2100 	return perf_event__repipe_oe_synth(tool, event, oe);
2101 }
2102 
2103 static int host__context_switch(const struct perf_tool *tool,
2104 				union perf_event *event,
2105 				struct perf_sample *sample,
2106 				struct machine *machine)
2107 {
2108 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
2109 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2110 	struct guest_session *gs = &inject->guest_session;
2111 	u32 pid = event->context_switch.next_prev_pid;
2112 	u32 tid = event->context_switch.next_prev_tid;
2113 	struct guest_tid *guest_tid;
2114 	u32 vcpu;
2115 
2116 	if (out || pid != gs->machine_pid)
2117 		goto out;
2118 
2119 	guest_tid = guest_session__lookup_tid(gs, tid);
2120 	if (!guest_tid)
2121 		goto out;
2122 
2123 	if (sample->cpu == (u32)-1) {
2124 		pr_err("Switch event does not have CPU\n");
2125 		return -EINVAL;
2126 	}
2127 
2128 	vcpu = guest_tid->vcpu;
2129 	if (vcpu >= gs->vcpu_cnt)
2130 		return -EINVAL;
2131 
2132 	/* Guest is switching in, record which CPU the VCPU is now running on */
2133 	gs->vcpu[vcpu].cpu = sample->cpu;
2134 out:
2135 	return host__repipe(tool, event, sample, machine);
2136 }
2137 
2138 static void sig_handler(int sig __maybe_unused)
2139 {
2140 	session_done = 1;
2141 }
2142 
2143 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
2144 {
2145 	struct perf_event_attr *attr = &evsel->core.attr;
2146 	const char *name = evsel__name(evsel);
2147 
2148 	if (!(attr->sample_type & sample_type)) {
2149 		pr_err("Samples for %s event do not have %s attribute set.",
2150 			name, sample_msg);
2151 		return -EINVAL;
2152 	}
2153 
2154 	return 0;
2155 }
2156 
2157 static int drop_sample(const struct perf_tool *tool __maybe_unused,
2158 		       union perf_event *event __maybe_unused,
2159 		       struct perf_sample *sample __maybe_unused,
2160 		       struct machine *machine __maybe_unused)
2161 {
2162 	return 0;
2163 }
2164 
2165 static void strip_init(struct perf_inject *inject)
2166 {
2167 	struct evlist *evlist = inject->session->evlist;
2168 	struct evsel *evsel;
2169 
2170 	inject->tool.context_switch = perf_event__drop;
2171 
2172 	evlist__for_each_entry(evlist, evsel)
2173 		evsel->handler = drop_sample;
2174 }
2175 
2176 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
2177 {
2178 	struct perf_inject *inject = opt->value;
2179 	const char *args;
2180 	char *dry_run;
2181 
2182 	if (unset)
2183 		return 0;
2184 
2185 	inject->itrace_synth_opts.set = true;
2186 	inject->itrace_synth_opts.vm_time_correlation = true;
2187 	inject->in_place_update = true;
2188 
2189 	if (!str)
2190 		return 0;
2191 
2192 	dry_run = skip_spaces(str);
2193 	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
2194 		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
2195 		inject->in_place_update_dry_run = true;
2196 		args = dry_run + strlen("dry-run");
2197 	} else {
2198 		args = str;
2199 	}
2200 
2201 	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
2202 
2203 	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
2204 }
2205 
2206 static int parse_guest_data(const struct option *opt, const char *str, int unset)
2207 {
2208 	struct perf_inject *inject = opt->value;
2209 	struct guest_session *gs = &inject->guest_session;
2210 	char *tok;
2211 	char *s;
2212 
2213 	if (unset)
2214 		return 0;
2215 
2216 	if (!str)
2217 		goto bad_args;
2218 
2219 	s = strdup(str);
2220 	if (!s)
2221 		return -ENOMEM;
2222 
2223 	gs->perf_data_file = strsep(&s, ",");
2224 	if (!gs->perf_data_file)
2225 		goto bad_args;
2226 
2227 	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
2228 	if (gs->copy_kcore_dir)
2229 		inject->output.is_dir = true;
2230 
2231 	tok = strsep(&s, ",");
2232 	if (!tok)
2233 		goto bad_args;
2234 	gs->machine_pid = strtoul(tok, NULL, 0);
2235 	if (!inject->guest_session.machine_pid)
2236 		goto bad_args;
2237 
2238 	gs->time_scale = 1;
2239 
2240 	tok = strsep(&s, ",");
2241 	if (!tok)
2242 		goto out;
2243 	gs->time_offset = strtoull(tok, NULL, 0);
2244 
2245 	tok = strsep(&s, ",");
2246 	if (!tok)
2247 		goto out;
2248 	gs->time_scale = strtod(tok, NULL);
2249 	if (!gs->time_scale)
2250 		goto bad_args;
2251 out:
2252 	return 0;
2253 
2254 bad_args:
2255 	pr_err("--guest-data option requires guest perf.data file name, "
2256 	       "guest machine PID, and optionally guest timestamp offset, "
2257 	       "and guest timestamp scale factor, separated by commas.\n");
2258 	return -1;
2259 }
2260 
2261 static int save_section_info_cb(struct perf_file_section *section,
2262 				struct perf_header *ph __maybe_unused,
2263 				int feat, int fd __maybe_unused, void *data)
2264 {
2265 	struct perf_inject *inject = data;
2266 
2267 	inject->secs[feat] = *section;
2268 	return 0;
2269 }
2270 
2271 static int save_section_info(struct perf_inject *inject)
2272 {
2273 	struct perf_header *header = &inject->session->header;
2274 	int fd = perf_data__fd(inject->session->data);
2275 
2276 	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
2277 }
2278 
2279 static bool keep_feat(struct perf_inject *inject, int feat)
2280 {
2281 	switch (feat) {
2282 	/* Keep original information that describes the machine or software */
2283 	case HEADER_TRACING_DATA:
2284 	case HEADER_HOSTNAME:
2285 	case HEADER_OSRELEASE:
2286 	case HEADER_VERSION:
2287 	case HEADER_ARCH:
2288 	case HEADER_NRCPUS:
2289 	case HEADER_CPUDESC:
2290 	case HEADER_CPUID:
2291 	case HEADER_TOTAL_MEM:
2292 	case HEADER_CPU_TOPOLOGY:
2293 	case HEADER_NUMA_TOPOLOGY:
2294 	case HEADER_PMU_MAPPINGS:
2295 	case HEADER_CACHE:
2296 	case HEADER_MEM_TOPOLOGY:
2297 	case HEADER_CLOCKID:
2298 	case HEADER_BPF_PROG_INFO:
2299 	case HEADER_BPF_BTF:
2300 	case HEADER_CPU_PMU_CAPS:
2301 	case HEADER_CLOCK_DATA:
2302 	case HEADER_HYBRID_TOPOLOGY:
2303 	case HEADER_PMU_CAPS:
2304 	case HEADER_CPU_DOMAIN_INFO:
2305 	case HEADER_CLN_SIZE:
2306 		return true;
2307 	/* Information that can be updated */
2308 	case HEADER_BUILD_ID:
2309 		return inject->build_id_style == BID_RWS__NONE;
2310 	case HEADER_CMDLINE:
2311 	case HEADER_EVENT_DESC:
2312 	case HEADER_BRANCH_STACK:
2313 	case HEADER_GROUP_DESC:
2314 	case HEADER_AUXTRACE:
2315 	case HEADER_STAT:
2316 	case HEADER_SAMPLE_TIME:
2317 	case HEADER_DIR_FORMAT:
2318 	case HEADER_COMPRESSED:
2319 	default:
2320 		return false;
2321 	};
2322 }
2323 
2324 static int read_file(int fd, u64 offs, void *buf, size_t sz)
2325 {
2326 	ssize_t ret = preadn(fd, buf, sz, offs);
2327 
2328 	if (ret < 0)
2329 		return -errno;
2330 	if ((size_t)ret != sz)
2331 		return -EINVAL;
2332 	return 0;
2333 }
2334 
2335 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
2336 {
2337 	int fd = perf_data__fd(inject->session->data);
2338 	u64 offs = inject->secs[feat].offset;
2339 	size_t sz = inject->secs[feat].size;
2340 	void *buf = malloc(sz);
2341 	int ret;
2342 
2343 	if (!buf)
2344 		return -ENOMEM;
2345 
2346 	ret = read_file(fd, offs, buf, sz);
2347 	if (ret)
2348 		goto out_free;
2349 
2350 	ret = fw->write(fw, buf, sz);
2351 out_free:
2352 	free(buf);
2353 	return ret;
2354 }
2355 
2356 struct inject_fc {
2357 	struct feat_copier fc;
2358 	struct perf_inject *inject;
2359 };
2360 
2361 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
2362 {
2363 	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
2364 	struct perf_inject *inject = inj_fc->inject;
2365 	int ret;
2366 
2367 	if (!inject->secs[feat].offset ||
2368 	    !keep_feat(inject, feat))
2369 		return 0;
2370 
2371 	ret = feat_copy(inject, feat, fw);
2372 	if (ret < 0)
2373 		return ret;
2374 
2375 	return 1; /* Feature section copied */
2376 }
2377 
2378 static int copy_kcore_dir(struct perf_inject *inject)
2379 {
2380 	char *cmd;
2381 	int ret;
2382 
2383 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
2384 		       inject->input_name, inject->output.path);
2385 	if (ret < 0)
2386 		return ret;
2387 	pr_debug("%s\n", cmd);
2388 	ret = system(cmd);
2389 	free(cmd);
2390 	return ret;
2391 }
2392 
2393 static int guest_session__copy_kcore_dir(struct guest_session *gs)
2394 {
2395 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
2396 	char *cmd;
2397 	int ret;
2398 
2399 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
2400 		       gs->perf_data_file, inject->output.path, gs->machine_pid);
2401 	if (ret < 0)
2402 		return ret;
2403 	pr_debug("%s\n", cmd);
2404 	ret = system(cmd);
2405 	free(cmd);
2406 	return ret;
2407 }
2408 
2409 static int output_fd(struct perf_inject *inject)
2410 {
2411 	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
2412 }
2413 
2414 static int __cmd_inject(struct perf_inject *inject)
2415 {
2416 	int ret = -EINVAL;
2417 	struct guest_session *gs = &inject->guest_session;
2418 	struct perf_session *session = inject->session;
2419 	int fd = output_fd(inject);
2420 	u64 output_data_offset = perf_session__data_offset(session->evlist);
2421 	/*
2422 	 * Pipe input hasn't loaded the attributes and will handle them as
2423 	 * events. So that the attributes don't overlap the data, write the
2424 	 * attributes after the data.
2425 	 */
2426 	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;
2427 
2428 	signal(SIGINT, sig_handler);
2429 
2430 	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
2431 	    inject->itrace_synth_opts.set) {
2432 		inject->tool.mmap	  = perf_event__repipe_mmap;
2433 		inject->tool.mmap2	  = perf_event__repipe_mmap2;
2434 		inject->tool.fork	  = perf_event__repipe_fork;
2435 #ifdef HAVE_LIBTRACEEVENT
2436 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
2437 #endif
2438 	}
2439 
2440 	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2441 	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2442 		inject->tool.sample = perf_event__inject_buildid;
2443 	} else if (inject->sched_stat) {
2444 		struct evsel *evsel;
2445 
2446 		evlist__for_each_entry(session->evlist, evsel) {
2447 			const char *name = evsel__name(evsel);
2448 
2449 			if (!strcmp(name, "sched:sched_switch")) {
2450 				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
2451 					return -EINVAL;
2452 
2453 				evsel->handler = perf_inject__sched_switch;
2454 			} else if (!strcmp(name, "sched:sched_process_exit"))
2455 				evsel->handler = perf_inject__sched_process_exit;
2456 #ifdef HAVE_LIBTRACEEVENT
2457 			else if (!strncmp(name, "sched:sched_stat_", 17))
2458 				evsel->handler = perf_inject__sched_stat;
2459 #endif
2460 		}
2461 	} else if (inject->itrace_synth_opts.vm_time_correlation) {
2462 		session->itrace_synth_opts = &inject->itrace_synth_opts;
2463 		memset(&inject->tool, 0, sizeof(inject->tool));
2464 		inject->tool.id_index	    = perf_event__process_id_index;
2465 		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2466 		inject->tool.auxtrace	    = perf_event__process_auxtrace;
2467 		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
2468 		inject->tool.ordered_events = true;
2469 		inject->tool.ordering_requires_timestamps = true;
2470 	} else if (inject->itrace_synth_opts.set) {
2471 		session->itrace_synth_opts = &inject->itrace_synth_opts;
2472 		inject->itrace_synth_opts.inject = true;
2473 		inject->tool.comm	    = perf_event__repipe_comm;
2474 		inject->tool.namespaces	    = perf_event__repipe_namespaces;
2475 		inject->tool.exit	    = perf_event__repipe_exit;
2476 		inject->tool.id_index	    = perf_event__process_id_index;
2477 		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2478 		inject->tool.auxtrace	    = perf_event__process_auxtrace;
2479 		inject->tool.aux	    = perf_event__drop_aux;
2480 		inject->tool.itrace_start   = perf_event__drop_aux;
2481 		inject->tool.aux_output_hw_id = perf_event__drop_aux;
2482 		inject->tool.ordered_events = true;
2483 		inject->tool.ordering_requires_timestamps = true;
2484 		/* Allow space in the header for new attributes */
2485 		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
2486 		if (inject->strip)
2487 			strip_init(inject);
2488 	} else if (gs->perf_data_file) {
2489 		char *name = gs->perf_data_file;
2490 
2491 		/*
2492 		 * Not strictly necessary, but keep these events in order wrt
2493 		 * guest events.
2494 		 */
2495 		inject->tool.mmap		= host__repipe;
2496 		inject->tool.mmap2		= host__repipe;
2497 		inject->tool.comm		= host__repipe;
2498 		inject->tool.fork		= host__repipe;
2499 		inject->tool.exit		= host__repipe;
2500 		inject->tool.lost		= host__repipe;
2501 		inject->tool.context_switch	= host__repipe;
2502 		inject->tool.ksymbol		= host__repipe;
2503 		inject->tool.text_poke		= host__repipe;
2504 		/*
2505 		 * Once the host session has initialized, set up sample ID
2506 		 * mapping and feed in guest attrs, build IDs and initial
2507 		 * events.
2508 		 */
2509 		inject->tool.finished_init	= host__finished_init;
2510 		/* Obey finished round ordering */
2511 		inject->tool.finished_round	= host__finished_round;
2512 		/* Keep track of which CPU a VCPU is runnng on */
2513 		inject->tool.context_switch	= host__context_switch;
2514 		/*
2515 		 * Must order events to be able to obey finished round
2516 		 * ordering.
2517 		 */
2518 		inject->tool.ordered_events	= true;
2519 		inject->tool.ordering_requires_timestamps = true;
2520 		/* Set up a separate session to process guest perf.data file */
2521 		ret = guest_session__start(gs, name, session->data->force);
2522 		if (ret) {
2523 			pr_err("Failed to process %s, error %d\n", name, ret);
2524 			return ret;
2525 		}
2526 		/* Allow space in the header for guest attributes */
2527 		output_data_offset += gs->session->header.data_offset;
2528 		output_data_offset = roundup(output_data_offset, 4096);
2529 	} else if (inject->convert_callchain) {
2530 		inject->tool.sample	= perf_event__convert_sample_callchain;
2531 		inject->tool.fork	= perf_event__repipe_fork;
2532 		inject->tool.comm	= perf_event__repipe_comm;
2533 		inject->tool.exit	= perf_event__repipe_exit;
2534 		inject->tool.mmap	= perf_event__repipe_mmap;
2535 		inject->tool.mmap2	= perf_event__repipe_mmap2;
2536 		inject->tool.ordered_events = true;
2537 		inject->tool.ordering_requires_timestamps = true;
2538 	}
2539 
2540 	if (!inject->itrace_synth_opts.set)
2541 		auxtrace_index__free(&session->auxtrace_index);
2542 
2543 	if (!inject->output.is_pipe && !inject->in_place_update)
2544 		lseek(fd, output_data_offset, SEEK_SET);
2545 
2546 	ret = perf_session__process_events(session);
2547 	if (ret)
2548 		return ret;
2549 
2550 	if (gs->session) {
2551 		/*
2552 		 * Remaining guest events have later timestamps. Flush them
2553 		 * out to file.
2554 		 */
2555 		ret = guest_session__flush_events(gs);
2556 		if (ret) {
2557 			pr_err("Failed to flush guest events\n");
2558 			return ret;
2559 		}
2560 	}
2561 
2562 	if (!inject->output.is_pipe && !inject->in_place_update) {
2563 		struct inject_fc inj_fc = {
2564 			.fc.copy = feat_copy_cb,
2565 			.inject = inject,
2566 		};
2567 
2568 		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2569 		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
2570 			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
2571 		/*
2572 		 * Keep all buildids when there is unprocessed AUX data because
2573 		 * it is not known which ones the AUX trace hits.
2574 		 */
2575 		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
2576 		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
2577 			perf_session__dsos_hit_all(session);
2578 		/*
2579 		 * The AUX areas have been removed and replaced with
2580 		 * synthesized hardware events, so clear the feature flag.
2581 		 */
2582 		if (inject->itrace_synth_opts.set) {
2583 			struct evsel *evsel;
2584 
2585 			perf_header__clear_feat(&session->header,
2586 						HEADER_AUXTRACE);
2587 
2588 			evlist__for_each_entry(session->evlist, evsel) {
2589 				evsel->core.attr.sample_type &= ~PERF_SAMPLE_AUX;
2590 			}
2591 
2592 			if (inject->itrace_synth_opts.add_last_branch) {
2593 				perf_header__set_feat(&session->header,
2594 						      HEADER_BRANCH_STACK);
2595 
2596 				evlist__for_each_entry(session->evlist, evsel) {
2597 					evsel->core.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2598 					if (evsel->core.attr.size < PERF_ATTR_SIZE_VER2)
2599 						evsel->core.attr.size = PERF_ATTR_SIZE_VER2;
2600 					evsel->core.attr.branch_sample_type |=
2601 						PERF_SAMPLE_BRANCH_HW_INDEX;
2602 				}
2603 			}
2604 		}
2605 
2606 		/*
2607 		 * The converted data file won't have stack and registers.
2608 		 * Update the perf_event_attr to remove them before writing.
2609 		 */
2610 		if (inject->convert_callchain) {
2611 			struct evsel *evsel;
2612 
2613 			evlist__for_each_entry(session->evlist, evsel) {
2614 				evsel__reset_sample_bit(evsel, REGS_USER);
2615 				evsel__reset_sample_bit(evsel, STACK_USER);
2616 				evsel->core.attr.sample_regs_user = 0;
2617 				evsel->core.attr.sample_stack_user = 0;
2618 				evsel->core.attr.exclude_callchain_user = 0;
2619 			}
2620 		}
2621 
2622 		if (inject->aslr)
2623 			aslr_tool__strip_evlist(inject->session->tool, session->evlist);
2624 
2625 		session->header.data_offset = output_data_offset;
2626 		session->header.data_size = inject->bytes_written;
2627 		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
2628 					    write_attrs_after_data);
2629 
2630 		if (inject->copy_kcore_dir) {
2631 			ret = copy_kcore_dir(inject);
2632 			if (ret) {
2633 				pr_err("Failed to copy kcore\n");
2634 				return ret;
2635 			}
2636 		}
2637 		if (gs->copy_kcore_dir) {
2638 			ret = guest_session__copy_kcore_dir(gs);
2639 			if (ret) {
2640 				pr_err("Failed to copy guest kcore\n");
2641 				return ret;
2642 			}
2643 		}
2644 	}
2645 
2646 	return ret;
2647 }
2648 
2649 static bool evsel__has_dwarf_callchain(struct evsel *evsel)
2650 {
2651 	struct perf_event_attr *attr = &evsel->core.attr;
2652 	const u64 dwarf_callchain_flags =
2653 		PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
2654 
2655 	if (!attr->exclude_callchain_user)
2656 		return false;
2657 
2658 	return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
2659 }
2660 
2661 int cmd_inject(int argc, const char **argv)
2662 {
2663 	struct perf_inject inject = {
2664 		.input_name  = "-",
2665 		.samples = LIST_HEAD_INIT(inject.samples),
2666 		.output = {
2667 			.path = "-",
2668 			.mode = PERF_DATA_MODE_WRITE,
2669 			.file.use_stdio = true,
2670 		},
2671 	};
2672 	struct perf_data data = {
2673 		.mode = PERF_DATA_MODE_READ,
2674 		.file.use_stdio = true,
2675 	};
2676 	int ret;
2677 	const char *known_build_ids = NULL;
2678 	bool build_ids = false;
2679 	bool build_id_all = false;
2680 	bool mmap2_build_ids = false;
2681 	bool mmap2_build_id_all = false;
2682 
2683 	struct option options[] = {
2684 		OPT_BOOLEAN('b', "build-ids", &build_ids,
2685 			    "Inject build-ids into the output stream"),
2686 		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
2687 			    "Inject build-ids of all DSOs into the output stream"),
2688 		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
2689 			    "Drop unused mmap events, make others mmap2 with build IDs"),
2690 		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
2691 			    "Rewrite all mmap events as mmap2 events with build IDs"),
2692 		OPT_STRING(0, "known-build-ids", &known_build_ids,
2693 			   "buildid path [,buildid path...]",
2694 			   "build-ids to use for given paths"),
2695 		OPT_STRING('i', "input", &inject.input_name, "file",
2696 			   "input file name"),
2697 		OPT_STRING('o', "output", &inject.output.path, "file",
2698 			   "output file name"),
2699 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2700 			    "Merge sched-stat and sched-switch for getting events "
2701 			    "where and how long tasks slept"),
2702 #ifdef HAVE_JITDUMP
2703 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2704 #endif
2705 		OPT_INCR('v', "verbose", &verbose,
2706 			 "be more verbose (show build ids, etc)"),
2707 		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2708 			   "file", "vmlinux pathname"),
2709 		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2710 			    "don't load vmlinux even if found"),
2711 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2712 			   "kallsyms pathname"),
2713 		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2714 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2715 				    NULL, "opts", "Instruction Tracing options\n"
2716 				    ITRACE_HELP,
2717 				    itrace_parse_synth_opts),
2718 		OPT_BOOLEAN(0, "strip", &inject.strip,
2719 			    "strip non-synthesized events (use with --itrace)"),
2720 		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2721 				    "correlate time between VM guests and the host",
2722 				    parse_vm_time_correlation),
2723 		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2724 				    "inject events from a guest perf.data file",
2725 				    parse_guest_data),
2726 		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2727 			   "guest mount directory under which every guest os"
2728 			   " instance has a subdir"),
2729 		OPT_CALLBACK(0, "unwind-style", NULL, "unwind style",
2730 			     "unwind styles (libdw,libunwind)",
2731 			     unwind__option),
2732 		OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
2733 			    "Generate callchains using DWARF and drop register/stack data"),
2734 		OPT_BOOLEAN(0, "aslr", &inject.aslr,
2735 			    "Remap virtual memory addresses similar to ASLR"),
2736 		OPT_END()
2737 	};
2738 	const char * const inject_usage[] = {
2739 		"perf inject [<options>]",
2740 		NULL
2741 	};
2742 	bool ordered_events;
2743 	struct perf_tool *tool = &inject.tool;
2744 
2745 	if (!inject.itrace_synth_opts.set) {
2746 		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
2747 		symbol_conf.lazy_load_kernel_maps = true;
2748 	}
2749 
2750 #ifndef HAVE_JITDUMP
2751 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2752 #endif
2753 #ifndef HAVE_LIBDW_SUPPORT
2754 	set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
2755 #endif
2756 	argc = parse_options(argc, argv, options, inject_usage, 0);
2757 
2758 	/*
2759 	 * Any (unrecognized) arguments left?
2760 	 */
2761 	if (argc)
2762 		usage_with_options(inject_usage, options);
2763 
2764 	if (inject.aslr && inject.convert_callchain) {
2765 		pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n");
2766 		return -EINVAL;
2767 	}
2768 
2769 	if (inject.strip && !inject.itrace_synth_opts.set) {
2770 		pr_err("--strip option requires --itrace option\n");
2771 		return -1;
2772 	}
2773 
2774 	if (symbol__validate_sym_arguments())
2775 		return -1;
2776 
2777 	if (inject.in_place_update) {
2778 		if (!strcmp(inject.input_name, "-")) {
2779 			pr_err("Input file name required for in-place updating\n");
2780 			return -1;
2781 		}
2782 		if (strcmp(inject.output.path, "-")) {
2783 			pr_err("Output file name must not be specified for in-place updating\n");
2784 			return -1;
2785 		}
2786 		if (!data.force && !inject.in_place_update_dry_run) {
2787 			pr_err("The input file would be updated in place, "
2788 				"the --force option is required.\n");
2789 			return -1;
2790 		}
2791 		if (!inject.in_place_update_dry_run)
2792 			data.in_place_update = true;
2793 	} else {
2794 		if (strcmp(inject.output.path, "-") && !inject.strip &&
2795 		    has_kcore_dir(inject.input_name)) {
2796 			inject.output.is_dir = true;
2797 			inject.copy_kcore_dir = true;
2798 		}
2799 		if (perf_data__open(&inject.output)) {
2800 			perror("failed to create output file");
2801 			return -1;
2802 		}
2803 	}
2804 	if (mmap2_build_ids)
2805 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
2806 	if (mmap2_build_id_all)
2807 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
2808 	if (build_ids)
2809 		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
2810 	if (build_id_all)
2811 		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;
2812 
2813 	data.path = inject.input_name;
2814 
2815 	ordered_events = inject.jit_mode || inject.sched_stat ||
2816 		inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2817 		inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
2818 	perf_tool__init(&inject.tool, ordered_events);
2819 	inject.tool.sample		= perf_event__repipe_sample;
2820 	inject.tool.read		= perf_event__repipe_sample;
2821 	inject.tool.mmap		= perf_event__repipe;
2822 	inject.tool.mmap2		= perf_event__repipe;
2823 	inject.tool.comm		= perf_event__repipe;
2824 	inject.tool.namespaces		= perf_event__repipe;
2825 	inject.tool.cgroup		= perf_event__repipe;
2826 	inject.tool.fork		= perf_event__repipe;
2827 	inject.tool.exit		= perf_event__repipe;
2828 	inject.tool.lost		= perf_event__repipe;
2829 	inject.tool.lost_samples	= perf_event__repipe;
2830 	inject.tool.aux			= perf_event__repipe;
2831 	inject.tool.itrace_start	= perf_event__repipe;
2832 	inject.tool.aux_output_hw_id	= perf_event__repipe;
2833 	inject.tool.context_switch	= perf_event__repipe;
2834 	inject.tool.throttle		= perf_event__repipe;
2835 	inject.tool.unthrottle		= perf_event__repipe;
2836 	inject.tool.ksymbol		= perf_event__repipe;
2837 	inject.tool.bpf			= perf_event__repipe;
2838 	inject.tool.text_poke		= perf_event__repipe;
2839 	inject.tool.attr		= perf_event__repipe_attr;
2840 	inject.tool.event_update	= perf_event__repipe_event_update;
2841 	inject.tool.tracing_data	= perf_event__repipe_op2_synth;
2842 	inject.tool.finished_round	= perf_event__repipe_oe_synth;
2843 	inject.tool.build_id		= perf_event__repipe_op2_synth;
2844 	inject.tool.id_index		= perf_event__repipe_op2_synth;
2845 	inject.tool.auxtrace_info	= perf_event__repipe_op2_synth;
2846 	inject.tool.auxtrace_error	= perf_event__repipe_op2_synth;
2847 	inject.tool.time_conv		= perf_event__repipe_op2_synth;
2848 	inject.tool.thread_map		= perf_event__repipe_op2_synth;
2849 	inject.tool.cpu_map		= perf_event__repipe_op2_synth;
2850 	inject.tool.stat_config		= perf_event__repipe_op2_synth;
2851 	inject.tool.stat		= perf_event__repipe_op2_synth;
2852 	inject.tool.stat_round		= perf_event__repipe_op2_synth;
2853 	inject.tool.feature		= perf_event__repipe_op2_synth;
2854 	inject.tool.finished_init	= perf_event__repipe_op2_synth;
2855 	inject.tool.compressed		= perf_event__repipe_op4_synth;
2856 	inject.tool.auxtrace		= perf_event__repipe_auxtrace;
2857 	inject.tool.bpf_metadata	= perf_event__repipe_op2_synth;
2858 	inject.tool.schedstat_cpu	= perf_event__repipe_op2_synth;
2859 	inject.tool.schedstat_domain	= perf_event__repipe_op2_synth;
2860 	inject.tool.dont_split_sample_group = true;
2861 	inject.tool.merge_deferred_callchains = false;
2862 	if (inject.aslr) {
2863 		tool = aslr_tool__new(&inject.tool);
2864 		if (!tool) {
2865 			ret = -ENOMEM;
2866 			goto out_close_output;
2867 		}
2868 	}
2869 	inject.session = __perf_session__new(&data, tool,
2870 					     /*trace_event_repipe=*/inject.output.is_pipe,
2871 					     /*host_env=*/NULL);
2872 
2873 	if (IS_ERR(inject.session)) {
2874 		ret = PTR_ERR(inject.session);
2875 		if (inject.aslr)
2876 			aslr_tool__delete(tool);
2877 		goto out_close_output;
2878 	}
2879 
2880 	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2881 		pr_warning("Decompression initialization failed.\n");
2882 
2883 	if (inject.aslr) {
2884 		struct evsel *evsel;
2885 
2886 		evlist__for_each_entry(inject.session->evlist, evsel) {
2887 			ret = aslr_tool__cache_orig_attrs(tool, evsel);
2888 			if (ret) {
2889 				pr_err("Failed to cache original attributes: %d\n", ret);
2890 				goto out_delete;
2891 			}
2892 		}
2893 	}
2894 
2895 	/* Save original section info before feature bits change */
2896 	ret = save_section_info(&inject);
2897 	if (ret)
2898 		goto out_delete;
2899 
2900 	if (inject.output.is_pipe) {
2901 		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2902 		if (ret < 0) {
2903 			pr_err("Couldn't write a new pipe header.\n");
2904 			goto out_delete;
2905 		}
2906 
2907 		/*
2908 		 * If the input is already a pipe then the features and
2909 		 * attributes don't need synthesizing, they will be present in
2910 		 * the input.
2911 		 */
2912 		if (!data.is_pipe) {
2913 			if (inject.aslr)
2914 				aslr_tool__strip_evlist(tool, inject.session->evlist);
2915 
2916 			ret = perf_event__synthesize_for_pipe(&inject.tool,
2917 							      inject.session,
2918 							      &inject.output,
2919 							      perf_event__repipe);
2920 
2921 			if (inject.aslr)
2922 				aslr_tool__restore_evlist(tool, inject.session->evlist);
2923 
2924 			if (ret < 0)
2925 				goto out_delete;
2926 		}
2927 	}
2928 
2929 	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2930 	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2931 		/*
2932 		 * to make sure the mmap records are ordered correctly
2933 		 * and so that the correct especially due to jitted code
2934 		 * mmaps. We cannot generate the buildid hit list and
2935 		 * inject the jit mmaps at the same time for now.
2936 		 */
2937 		inject.tool.ordering_requires_timestamps = true;
2938 	}
2939 	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
2940 		inject.known_build_ids =
2941 			perf_inject__parse_known_build_ids(known_build_ids);
2942 
2943 		if (inject.known_build_ids == NULL) {
2944 			pr_err("Couldn't parse known build ids.\n");
2945 			goto out_delete;
2946 		}
2947 	}
2948 
2949 	if (inject.convert_callchain) {
2950 		struct evsel *evsel;
2951 
2952 		if (inject.output.is_pipe || inject.session->data->is_pipe) {
2953 			pr_err("--convert-callchain cannot work with pipe\n");
2954 			goto out_delete;
2955 		}
2956 
2957 		evlist__for_each_entry(inject.session->evlist, evsel) {
2958 			if (!evsel__has_dwarf_callchain(evsel) && !evsel__is_dummy_event(evsel)) {
2959 				pr_err("--convert-callchain requires DWARF call graph.\n");
2960 				goto out_delete;
2961 			}
2962 		}
2963 
2964 		inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
2965 		if (inject.raw_callchain == NULL) {
2966 			pr_err("callchain allocation failed\n");
2967 			goto out_delete;
2968 		}
2969 	}
2970 
2971 #ifdef HAVE_JITDUMP
2972 	if (inject.jit_mode) {
2973 		inject.tool.mmap2	   = perf_event__repipe_mmap2;
2974 		inject.tool.mmap	   = perf_event__repipe_mmap;
2975 		inject.tool.ordering_requires_timestamps = true;
2976 		/*
2977 		 * JIT MMAP injection injects all MMAP events in one go, so it
2978 		 * does not obey finished_round semantics.
2979 		 */
2980 		inject.tool.finished_round = perf_event__drop_oe;
2981 	}
2982 #endif
2983 	ret = symbol__init(perf_session__env(inject.session));
2984 	if (ret < 0)
2985 		goto out_delete;
2986 
2987 	ret = __cmd_inject(&inject);
2988 
2989 	guest_session__exit(&inject.guest_session);
2990 
2991 out_delete:
2992 	strlist__delete(inject.known_build_ids);
2993 	zstd_fini(&(inject.session->zstd_data));
2994 	perf_session__delete(inject.session);
2995 	if (inject.aslr)
2996 		aslr_tool__delete(tool);
2997 out_close_output:
2998 	if (!inject.in_place_update)
2999 		perf_data__close(&inject.output);
3000 	free(inject.itrace_synth_opts.vm_tm_corr_args);
3001 	free(inject.event_copy);
3002 	free(inject.guest_session.ev.event_buf);
3003 	free(inject.raw_callchain);
3004 	return ret;
3005 }
3006