xref: /linux/tools/perf/builtin-inject.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-inject.c
4  *
5  * Builtin inject command: Examine the live mode (stdin) event stream
6  * and repipe it to stdout while optionally injecting additional
7  * events into it.
8  */
9 #include "builtin.h"
10 
11 #include "util/color.h"
12 #include "util/dso.h"
13 #include "util/vdso.h"
14 #include "util/evlist.h"
15 #include "util/evsel.h"
16 #include "util/map.h"
17 #include "util/session.h"
18 #include "util/tool.h"
19 #include "util/debug.h"
20 #include "util/build-id.h"
21 #include "util/data.h"
22 #include "util/auxtrace.h"
23 #include "util/jit.h"
24 #include "util/string2.h"
25 #include "util/symbol.h"
26 #include "util/synthetic-events.h"
27 #include "util/thread.h"
28 #include "util/namespaces.h"
29 #include "util/util.h"
30 #include "util/tsc.h"
31 
32 #include <internal/lib.h>
33 
34 #include <linux/err.h>
35 #include <subcmd/parse-options.h>
36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
37 
38 #include <linux/list.h>
39 #include <linux/string.h>
40 #include <linux/zalloc.h>
41 #include <linux/hash.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <signal.h>
45 #include <inttypes.h>
46 
/*
 * A guest perf event stashed until it is time to inject it into the host
 * stream (see guest_session.ev).
 */
struct guest_event {
	/* Parsed sample data for 'event' */
	struct perf_sample		sample;
	/* The event itself; presumably points into 'event_buf' — TODO confirm at the fetch site */
	union perf_event		*event;
	/* Backing storage for 'event' */
	char				*event_buf;
};
52 
/* Hash table entry mapping a guest sample ID to a host sample ID for a VCPU. */
struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	/* Guest sample ID */
	u64				id;
	/* Corresponding host sample ID */
	u64				host_id;
	/* Guest VCPU number */
	u32				vcpu;
};
60 
/* Hash table entry mapping a QEMU thread ID to the guest VCPU it runs. */
struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	/* Thread ID of QEMU thread */
	u32				tid;
	/* Guest VCPU number run by that thread */
	u32				vcpu;
};
68 
/* Per-VCPU state: where (and by which QEMU thread) the VCPU is running. */
struct guest_vcpu {
	/* Current host CPU */
	u32				cpu;
	/* Thread ID of QEMU thread */
	u32				tid;
};
75 
/*
 * State for injecting events from a guest perf.data file into the host
 * event stream. NOTE(review): most users of this struct are outside this
 * chunk; field descriptions below are inferred from names and types —
 * confirm against the guest_session__*() functions.
 */
struct guest_session {
	/* Path of the guest perf.data file */
	char				*perf_data_file;
	/* PID used to identify the guest machine in the host session */
	u32				machine_pid;
	/* Guest->host time conversion: offset and scale */
	u64				time_offset;
	double				time_scale;
	struct perf_tool		tool;
	struct perf_data		data;
	struct perf_session		*session;
	/* Temporary file used while building the merged output */
	char				*tmp_file_name;
	int				tmp_fd;
	/* TSC conversion parameters for host and guest */
	struct perf_tsc_conversion	host_tc;
	struct perf_tsc_conversion	guest_tc;
	bool				copy_kcore_dir;
	/* Whether the TSC conversion (have_tc) was found */
	bool				have_tc;
	bool				fetched;
	bool				ready;
	/* Default sample ID header size / ID for events lacking one */
	u16				dflt_id_hdr_size;
	u64				dflt_id;
	u64				highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu		*vcpu;
	size_t				vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head		heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head		tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event		ev;
};
105 
/*
 * How build IDs are injected into the output stream:
 * - INJECT_HEADER_*: synthesize separate HEADER_BUILD_ID events.
 * - MMAP2_BUILDID_*: rewrite mmap2 events to carry the build ID inline.
 * - *_ALL: inject for every mmap seen; *_LAZY: only for DSOs/maps actually
 *   hit by samples (see mark_dso_hit()).
 */
enum build_id_rewrite_style {
	BID_RWS__NONE = 0,
	BID_RWS__INJECT_HEADER_LAZY,
	BID_RWS__INJECT_HEADER_ALL,
	BID_RWS__MMAP2_BUILDID_ALL,
	BID_RWS__MMAP2_BUILDID_LAZY,
};
113 
/* Top-level state for the inject command. */
struct perf_inject {
	struct perf_tool	tool;
	struct perf_session	*session;
	/* What kind of build-id injection to perform, if any */
	enum build_id_rewrite_style build_id_style;
	bool			sched_stat;
	/* Set once an auxtrace event is seen (perf_event__repipe_auxtrace()) */
	bool			have_auxtrace;
	bool			strip;
	bool			jit_mode;
	bool			in_place_update;
	bool			in_place_update_dry_run;
	bool			copy_kcore_dir;
	bool			convert_callchain;
	const char		*input_name;
	struct perf_data	output;
	/* Total bytes written to the output (see output_bytes()) */
	u64			bytes_written;
	/* Sample ID of the first AUX event (see perf_event__drop_aux()) */
	u64			aux_id;
	/* Stashed sched_switch events, keyed by tid (struct event_entry) */
	struct list_head	samples;
	struct itrace_synth_opts itrace_synth_opts;
	/* PERF_SAMPLE_MAX_SIZE scratch buffer for rewritten events */
	char			*event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session	guest_session;
	/* User-supplied "buildid path" pairs (--known-build-ids) */
	struct strlist		*known_build_ids;
	/* Evsel providing the sample ID layout for synthesized mmap events */
	const struct evsel	*mmap_evsel;
	/* Rewritten callchain for perf_event__convert_sample_callchain() */
	struct ip_callchain	*raw_callchain;
};
139 
/*
 * A stashed copy of a sched_switch event, kept on perf_inject.samples until
 * a matching sched_stat event arrives (see perf_inject__sched_switch()).
 */
struct event_entry {
	struct list_head node;
	u32		 tid;
	/* The copied event follows inline (flexible array member) */
	union perf_event event[];
};
145 
146 static int tool__inject_build_id(const struct perf_tool *tool,
147 				 struct perf_sample *sample,
148 				 struct machine *machine,
149 				 const struct evsel *evsel,
150 				 __u16 misc,
151 				 const char *filename,
152 				 struct dso *dso, u32 flags);
153 static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
154 				      struct perf_sample *sample,
155 				      struct machine *machine,
156 				      const struct evsel *evsel,
157 				      __u16 misc,
158 				      __u32 pid, __u32 tid,
159 				      __u64 start, __u64 len, __u64 pgoff,
160 				      struct dso *dso,
161 				      __u32 prot, __u32 flags,
162 				      const char *filename);
163 
164 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
165 {
166 	ssize_t size;
167 
168 	size = perf_data__write(&inject->output, buf, sz);
169 	if (size < 0)
170 		return -errno;
171 
172 	inject->bytes_written += size;
173 	return 0;
174 }
175 
176 static int perf_event__repipe_synth(const struct perf_tool *tool,
177 				    union perf_event *event)
178 
179 {
180 	struct perf_inject *inject = container_of(tool, struct perf_inject,
181 						  tool);
182 
183 	return output_bytes(inject, event, event->header.size);
184 }
185 
186 static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
187 				       union perf_event *event,
188 				       struct ordered_events *oe __maybe_unused)
189 {
190 	return perf_event__repipe_synth(tool, event);
191 }
192 
#ifdef HAVE_JITDUMP
/* Silently discard an ordered event (used when stripping in JIT mode). */
static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif
201 
202 static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
203 					struct perf_session *session __maybe_unused,
204 					union perf_event *event)
205 {
206 	return perf_event__repipe_synth(tool, event);
207 }
208 
209 static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
210 					struct perf_session *session __maybe_unused,
211 					union perf_event *event,
212 					u64 data __maybe_unused,
213 					const char *str __maybe_unused)
214 {
215 	return perf_event__repipe_synth(tool, event);
216 }
217 
218 static int perf_event__repipe_attr(const struct perf_tool *tool,
219 				   union perf_event *event,
220 				   struct evlist **pevlist)
221 {
222 	struct perf_inject *inject = container_of(tool, struct perf_inject,
223 						  tool);
224 	int ret;
225 
226 	ret = perf_event__process_attr(tool, event, pevlist);
227 	if (ret)
228 		return ret;
229 
230 	/* If the output isn't a pipe then the attributes will be written as part of the header. */
231 	if (!inject->output.is_pipe)
232 		return 0;
233 
234 	return perf_event__repipe_synth(tool, event);
235 }
236 
237 static int perf_event__repipe_event_update(const struct perf_tool *tool,
238 					   union perf_event *event,
239 					   struct evlist **pevlist __maybe_unused)
240 {
241 	return perf_event__repipe_synth(tool, event);
242 }
243 
244 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
245 {
246 	char buf[4096];
247 	ssize_t ssz;
248 	int ret;
249 
250 	while (size > 0) {
251 		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
252 		if (ssz < 0)
253 			return -errno;
254 		ret = output_bytes(inject, buf, ssz);
255 		if (ret)
256 			return ret;
257 		size -= ssz;
258 	}
259 
260 	return 0;
261 }
262 
/*
 * Repipe an auxtrace event together with its trailing data, recording the
 * event's offset in the auxtrace index when the output is a file.
 * Returns the number of auxtrace data bytes (so the caller can skip them),
 * or a negative error.
 */
static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
				       struct perf_session *session,
				       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		/* Index the event by its position in the output file */
		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		/*
		 * The auxtrace data is not mapped contiguously with the
		 * event, so write the event then stream the data in chunks.
		 */
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		/* Single mmap: event and data are contiguous, one write */
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}
299 
300 static int perf_event__repipe(const struct perf_tool *tool,
301 			      union perf_event *event,
302 			      struct perf_sample *sample __maybe_unused,
303 			      struct machine *machine __maybe_unused)
304 {
305 	return perf_event__repipe_synth(tool, event);
306 }
307 
308 static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
309 			    union perf_event *event __maybe_unused,
310 			    struct perf_sample *sample __maybe_unused,
311 			    struct machine *machine __maybe_unused)
312 {
313 	return 0;
314 }
315 
316 static int perf_event__drop_aux(const struct perf_tool *tool,
317 				union perf_event *event __maybe_unused,
318 				struct perf_sample *sample,
319 				struct machine *machine __maybe_unused)
320 {
321 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
322 
323 	if (!inject->aux_id)
324 		inject->aux_id = sample->id;
325 
326 	return 0;
327 }
328 
/*
 * Produce a copy of a sample event with its embedded AUX area data removed
 * (aux_sample.size zeroed). Returns the trimmed copy, the original event
 * unchanged if the computed sizes fail validation, or ERR_PTR(-ENOMEM).
 */
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	/* sz1: bytes before the aux data; sz2: bytes after it */
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	/* sz1 must at least cover the header plus the aux_sample.size u64 */
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	/* The u64 immediately before the cut is aux_sample.size: zero it */
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}
356 
/*
 * Per-evsel sample handler installed via evsel->handler and dispatched from
 * perf_event__repipe_sample().
 */
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);
362 
/*
 * Process a sample: dispatch to a per-evsel handler if one is installed,
 * otherwise mark the sample's DSO hit (for lazy build-id injection),
 * optionally cut out embedded AUX data, and repipe.
 */
static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	/* When synthesizing from itrace, drop the raw AUX sample data */
	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}
387 
/*
 * Rewrite a sample's DWARF-unwound callchain as a plain callchain and repipe
 * the resulting (smaller) event, dropping the user stack and register data.
 * On any resolution failure the event is still repiped with its original
 * callchain. NOTE(review): assumes inject->raw_callchain was allocated with
 * room for PERF_MAX_STACK_DEPTH entries and that the kernel portion of
 * sample->callchain fits in it — confirm at the allocation site.
 */
static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
						union perf_event *event,
						struct perf_sample *sample,
						struct evsel *evsel,
						struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct callchain_cursor *cursor = get_tls_callchain_cursor();
	union perf_event *event_copy = (void *)inject->event_copy;
	struct callchain_cursor_node *node;
	struct thread *thread;
	u64 sample_type = evsel->core.attr.sample_type;
	u32 sample_size = event->header.size;
	u64 i, k;
	int ret;

	if (event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return -ENOMEM;

		event_copy = (void *)inject->event_copy;
	}

	if (cursor == NULL)
		return -ENOMEM;

	callchain_cursor_reset(cursor);

	thread = machine__find_thread(machine, sample->tid, sample->pid);
	if (thread == NULL)
		goto out;

	/* this will parse DWARF using stack and register data */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					/*parent=*/NULL, /*root_al=*/NULL,
					PERF_MAX_STACK_DEPTH);
	thread__put(thread);
	if (ret != 0)
		goto out;

	/* copy kernel callchain and context entries */
	for (i = 0; i < sample->callchain->nr; i++) {
		inject->raw_callchain->ips[i] = sample->callchain->ips[i];
		if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
			i++;
			break;
		}
	}
	/* Ensure the user-space part is introduced by a PERF_CONTEXT_USER marker */
	if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
		inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;

	/* Append the unwound user-space IPs, skipping kernel and inlined frames */
	node = cursor->first;
	for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
		if (machine__kernel_ip(machine, node->ip))
			/* kernel IPs were added already */;
		else if (node->ms.sym && node->ms.sym->inlined)
			/* we can't handle inlined callchains */;
		else
			inject->raw_callchain->ips[i++] = node->ip;

		node = node->next;
	}

	inject->raw_callchain->nr = i;
	sample->callchain = inject->raw_callchain;

out:
	memcpy(event_copy, event, sizeof(event->header));

	/* adjust sample size for stack and regs */
	sample_size -= sample->user_stack.size;
	sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
	sample_size += (sample->callchain->nr + 1) * sizeof(u64);
	event_copy->header.size = sample_size;

	/* remove sample_type {STACK,REGS}_USER for synthesize */
	sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);

	perf_event__synthesize_sample(event_copy, sample_type,
				      evsel->core.attr.read_format, sample);
	return perf_event__repipe_synth(tool, event_copy);
}
471 
/*
 * Find or create the DSO for @filename in @machine, attaching the thread's
 * namespace info to it. Returns a DSO reference the caller must dso__put(),
 * or NULL on failure.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container.  Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	/* Hand the nsinfo reference to the DSO, or drop it on failure */
	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}
516 
517 /*
518  * The evsel used for the sample ID for mmap events. Typically stashed when
519  * processing mmap events. If not stashed, search the evlist for the first mmap
520  * gathering event.
521  */
522 static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
523 {
524 	struct evsel *pos;
525 
526 	if (inject->mmap_evsel)
527 		return inject->mmap_evsel;
528 
529 	evlist__for_each_entry(inject->session->evlist, pos) {
530 		if (pos->core.attr.mmap) {
531 			inject->mmap_evsel = pos;
532 			return pos;
533 		}
534 	}
535 	pr_err("No mmap events found\n");
536 	return NULL;
537 }
538 
/*
 * Shared handling for mmap and mmap2 events: optionally process JIT dumps,
 * inject build IDs according to inject->build_id_style, and repipe the
 * original event unless it was replaced by a synthesized one.
 */
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			/* Event was consumed and replaced by jit_process() */
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		/* Eagerly inject a HEADER_BUILD_ID event for this mmap's DSO */
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

			if (evsel) {
				dso__set_hit(dso);
				tool__inject_build_id(tool, sample, machine, evsel,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		/* Replace this mmap with an mmap2 event carrying the build ID */
		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

		if (evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			dso__put(dso);
			return 0;
		}
	}
	dso__put(dso);
	/* Lazy mmap2 rewriting emits events at sample time, not here */
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}
643 
/* mmap handler: delegate to the common path with mmap-specific fields. */
static int perf_event__repipe_mmap(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap.pid, event->mmap.tid,
		event->mmap.start, event->mmap.len, event->mmap.pgoff,
		/*flags=*/0, PROT_EXEC,
		event->mmap.filename, /*dso_id=*/NULL,
		perf_event__process_mmap);
}
657 
/*
 * mmap2 handler: build the dso_id from either the carried build ID or the
 * inode data, then delegate to the common path.
 */
static int perf_event__repipe_mmap2(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct dso_id id = dso_id_empty;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
	} else {
		id.maj = event->mmap2.maj;
		id.min = event->mmap2.min;
		id.ino = event->mmap2.ino;
		id.ino_generation = event->mmap2.ino_generation;
		id.mmap2_valid = true;
		id.mmap2_ino_generation_valid = true;
	}

	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap2.pid, event->mmap2.tid,
		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
		event->mmap2.flags, event->mmap2.prot,
		event->mmap2.filename, &id,
		perf_event__process_mmap2);
}
684 
/* Process a fork event (updating machine state) and repipe it. */
static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_fork(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
697 
/* Process a comm event (updating machine state) and repipe it. */
static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_comm(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
710 
/* Process a namespaces event (updating machine state) and repipe it. */
static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err;

	err = perf_event__process_namespaces(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}
722 
/* Process an exit event (updating machine state) and repipe it. */
static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err = perf_event__process_exit(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);
	return err;
}
735 
#ifdef HAVE_LIBTRACEEVENT
/* Repipe a tracing-data event, then process it (which consumes its payload). */
static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
					   struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(tool, event);

	return perf_event__process_tracing_data(tool, session, event);
}
#endif
746 
/*
 * Ensure @dso has a build ID, reading it from the file on disk (entering
 * the DSO's mount namespace, and retrying with a chroot-relative path if
 * needed). Returns 0 if the DSO ends up with a build ID, -1 otherwise.
 */
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;
	struct build_id bid = { .size = 0, };

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		/* Retry with the path as seen from inside the container */
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}
771 
/*
 * Parse the --known-build-ids string into a strlist of "buildid path"
 * entries, dropping entries with no path or a malformed (odd-length,
 * oversized, or non-hex) build ID. Returns NULL on allocation failure.
 */
static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		/*
		 * NOTE(review): bid_len is measured from pos->s, not from
		 * build_id, so any leading whitespace is counted in the
		 * build-id length — confirm entries never carry leading
		 * spaces, or such entries will be rejected below.
		 */
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		/* Every build-id byte must be two hex digits */
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}
807 
/*
 * Look @dso up by long name in the user-supplied known-build-ids list and,
 * if found, set its build ID from the entry. Returns true on a match.
 */
static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		/*
		 * NOTE(review): as in perf_inject__parse_known_build_ids(),
		 * bid_len is measured from pos->s and so includes any leading
		 * whitespace — confirm entries are pre-trimmed.
		 */
		bid_len = dso_name - pos->s;
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		/* Decode the hex string into the build-id bytes */
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}
836 
/*
 * Synthesize a HEADER_BUILD_ID event for @dso into the output. Skips
 * anonymous / no-DSO mappings (returns 0), returns 1 if a user-supplied
 * known build ID was applied instead, and -1 if no build ID could be read
 * or the event could not be synthesized.
 */
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	/* Anonymous and huge-page mappings have no file to read a build ID from */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}
873 
/*
 * Synthesize an mmap2 event that carries @dso's build ID inline, replacing
 * the original mmap/mmap2 event. Returns 0 on success, 1 when the mapping
 * should be repiped unchanged (anonymous / no-DSO), and -1 on failure.
 */
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps. */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 1;
	if (is_no_dso_memory(filename))
		return 1;

	if (dso__read_build_id(dso)) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
						    perf_event__repipe,
						    evsel,
						    misc, pid, tid,
						    start, len, pgoff,
						    dso__bid(dso),
						    prot, flags,
						    filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}
	return 0;
}
912 
/*
 * Record that @map was hit by a sample and, for the lazy build-id styles,
 * inject a build-id event (HEADER_BUILD_ID or mmap2) the first time the
 * DSO/map is hit. When the sample did not land in @map itself
 * (@sample_in_dso false, i.e. a callchain entry), the cpumode misc bits are
 * recomputed from the map instead of trusted from the sample.
 */
static int mark_dso_hit(const struct perf_inject *inject,
			const struct perf_tool *tool,
			struct perf_sample *sample,
			struct machine *machine,
			const struct evsel *mmap_evsel,
			struct map *map, bool sample_in_dso)
{
	struct dso *dso;
	u16 misc = sample->cpumode;

	if (!map)
		return 0;

	if (!sample_in_dso) {
		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
			PERF_RECORD_MISC_GUEST_USER;

		/* Keep the guest/host distinction; derive kernel vs user from the map */
		if ((misc & guest_mask) != 0) {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_GUEST_KERNEL
				: PERF_RECORD_MISC_GUEST_USER;
		} else {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_KERNEL
				: PERF_RECORD_MISC_USER;
		}
	}
	dso = map__dso(map);
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
		/* First hit on this DSO: inject its build-id header event */
		if (dso && !dso__hit(dso)) {
			dso__set_hit(dso);
			tool__inject_build_id(tool, sample, machine,
					     mmap_evsel, misc, dso__long_name(dso), dso,
					     map__flags(map));
		}
	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		/* First hit on this map: synthesize an mmap2 with the build ID */
		if (!map__hit(map)) {
			const struct build_id null_bid = { .size = 0 };
			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
			const char *filename = dso ? dso__long_name(dso) : "";

			map__set_hit(map);
			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
								perf_event__repipe,
								mmap_evsel,
								misc,
								sample->pid, sample->tid,
								map__start(map),
								map__end(map) - map__start(map),
								map__pgoff(map),
								bid,
								map__prot(map),
								map__flags(map),
								filename);
		}
	}
	return 0;
}
973 
/* Context passed through the callchain iterator to mark_dso_hit_callback(). */
struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	struct perf_sample *sample;
	struct machine *machine;
	const struct evsel *mmap_evsel;
};
981 
982 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
983 {
984 	struct mark_dso_hit_args *args = data;
985 	struct map *map = node->ms.map;
986 
987 	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
988 			    args->mmap_evsel, map, /*sample_in_dso=*/false);
989 }
990 
/*
 * Sample handler for lazy build-id injection: mark the DSO containing the
 * sample IP (and every DSO in its callchain) as hit, injecting build-id
 * events on first hit, then repipe the sample. Always returns 0.
 */
int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct mark_dso_hit_args args = {
		.inject = inject,
		.tool = tool,
		/*
		 * Use the parsed sample data of the sample event, which will
		 * have a later timestamp than the mmap event.
		 */
		.sample = sample,
		.machine = machine,
		.mmap_evsel = inject__mmap_evsel(inject),
	};

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
			     /*sample_in_dso=*/true);
	}

	/* Also mark every map the callchain passes through */
	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
					/*symbols=*/false, mark_dso_hit_callback, &args);

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}
1033 
1034 static int perf_inject__sched_process_exit(const struct perf_tool *tool,
1035 					   union perf_event *event __maybe_unused,
1036 					   struct perf_sample *sample,
1037 					   struct evsel *evsel __maybe_unused,
1038 					   struct machine *machine __maybe_unused)
1039 {
1040 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1041 	struct event_entry *ent;
1042 
1043 	list_for_each_entry(ent, &inject->samples, node) {
1044 		if (sample->tid == ent->tid) {
1045 			list_del_init(&ent->node);
1046 			free(ent);
1047 			break;
1048 		}
1049 	}
1050 
1051 	return 0;
1052 }
1053 
1054 static int perf_inject__sched_switch(const struct perf_tool *tool,
1055 				     union perf_event *event,
1056 				     struct perf_sample *sample,
1057 				     struct evsel *evsel,
1058 				     struct machine *machine)
1059 {
1060 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1061 	struct event_entry *ent;
1062 
1063 	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
1064 
1065 	ent = malloc(event->header.size + sizeof(struct event_entry));
1066 	if (ent == NULL) {
1067 		color_fprintf(stderr, PERF_COLOR_RED,
1068 			     "Not enough memory to process sched switch event!");
1069 		return -1;
1070 	}
1071 
1072 	ent->tid = sample->tid;
1073 	memcpy(&ent->event, event, event->header.size);
1074 	list_add(&ent->node, &inject->samples);
1075 	return 0;
1076 }
1077 
1078 #ifdef HAVE_LIBTRACEEVENT
/*
 * On a sched_stat event, re-emit the saved sched_switch event of the same
 * thread (matched via the tracepoint "pid" field), splicing in the stat
 * event's period and timestamp before repiping it.
 */
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	/* Find the saved switch event for this thread; nothing to do if none */
	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	/* Carry the stat event's period and time over to the switch event */
	sample_sw.period = sample->period;
	sample_sw.time	 = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
1108 #endif
1109 
1110 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
1111 {
1112 	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
1113 		return NULL;
1114 	return &gs->vcpu[vcpu];
1115 }
1116 
1117 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
1118 {
1119 	ssize_t ret = writen(gs->tmp_fd, buf, sz);
1120 
1121 	return ret < 0 ? ret : 0;
1122 }
1123 
1124 static int guest_session__repipe(const struct perf_tool *tool,
1125 				 union perf_event *event,
1126 				 struct perf_sample *sample __maybe_unused,
1127 				 struct machine *machine __maybe_unused)
1128 {
1129 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1130 
1131 	return guest_session__output_bytes(gs, event, event->header.size);
1132 }
1133 
1134 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
1135 {
1136 	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
1137 	int hash;
1138 
1139 	if (!guest_tid)
1140 		return -ENOMEM;
1141 
1142 	guest_tid->tid = tid;
1143 	guest_tid->vcpu = vcpu;
1144 	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
1145 	hlist_add_head(&guest_tid->node, &gs->tids[hash]);
1146 
1147 	return 0;
1148 }
1149 
/*
 * perf_session__peek_events() callback: scan host COMM events belonging to
 * the guest machine process (gs->machine_pid) for QEMU VCPU thread names,
 * and record which host thread ID runs which VCPU.
 */
static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * QEMU option -name debug-threads=on, causes thread names formatted as
	 * below, although it is not an ABI. Also libvirt seems to use this by
	 * default. Here we rely on it to tell us which thread is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;	/* 0 = no conversion: not a VCPU thread name */
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	/* vcpu is used to index arrays with int-sized counts */
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	/* The same VCPU must not be claimed by two different threads */
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}
1188 
1189 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
1190 {
1191 	return perf_session__peek_events(session, session->header.data_offset,
1192 					 session->header.data_size,
1193 					 host_peek_vm_comms_cb, gs);
1194 }
1195 
1196 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
1197 {
1198 	return evlist__id2sid(evlist, id);
1199 }
1200 
1201 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
1202 {
1203 	do {
1204 		gs->highest_id += 1;
1205 	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
1206 
1207 	return gs->highest_id;
1208 }
1209 
1210 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
1211 {
1212 	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
1213 	int hash;
1214 
1215 	if (!guest_id)
1216 		return -ENOMEM;
1217 
1218 	guest_id->id = id;
1219 	guest_id->host_id = host_id;
1220 	guest_id->vcpu = vcpu;
1221 	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
1222 	hlist_add_head(&guest_id->node, &gs->heads[hash]);
1223 
1224 	return 0;
1225 }
1226 
1227 static u64 evlist__find_highest_id(struct evlist *evlist)
1228 {
1229 	struct evsel *evsel;
1230 	u64 highest_id = 1;
1231 
1232 	evlist__for_each_entry(evlist, evsel) {
1233 		u32 j;
1234 
1235 		for (j = 0; j < evsel->core.ids; j++) {
1236 			u64 id = evsel->core.id[j];
1237 
1238 			if (id > highest_id)
1239 				highest_id = id;
1240 		}
1241 	}
1242 
1243 	return highest_id;
1244 }
1245 
1246 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
1247 {
1248 	struct evlist *evlist = gs->session->evlist;
1249 	struct evsel *evsel;
1250 	int ret;
1251 
1252 	evlist__for_each_entry(evlist, evsel) {
1253 		u32 j;
1254 
1255 		for (j = 0; j < evsel->core.ids; j++) {
1256 			struct perf_sample_id *sid;
1257 			u64 host_id;
1258 			u64 id;
1259 
1260 			id = evsel->core.id[j];
1261 			sid = evlist__id2sid(evlist, id);
1262 			if (!sid || sid->cpu.cpu == -1)
1263 				continue;
1264 			host_id = guest_session__allocate_new_id(gs, host_evlist);
1265 			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
1266 			if (ret)
1267 				return ret;
1268 		}
1269 	}
1270 
1271 	return 0;
1272 }
1273 
1274 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
1275 {
1276 	struct hlist_head *head;
1277 	struct guest_id *guest_id;
1278 	int hash;
1279 
1280 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
1281 	head = &gs->heads[hash];
1282 
1283 	hlist_for_each_entry(guest_id, head, node)
1284 		if (guest_id->id == id)
1285 			return guest_id;
1286 
1287 	return NULL;
1288 }
1289 
1290 static int process_attr(const struct perf_tool *tool, union perf_event *event,
1291 			struct perf_sample *sample __maybe_unused,
1292 			struct machine *machine __maybe_unused)
1293 {
1294 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1295 
1296 	return perf_event__process_attr(tool, event, &inject->session->evlist);
1297 }
1298 
1299 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
1300 {
1301 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1302 	struct perf_event_attr attr = evsel->core.attr;
1303 	u64 *id_array;
1304 	u32 *vcpu_array;
1305 	int ret = -ENOMEM;
1306 	u32 i;
1307 
1308 	id_array = calloc(evsel->core.ids, sizeof(*id_array));
1309 	if (!id_array)
1310 		return -ENOMEM;
1311 
1312 	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
1313 	if (!vcpu_array)
1314 		goto out;
1315 
1316 	for (i = 0; i < evsel->core.ids; i++) {
1317 		u64 id = evsel->core.id[i];
1318 		struct guest_id *guest_id = guest_session__lookup_id(gs, id);
1319 
1320 		if (!guest_id) {
1321 			pr_err("Failed to find guest id %"PRIu64"\n", id);
1322 			ret = -EINVAL;
1323 			goto out;
1324 		}
1325 		id_array[i] = guest_id->host_id;
1326 		vcpu_array[i] = guest_id->vcpu;
1327 	}
1328 
1329 	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
1330 	attr.exclude_host = 1;
1331 	attr.exclude_guest = 0;
1332 
1333 	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
1334 					  id_array, process_attr);
1335 	if (ret)
1336 		pr_err("Failed to add guest attr.\n");
1337 
1338 	for (i = 0; i < evsel->core.ids; i++) {
1339 		struct perf_sample_id *sid;
1340 		u32 vcpu = vcpu_array[i];
1341 
1342 		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
1343 		/* Guest event is per-thread from the host point of view */
1344 		sid->cpu.cpu = -1;
1345 		sid->tid = gs->vcpu[vcpu].tid;
1346 		sid->machine_pid = gs->machine_pid;
1347 		sid->vcpu.cpu = vcpu;
1348 	}
1349 out:
1350 	free(vcpu_array);
1351 	free(id_array);
1352 	return ret;
1353 }
1354 
1355 static int guest_session__add_attrs(struct guest_session *gs)
1356 {
1357 	struct evlist *evlist = gs->session->evlist;
1358 	struct evsel *evsel;
1359 	int ret;
1360 
1361 	evlist__for_each_entry(evlist, evsel) {
1362 		ret = guest_session__add_attr(gs, evsel);
1363 		if (ret)
1364 			return ret;
1365 	}
1366 
1367 	return 0;
1368 }
1369 
1370 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1371 {
1372 	struct perf_session *session = inject->session;
1373 	struct evlist *evlist = session->evlist;
1374 	struct machine *machine = &session->machines.host;
1375 	size_t from = evlist->core.nr_entries - new_cnt;
1376 
1377 	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1378 						 evlist, machine, from);
1379 }
1380 
1381 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1382 {
1383 	struct hlist_head *head;
1384 	struct guest_tid *guest_tid;
1385 	int hash;
1386 
1387 	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1388 	head = &gs->tids[hash];
1389 
1390 	hlist_for_each_entry(guest_tid, head, node)
1391 		if (guest_tid->tid == tid)
1392 			return guest_tid;
1393 
1394 	return NULL;
1395 }
1396 
1397 static bool dso__is_in_kernel_space(struct dso *dso)
1398 {
1399 	if (dso__is_vdso(dso))
1400 		return false;
1401 
1402 	return dso__is_kcore(dso) ||
1403 	       dso__kernel(dso) ||
1404 	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1405 }
1406 
1407 static u64 evlist__first_id(struct evlist *evlist)
1408 {
1409 	struct evsel *evsel;
1410 
1411 	evlist__for_each_entry(evlist, evsel) {
1412 		if (evsel->core.ids)
1413 			return evsel->core.id[0];
1414 	}
1415 	return 0;
1416 }
1417 
1418 static int process_build_id(const struct perf_tool *tool,
1419 			    union perf_event *event,
1420 			    struct perf_sample *sample __maybe_unused,
1421 			    struct machine *machine __maybe_unused)
1422 {
1423 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1424 
1425 	return perf_event__process_build_id(tool, inject->session, event);
1426 }
1427 
1428 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1429 {
1430 	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1431 	struct perf_sample synth_sample = {
1432 		.pid	   = -1,
1433 		.tid	   = -1,
1434 		.time	   = -1,
1435 		.stream_id = -1,
1436 		.cpu	   = -1,
1437 		.period	   = 1,
1438 		.cpumode   = dso__is_in_kernel_space(dso)
1439 		? PERF_RECORD_MISC_GUEST_KERNEL
1440 		: PERF_RECORD_MISC_GUEST_USER,
1441 	};
1442 
1443 	if (!machine)
1444 		return -ENOMEM;
1445 
1446 	dso__set_hit(dso);
1447 
1448 	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
1449 					       process_build_id, inject__mmap_evsel(inject),
1450 					       /*misc=*/synth_sample.cpumode,
1451 					       dso__bid(dso), dso__long_name(dso));
1452 }
1453 
1454 static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
1455 {
1456 	struct guest_session *gs = data;
1457 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1458 
1459 	if (!dso__has_build_id(dso))
1460 		return 0;
1461 
1462 	return synthesize_build_id(inject, dso, gs->machine_pid);
1463 
1464 }
1465 
1466 static int guest_session__add_build_ids(struct guest_session *gs)
1467 {
1468 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1469 
1470 	/* Build IDs will be put in the Build ID feature section */
1471 	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1472 
1473 	return dsos__for_each_dso(&gs->session->machines.host.dsos,
1474 				  guest_session__add_build_ids_cb,
1475 				  gs);
1476 }
1477 
1478 static int guest_session__ksymbol_event(const struct perf_tool *tool,
1479 					union perf_event *event,
1480 					struct perf_sample *sample __maybe_unused,
1481 					struct machine *machine __maybe_unused)
1482 {
1483 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1484 
1485 	/* Only support out-of-line i.e. no BPF support */
1486 	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1487 		return 0;
1488 
1489 	return guest_session__output_bytes(gs, event, event->header.size);
1490 }
1491 
/*
 * Open and process the guest perf.data file, repiping the event types of
 * interest into a temporary file from which guest_session__inject_events()
 * will later read.  On success, gs->tmp_fd is rewound to the start of the
 * temporary file.  Error paths rely on guest_session__exit() for cleanup
 * of the session, temporary file and file name.
 */
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap		= guest_session__repipe;
	gs->tool.mmap2		= guest_session__repipe;
	gs->tool.comm		= guest_session__repipe;
	gs->tool.fork		= guest_session__repipe;
	gs->tool.exit		= guest_session__repipe;
	gs->tool.lost		= guest_session__repipe;
	gs->tool.context_switch	= guest_session__repipe;
	gs->tool.ksymbol	= guest_session__ksymbol_event;
	gs->tool.text_poke	= guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id	= perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index	= perf_event__process_id_index;

	gs->tool.ordered_events	= true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path	= name;
	gs->data.force	= force;
	gs->data.mode	= PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	/* Rewind so that guest_session__fetch() reads from the beginning */
	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}
1566 
1567 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1568 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1569 {
1570 	struct hlist_node *pos, *n;
1571 	size_t i;
1572 
1573 	for (i = 0; i < hlist_sz; ++i) {
1574 		hlist_for_each_safe(pos, n, &heads[i]) {
1575 			hlist_del(pos);
1576 			free(pos);
1577 		}
1578 	}
1579 }
1580 
/*
 * Release everything the guest session set up: the guest perf session and
 * the ID/TID hash tables, the temporary file (closed and unlinked), the
 * VCPU array, and the guest perf.data file path from option parsing.
 */
static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}
1597 
/*
 * Copy TSC<->time conversion parameters from a TIME_CONV record into the
 * form used by perf_time_to_tsc()/tsc_to_perf_time().
 */
static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift		= time_conv->time_shift;
	tc->time_mult		= time_conv->time_mult;
	tc->time_zero		= time_conv->time_zero;
	tc->time_cycles		= time_conv->time_cycles;
	tc->time_mask		= time_conv->time_mask;
	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
	tc->cap_user_time_short	= time_conv->cap_user_time_short;
}
1608 
1609 static void guest_session__get_tc(struct guest_session *gs)
1610 {
1611 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1612 
1613 	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1614 	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
1615 }
1616 
/*
 * Convert a guest perf time to the host time base: guest time -> guest TSC
 * (when the guest has TSC conversion info), apply the guest's TSC offset
 * and scale, then TSC -> host perf time (when host conversion info is
 * available).  A zero time (i.e. no timestamp) passes through as zero.
 */
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}
1643 
1644 static int guest_session__fetch(struct guest_session *gs)
1645 {
1646 	void *buf;
1647 	struct perf_event_header *hdr;
1648 	size_t hdr_sz = sizeof(*hdr);
1649 	ssize_t ret;
1650 
1651 	buf = gs->ev.event_buf;
1652 	if (!buf) {
1653 		buf = malloc(PERF_SAMPLE_MAX_SIZE);
1654 		if (!buf)
1655 			return -ENOMEM;
1656 		gs->ev.event_buf = buf;
1657 	}
1658 	hdr = buf;
1659 	ret = readn(gs->tmp_fd, buf, hdr_sz);
1660 	if (ret < 0)
1661 		return ret;
1662 
1663 	if (!ret) {
1664 		/* Zero size means EOF */
1665 		hdr->size = 0;
1666 		return 0;
1667 	}
1668 
1669 	buf += hdr_sz;
1670 
1671 	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
1672 	if (ret < 0)
1673 		return ret;
1674 
1675 	gs->ev.event = (union perf_event *)gs->ev.event_buf;
1676 	gs->ev.sample.time = 0;
1677 
1678 	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
1679 		pr_err("Unexpected type fetching guest event");
1680 		return 0;
1681 	}
1682 
1683 	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
1684 	if (ret) {
1685 		pr_err("Parse failed fetching guest event");
1686 		return ret;
1687 	}
1688 
1689 	if (!gs->have_tc) {
1690 		guest_session__get_tc(gs);
1691 		gs->have_tc = true;
1692 	}
1693 
1694 	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
1695 
1696 	return 0;
1697 }
1698 
1699 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1700 				    const struct perf_sample *sample)
1701 {
1702 	struct evsel *evsel;
1703 	void *array;
1704 	int ret;
1705 
1706 	evsel = evlist__id2evsel(evlist, sample->id);
1707 	array = ev;
1708 
1709 	if (!evsel) {
1710 		pr_err("No evsel for id %"PRIu64"\n", sample->id);
1711 		return -EINVAL;
1712 	}
1713 
1714 	array += ev->header.size;
1715 	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1716 	if (ret < 0)
1717 		return ret;
1718 
1719 	if (ret & 7) {
1720 		pr_err("Bad id sample size %d\n", ret);
1721 		return -EINVAL;
1722 	}
1723 
1724 	ev->header.size += ret;
1725 
1726 	return 0;
1727 }
1728 
/*
 * Inject guest events with timestamps up to and including @timestamp into
 * the output.  Each event fetched from the temporary file is rewritten to
 * look like a host event: cpumode changed to the guest variant, the guest
 * ID sample stripped, sample ID / stream ID / CPU remapped to their host
 * values, and a new host-compatible ID sample appended.  Returns 0 at EOF
 * or when the next event is newer than @timestamp (it stays cached in
 * gs->ev for the next call).
 */
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		/* Fetch the next event unless one is already cached */
		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		/* Keep the event cached until its time comes */
		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		/* Events without an ID sample carry the default ID */
		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		/* Event sizes must remain 8-byte aligned at every step */
		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}
1836 
1837 static int guest_session__flush_events(struct guest_session *gs)
1838 {
1839 	return guest_session__inject_events(gs, -1);
1840 }
1841 
1842 static int host__repipe(const struct perf_tool *tool,
1843 			union perf_event *event,
1844 			struct perf_sample *sample,
1845 			struct machine *machine)
1846 {
1847 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1848 	int ret;
1849 
1850 	ret = guest_session__inject_events(&inject->guest_session, sample->time);
1851 	if (ret)
1852 		return ret;
1853 
1854 	return perf_event__repipe(tool, event, sample, machine);
1855 }
1856 
/*
 * Once the host session header has been read (FINISHED_INIT), set up the
 * guest session: discover the QEMU VCPU threads, allocate and map host
 * sample IDs for the guest IDs, synthesize the guest attrs, id index and
 * build IDs into the host session, then mark the guest session ready and
 * inject any zero-timestamp guest events before repiping FINISHED_INIT.
 */
static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
			       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	/* Flush guest events that have no timestamp (time == 0) */
	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(tool, session, event);
}
1909 
1910 /*
1911  * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1912  * which flushes host events to file up until the last flush time. Then inject
1913  * guest events up to the same time. Finally write out the FINISHED_ROUND event
1914  * itself.
1915  */
1916 static int host__finished_round(const struct perf_tool *tool,
1917 				union perf_event *event,
1918 				struct ordered_events *oe)
1919 {
1920 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1921 	int ret = perf_event__process_finished_round(tool, event, oe);
1922 	u64 timestamp = ordered_events__last_flush_time(oe);
1923 
1924 	if (ret)
1925 		return ret;
1926 
1927 	ret = guest_session__inject_events(&inject->guest_session, timestamp);
1928 	if (ret)
1929 		return ret;
1930 
1931 	return perf_event__repipe_oe_synth(tool, event, oe);
1932 }
1933 
/*
 * Track which host CPU each guest VCPU runs on: when the host switches in
 * a thread known to be a QEMU VCPU thread of the guest machine, record the
 * sample's CPU for that VCPU.  All switch events are then repiped via
 * host__repipe().
 */
static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	/* Only switch-in events for the guest machine's process matter */
	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}
1968 
/* Signal handler: request that session processing stop (session_done flag). */
static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}
1973 
1974 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1975 {
1976 	struct perf_event_attr *attr = &evsel->core.attr;
1977 	const char *name = evsel__name(evsel);
1978 
1979 	if (!(attr->sample_type & sample_type)) {
1980 		pr_err("Samples for %s event do not have %s attribute set.",
1981 			name, sample_msg);
1982 		return -EINVAL;
1983 	}
1984 
1985 	return 0;
1986 }
1987 
/* Sample handler that discards the sample; installed by strip_init(). */
static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}
1996 
1997 static void strip_init(struct perf_inject *inject)
1998 {
1999 	struct evlist *evlist = inject->session->evlist;
2000 	struct evsel *evsel;
2001 
2002 	inject->tool.context_switch = perf_event__drop;
2003 
2004 	evlist__for_each_entry(evlist, evsel)
2005 		evsel->handler = drop_sample;
2006 }
2007 
/*
 * Parse the VM time correlation option: enables VM time correlation in the
 * itrace options and switches to in-place update mode.  An optional leading
 * "dry-run" token also enables dry-run mode; the remainder of the string is
 * kept verbatim for the itrace decoder in vm_tm_corr_args.
 */
static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	/* The option value itself is optional */
	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	/* Keep a copy of the remaining arguments for the decoder */
	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}
2037 
2038 static int parse_guest_data(const struct option *opt, const char *str, int unset)
2039 {
2040 	struct perf_inject *inject = opt->value;
2041 	struct guest_session *gs = &inject->guest_session;
2042 	char *tok;
2043 	char *s;
2044 
2045 	if (unset)
2046 		return 0;
2047 
2048 	if (!str)
2049 		goto bad_args;
2050 
2051 	s = strdup(str);
2052 	if (!s)
2053 		return -ENOMEM;
2054 
2055 	gs->perf_data_file = strsep(&s, ",");
2056 	if (!gs->perf_data_file)
2057 		goto bad_args;
2058 
2059 	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
2060 	if (gs->copy_kcore_dir)
2061 		inject->output.is_dir = true;
2062 
2063 	tok = strsep(&s, ",");
2064 	if (!tok)
2065 		goto bad_args;
2066 	gs->machine_pid = strtoul(tok, NULL, 0);
2067 	if (!inject->guest_session.machine_pid)
2068 		goto bad_args;
2069 
2070 	gs->time_scale = 1;
2071 
2072 	tok = strsep(&s, ",");
2073 	if (!tok)
2074 		goto out;
2075 	gs->time_offset = strtoull(tok, NULL, 0);
2076 
2077 	tok = strsep(&s, ",");
2078 	if (!tok)
2079 		goto out;
2080 	gs->time_scale = strtod(tok, NULL);
2081 	if (!gs->time_scale)
2082 		goto bad_args;
2083 out:
2084 	return 0;
2085 
2086 bad_args:
2087 	pr_err("--guest-data option requires guest perf.data file name, "
2088 	       "guest machine PID, and optionally guest timestamp offset, "
2089 	       "and guest timestamp scale factor, separated by commas.\n");
2090 	return -1;
2091 }
2092 
2093 static int save_section_info_cb(struct perf_file_section *section,
2094 				struct perf_header *ph __maybe_unused,
2095 				int feat, int fd __maybe_unused, void *data)
2096 {
2097 	struct perf_inject *inject = data;
2098 
2099 	inject->secs[feat] = *section;
2100 	return 0;
2101 }
2102 
2103 static int save_section_info(struct perf_inject *inject)
2104 {
2105 	struct perf_header *header = &inject->session->header;
2106 	int fd = perf_data__fd(inject->session->data);
2107 
2108 	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
2109 }
2110 
/*
 * Decide whether a header feature section from the input file can be
 * copied to the output unchanged.  Features describing the machine or
 * software are kept; anything derived from the event data (which injection
 * may have modified) must be regenerated.
 */
static bool keep_feat(struct perf_inject *inject, int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
	case HEADER_CPU_DOMAIN_INFO:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
		/* Kept only when build IDs are not being re-written */
		return inject->build_id_style == BID_RWS__NONE;
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	};
}
2154 
2155 static int read_file(int fd, u64 offs, void *buf, size_t sz)
2156 {
2157 	ssize_t ret = preadn(fd, buf, sz, offs);
2158 
2159 	if (ret < 0)
2160 		return -errno;
2161 	if ((size_t)ret != sz)
2162 		return -EINVAL;
2163 	return 0;
2164 }
2165 
2166 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
2167 {
2168 	int fd = perf_data__fd(inject->session->data);
2169 	u64 offs = inject->secs[feat].offset;
2170 	size_t sz = inject->secs[feat].size;
2171 	void *buf = malloc(sz);
2172 	int ret;
2173 
2174 	if (!buf)
2175 		return -ENOMEM;
2176 
2177 	ret = read_file(fd, offs, buf, sz);
2178 	if (ret)
2179 		goto out_free;
2180 
2181 	ret = fw->write(fw, buf, sz);
2182 out_free:
2183 	free(buf);
2184 	return ret;
2185 }
2186 
/*
 * Couples the generic feat_copier callback interface to the perf_inject
 * instance it operates on (recovered via container_of() in feat_copy_cb()).
 */
struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};
2191 
2192 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
2193 {
2194 	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
2195 	struct perf_inject *inject = inj_fc->inject;
2196 	int ret;
2197 
2198 	if (!inject->secs[feat].offset ||
2199 	    !keep_feat(inject, feat))
2200 		return 0;
2201 
2202 	ret = feat_copy(inject, feat, fw);
2203 	if (ret < 0)
2204 		return ret;
2205 
2206 	return 1; /* Feature section copied */
2207 }
2208 
2209 static int copy_kcore_dir(struct perf_inject *inject)
2210 {
2211 	char *cmd;
2212 	int ret;
2213 
2214 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
2215 		       inject->input_name, inject->output.path);
2216 	if (ret < 0)
2217 		return ret;
2218 	pr_debug("%s\n", cmd);
2219 	ret = system(cmd);
2220 	free(cmd);
2221 	return ret;
2222 }
2223 
2224 static int guest_session__copy_kcore_dir(struct guest_session *gs)
2225 {
2226 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
2227 	char *cmd;
2228 	int ret;
2229 
2230 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
2231 		       gs->perf_data_file, inject->output.path, gs->machine_pid);
2232 	if (ret < 0)
2233 		return ret;
2234 	pr_debug("%s\n", cmd);
2235 	ret = system(cmd);
2236 	free(cmd);
2237 	return ret;
2238 }
2239 
2240 static int output_fd(struct perf_inject *inject)
2241 {
2242 	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
2243 }
2244 
/*
 * Core of perf inject: pick tool callbacks according to the requested mode
 * (build-id injection, sched_stat merging, itrace synthesis, VM time
 * correlation, guest-data injection, or callchain conversion), process all
 * events, then rewrite the output header unless output is a pipe or the
 * input is being updated in place.
 */
static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap	  = perf_event__repipe_mmap;
		inject->tool.mmap2	  = perf_event__repipe_mmap2;
		inject->tool.fork	  = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		/* Hook per-event handlers for the sched tracepoints of interest */
		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		/*
		 * Clear all other handlers: only AUX-trace related events are
		 * of interest for VM time correlation.
		 */
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index	    = perf_event__process_id_index;
		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
		inject->tool.auxtrace	    = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm	    = perf_event__repipe_comm;
		inject->tool.namespaces	    = perf_event__repipe_namespaces;
		inject->tool.exit	    = perf_event__repipe_exit;
		inject->tool.id_index	    = perf_event__process_id_index;
		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
		inject->tool.auxtrace	    = perf_event__process_auxtrace;
		inject->tool.aux	    = perf_event__drop_aux;
		inject->tool.itrace_start   = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap		= host__repipe;
		inject->tool.mmap2		= host__repipe;
		inject->tool.comm		= host__repipe;
		inject->tool.fork		= host__repipe;
		inject->tool.exit		= host__repipe;
		inject->tool.lost		= host__repipe;
		inject->tool.context_switch	= host__repipe;
		inject->tool.ksymbol		= host__repipe;
		inject->tool.text_poke		= host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init	= host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round	= host__finished_round;
		/*
		 * Keep track of which CPU a VCPU is running on (this
		 * overrides the host__repipe assignment above).
		 */
		inject->tool.context_switch	= host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events	= true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	} else if (inject->convert_callchain) {
		inject->tool.sample	= perf_event__convert_sample_callchain;
		inject->tool.fork	= perf_event__repipe_fork;
		inject->tool.comm	= perf_event__repipe_comm;
		inject->tool.exit	= perf_event__repipe_exit;
		inject->tool.mmap	= perf_event__repipe_mmap;
		inject->tool.mmap2	= perf_event__repipe_mmap2;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	/* Leave room at the start of the file for the (rewritten) header */
	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}

		/*
		 * The converted data file won't have stack and registers.
		 * Update the perf_event_attr to remove them before writing.
		 */
		if (inject->convert_callchain) {
			struct evsel *evsel;

			evlist__for_each_entry(session->evlist, evsel) {
				evsel__reset_sample_bit(evsel, REGS_USER);
				evsel__reset_sample_bit(evsel, STACK_USER);
				evsel->core.attr.sample_regs_user = 0;
				evsel->core.attr.sample_stack_user = 0;
				evsel->core.attr.exclude_callchain_user = 0;
			}
		}

		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		/*
		 * NOTE(review): the return value of perf_session__inject_header()
		 * is ignored here, so a failed header write goes unreported -
		 * consider propagating it.
		 */
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}
2461 
2462 static bool evsel__has_dwarf_callchain(struct evsel *evsel)
2463 {
2464 	struct perf_event_attr *attr = &evsel->core.attr;
2465 	const u64 dwarf_callchain_flags =
2466 		PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
2467 
2468 	if (!attr->exclude_callchain_user)
2469 		return false;
2470 
2471 	return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
2472 }
2473 
2474 int cmd_inject(int argc, const char **argv)
2475 {
2476 	struct perf_inject inject = {
2477 		.input_name  = "-",
2478 		.samples = LIST_HEAD_INIT(inject.samples),
2479 		.output = {
2480 			.path = "-",
2481 			.mode = PERF_DATA_MODE_WRITE,
2482 			.use_stdio = true,
2483 		},
2484 	};
2485 	struct perf_data data = {
2486 		.mode = PERF_DATA_MODE_READ,
2487 		.use_stdio = true,
2488 	};
2489 	int ret;
2490 	const char *known_build_ids = NULL;
2491 	bool build_ids = false;
2492 	bool build_id_all = false;
2493 	bool mmap2_build_ids = false;
2494 	bool mmap2_build_id_all = false;
2495 
2496 	struct option options[] = {
2497 		OPT_BOOLEAN('b', "build-ids", &build_ids,
2498 			    "Inject build-ids into the output stream"),
2499 		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
2500 			    "Inject build-ids of all DSOs into the output stream"),
2501 		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
2502 			    "Drop unused mmap events, make others mmap2 with build IDs"),
2503 		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
2504 			    "Rewrite all mmap events as mmap2 events with build IDs"),
2505 		OPT_STRING(0, "known-build-ids", &known_build_ids,
2506 			   "buildid path [,buildid path...]",
2507 			   "build-ids to use for given paths"),
2508 		OPT_STRING('i', "input", &inject.input_name, "file",
2509 			   "input file name"),
2510 		OPT_STRING('o', "output", &inject.output.path, "file",
2511 			   "output file name"),
2512 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2513 			    "Merge sched-stat and sched-switch for getting events "
2514 			    "where and how long tasks slept"),
2515 #ifdef HAVE_JITDUMP
2516 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2517 #endif
2518 		OPT_INCR('v', "verbose", &verbose,
2519 			 "be more verbose (show build ids, etc)"),
2520 		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2521 			   "file", "vmlinux pathname"),
2522 		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2523 			    "don't load vmlinux even if found"),
2524 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2525 			   "kallsyms pathname"),
2526 		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2527 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2528 				    NULL, "opts", "Instruction Tracing options\n"
2529 				    ITRACE_HELP,
2530 				    itrace_parse_synth_opts),
2531 		OPT_BOOLEAN(0, "strip", &inject.strip,
2532 			    "strip non-synthesized events (use with --itrace)"),
2533 		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2534 				    "correlate time between VM guests and the host",
2535 				    parse_vm_time_correlation),
2536 		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2537 				    "inject events from a guest perf.data file",
2538 				    parse_guest_data),
2539 		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2540 			   "guest mount directory under which every guest os"
2541 			   " instance has a subdir"),
2542 		OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
2543 			    "Generate callchains using DWARF and drop register/stack data"),
2544 		OPT_END()
2545 	};
2546 	const char * const inject_usage[] = {
2547 		"perf inject [<options>]",
2548 		NULL
2549 	};
2550 	bool ordered_events;
2551 
2552 	if (!inject.itrace_synth_opts.set) {
2553 		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
2554 		symbol_conf.lazy_load_kernel_maps = true;
2555 	}
2556 
2557 #ifndef HAVE_JITDUMP
2558 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2559 #endif
2560 #ifndef HAVE_LIBDW_SUPPORT
2561 	set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
2562 #endif
2563 	argc = parse_options(argc, argv, options, inject_usage, 0);
2564 
2565 	/*
2566 	 * Any (unrecognized) arguments left?
2567 	 */
2568 	if (argc)
2569 		usage_with_options(inject_usage, options);
2570 
2571 	if (inject.strip && !inject.itrace_synth_opts.set) {
2572 		pr_err("--strip option requires --itrace option\n");
2573 		return -1;
2574 	}
2575 
2576 	if (symbol__validate_sym_arguments())
2577 		return -1;
2578 
2579 	if (inject.in_place_update) {
2580 		if (!strcmp(inject.input_name, "-")) {
2581 			pr_err("Input file name required for in-place updating\n");
2582 			return -1;
2583 		}
2584 		if (strcmp(inject.output.path, "-")) {
2585 			pr_err("Output file name must not be specified for in-place updating\n");
2586 			return -1;
2587 		}
2588 		if (!data.force && !inject.in_place_update_dry_run) {
2589 			pr_err("The input file would be updated in place, "
2590 				"the --force option is required.\n");
2591 			return -1;
2592 		}
2593 		if (!inject.in_place_update_dry_run)
2594 			data.in_place_update = true;
2595 	} else {
2596 		if (strcmp(inject.output.path, "-") && !inject.strip &&
2597 		    has_kcore_dir(inject.input_name)) {
2598 			inject.output.is_dir = true;
2599 			inject.copy_kcore_dir = true;
2600 		}
2601 		if (perf_data__open(&inject.output)) {
2602 			perror("failed to create output file");
2603 			return -1;
2604 		}
2605 	}
2606 	if (mmap2_build_ids)
2607 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
2608 	if (mmap2_build_id_all)
2609 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
2610 	if (build_ids)
2611 		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
2612 	if (build_id_all)
2613 		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;
2614 
2615 	data.path = inject.input_name;
2616 
2617 	ordered_events = inject.jit_mode || inject.sched_stat ||
2618 		inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2619 		inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
2620 	perf_tool__init(&inject.tool, ordered_events);
2621 	inject.tool.sample		= perf_event__repipe_sample;
2622 	inject.tool.read		= perf_event__repipe_sample;
2623 	inject.tool.mmap		= perf_event__repipe;
2624 	inject.tool.mmap2		= perf_event__repipe;
2625 	inject.tool.comm		= perf_event__repipe;
2626 	inject.tool.namespaces		= perf_event__repipe;
2627 	inject.tool.cgroup		= perf_event__repipe;
2628 	inject.tool.fork		= perf_event__repipe;
2629 	inject.tool.exit		= perf_event__repipe;
2630 	inject.tool.lost		= perf_event__repipe;
2631 	inject.tool.lost_samples	= perf_event__repipe;
2632 	inject.tool.aux			= perf_event__repipe;
2633 	inject.tool.itrace_start	= perf_event__repipe;
2634 	inject.tool.aux_output_hw_id	= perf_event__repipe;
2635 	inject.tool.context_switch	= perf_event__repipe;
2636 	inject.tool.throttle		= perf_event__repipe;
2637 	inject.tool.unthrottle		= perf_event__repipe;
2638 	inject.tool.ksymbol		= perf_event__repipe;
2639 	inject.tool.bpf			= perf_event__repipe;
2640 	inject.tool.text_poke		= perf_event__repipe;
2641 	inject.tool.attr		= perf_event__repipe_attr;
2642 	inject.tool.event_update	= perf_event__repipe_event_update;
2643 	inject.tool.tracing_data	= perf_event__repipe_op2_synth;
2644 	inject.tool.finished_round	= perf_event__repipe_oe_synth;
2645 	inject.tool.build_id		= perf_event__repipe_op2_synth;
2646 	inject.tool.id_index		= perf_event__repipe_op2_synth;
2647 	inject.tool.auxtrace_info	= perf_event__repipe_op2_synth;
2648 	inject.tool.auxtrace_error	= perf_event__repipe_op2_synth;
2649 	inject.tool.time_conv		= perf_event__repipe_op2_synth;
2650 	inject.tool.thread_map		= perf_event__repipe_op2_synth;
2651 	inject.tool.cpu_map		= perf_event__repipe_op2_synth;
2652 	inject.tool.stat_config		= perf_event__repipe_op2_synth;
2653 	inject.tool.stat		= perf_event__repipe_op2_synth;
2654 	inject.tool.stat_round		= perf_event__repipe_op2_synth;
2655 	inject.tool.feature		= perf_event__repipe_op2_synth;
2656 	inject.tool.finished_init	= perf_event__repipe_op2_synth;
2657 	inject.tool.compressed		= perf_event__repipe_op4_synth;
2658 	inject.tool.auxtrace		= perf_event__repipe_auxtrace;
2659 	inject.tool.bpf_metadata	= perf_event__repipe_op2_synth;
2660 	inject.tool.schedstat_cpu	= perf_event__repipe_op2_synth;
2661 	inject.tool.schedstat_domain	= perf_event__repipe_op2_synth;
2662 	inject.tool.dont_split_sample_group = true;
2663 	inject.tool.merge_deferred_callchains = false;
2664 	inject.session = __perf_session__new(&data, &inject.tool,
2665 					     /*trace_event_repipe=*/inject.output.is_pipe,
2666 					     /*host_env=*/NULL);
2667 
2668 	if (IS_ERR(inject.session)) {
2669 		ret = PTR_ERR(inject.session);
2670 		goto out_close_output;
2671 	}
2672 
2673 	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2674 		pr_warning("Decompression initialization failed.\n");
2675 
2676 	/* Save original section info before feature bits change */
2677 	ret = save_section_info(&inject);
2678 	if (ret)
2679 		goto out_delete;
2680 
2681 	if (inject.output.is_pipe) {
2682 		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2683 		if (ret < 0) {
2684 			pr_err("Couldn't write a new pipe header.\n");
2685 			goto out_delete;
2686 		}
2687 
2688 		/*
2689 		 * If the input is already a pipe then the features and
2690 		 * attributes don't need synthesizing, they will be present in
2691 		 * the input.
2692 		 */
2693 		if (!data.is_pipe) {
2694 			ret = perf_event__synthesize_for_pipe(&inject.tool,
2695 							      inject.session,
2696 							      &inject.output,
2697 							      perf_event__repipe);
2698 			if (ret < 0)
2699 				goto out_delete;
2700 		}
2701 	}
2702 
2703 	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2704 	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2705 		/*
2706 		 * to make sure the mmap records are ordered correctly
2707 		 * and so that the correct especially due to jitted code
2708 		 * mmaps. We cannot generate the buildid hit list and
2709 		 * inject the jit mmaps at the same time for now.
2710 		 */
2711 		inject.tool.ordering_requires_timestamps = true;
2712 	}
2713 	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
2714 		inject.known_build_ids =
2715 			perf_inject__parse_known_build_ids(known_build_ids);
2716 
2717 		if (inject.known_build_ids == NULL) {
2718 			pr_err("Couldn't parse known build ids.\n");
2719 			goto out_delete;
2720 		}
2721 	}
2722 
2723 	if (inject.convert_callchain) {
2724 		struct evsel *evsel;
2725 
2726 		if (inject.output.is_pipe || inject.session->data->is_pipe) {
2727 			pr_err("--convert-callchain cannot work with pipe\n");
2728 			goto out_delete;
2729 		}
2730 
2731 		evlist__for_each_entry(inject.session->evlist, evsel) {
2732 			if (!evsel__has_dwarf_callchain(evsel) && !evsel__is_dummy_event(evsel)) {
2733 				pr_err("--convert-callchain requires DWARF call graph.\n");
2734 				goto out_delete;
2735 			}
2736 		}
2737 
2738 		inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
2739 		if (inject.raw_callchain == NULL) {
2740 			pr_err("callchain allocation failed\n");
2741 			goto out_delete;
2742 		}
2743 	}
2744 
2745 #ifdef HAVE_JITDUMP
2746 	if (inject.jit_mode) {
2747 		inject.tool.mmap2	   = perf_event__repipe_mmap2;
2748 		inject.tool.mmap	   = perf_event__repipe_mmap;
2749 		inject.tool.ordering_requires_timestamps = true;
2750 		/*
2751 		 * JIT MMAP injection injects all MMAP events in one go, so it
2752 		 * does not obey finished_round semantics.
2753 		 */
2754 		inject.tool.finished_round = perf_event__drop_oe;
2755 	}
2756 #endif
2757 	ret = symbol__init(perf_session__env(inject.session));
2758 	if (ret < 0)
2759 		goto out_delete;
2760 
2761 	ret = __cmd_inject(&inject);
2762 
2763 	guest_session__exit(&inject.guest_session);
2764 
2765 out_delete:
2766 	strlist__delete(inject.known_build_ids);
2767 	zstd_fini(&(inject.session->zstd_data));
2768 	perf_session__delete(inject.session);
2769 out_close_output:
2770 	if (!inject.in_place_update)
2771 		perf_data__close(&inject.output);
2772 	free(inject.itrace_synth_opts.vm_tm_corr_args);
2773 	free(inject.event_copy);
2774 	free(inject.guest_session.ev.event_buf);
2775 	free(inject.raw_callchain);
2776 	return ret;
2777 }
2778