xref: /linux/tools/perf/builtin-inject.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-inject.c
4  *
5  * Builtin inject command: Examine the live mode (stdin) event stream
6  * and repipe it to stdout while optionally injecting additional
7  * events into it.
8  */
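/*
 * Illustrative usage (assuming the documented -b build-id injection
 * option; see perf-inject(1)):
 *
 *	perf record -o - -- ./workload | perf inject -b | perf report -i -
 *
 * The injected stream can then be analysed on a machine that does not
 * have the original binaries.
 */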
9 #include "builtin.h"
10 
11 #include "util/color.h"
12 #include "util/dso.h"
13 #include "util/vdso.h"
14 #include "util/evlist.h"
15 #include "util/evsel.h"
16 #include "util/map.h"
17 #include "util/session.h"
18 #include "util/tool.h"
19 #include "util/debug.h"
20 #include "util/build-id.h"
21 #include "util/data.h"
22 #include "util/auxtrace.h"
23 #include "util/jit.h"
24 #include "util/string2.h"
25 #include "util/symbol.h"
26 #include "util/synthetic-events.h"
27 #include "util/thread.h"
28 #include "util/namespaces.h"
29 #include "util/util.h"
30 #include "util/tsc.h"
31 
32 #include <internal/lib.h>
33 
34 #include <linux/err.h>
35 #include <subcmd/parse-options.h>
36 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
37 
38 #include <linux/list.h>
39 #include <linux/string.h>
40 #include <linux/zalloc.h>
41 #include <linux/hash.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <signal.h>
45 #include <inttypes.h>
46 
47 struct guest_event {
48 	struct perf_sample		sample;
49 	union perf_event		*event;
50 	char				*event_buf;
51 };
52 
53 struct guest_id {
54 	/* hlist_node must be first, see free_hlist() */
55 	struct hlist_node		node;
56 	u64				id;
57 	u64				host_id;
58 	u32				vcpu;
59 };
60 
61 struct guest_tid {
62 	/* hlist_node must be first, see free_hlist() */
63 	struct hlist_node		node;
64 	/* Thread ID of QEMU thread */
65 	u32				tid;
66 	u32				vcpu;
67 };
68 
69 struct guest_vcpu {
70 	/* Current host CPU */
71 	u32				cpu;
72 	/* Thread ID of QEMU thread */
73 	u32				tid;
74 };
75 
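/*
 * State used when injecting events from a guest perf.data file into the
 * host event stream (the --guest-data mode). Guest sample IDs and TIDs
 * are remapped to host-compatible values via the hash tables below, and
 * guest timestamps are converted to host time using the two TSC
 * conversions together with time_offset / time_scale.
 */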
76 struct guest_session {
77 	char				*perf_data_file;
78 	u32				machine_pid;
79 	u64				time_offset;
80 	double				time_scale;
81 	struct perf_tool		tool;
82 	struct perf_data		data;
83 	struct perf_session		*session;
84 	char				*tmp_file_name;
85 	int				tmp_fd;
86 	struct perf_tsc_conversion	host_tc;
87 	struct perf_tsc_conversion	guest_tc;
88 	bool				copy_kcore_dir;
89 	bool				have_tc;
90 	bool				fetched;
91 	bool				ready;
92 	u16				dflt_id_hdr_size;
93 	u64				dflt_id;
94 	u64				highest_id;
95 	/* Array of guest_vcpu */
96 	struct guest_vcpu		*vcpu;
97 	size_t				vcpu_cnt;
98 	/* Hash table for guest_id */
99 	struct hlist_head		heads[PERF_EVLIST__HLIST_SIZE];
100 	/* Hash table for guest_tid */
101 	struct hlist_head		tids[PERF_EVLIST__HLIST_SIZE];
102 	/* Place to stash next guest event */
103 	struct guest_event		ev;
104 };
105 
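/*
 * How build IDs are represented in the output: the INJECT_HEADER styles
 * synthesize separate build-id events, while the MMAP2_BUILDID styles
 * rewrite mmap2 events to carry the build ID inline. The LAZY variants
 * act only on DSOs actually hit by samples; the ALL variants act on
 * every mmap.
 */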
106 enum build_id_rewrite_style {
107 	BID_RWS__NONE = 0,
108 	BID_RWS__INJECT_HEADER_LAZY,
109 	BID_RWS__INJECT_HEADER_ALL,
110 	BID_RWS__MMAP2_BUILDID_ALL,
111 	BID_RWS__MMAP2_BUILDID_LAZY,
112 };
113 
114 struct perf_inject {
115 	struct perf_tool	tool;
116 	struct perf_session	*session;
117 	enum build_id_rewrite_style build_id_style;
118 	bool			sched_stat;
119 	bool			have_auxtrace;
120 	bool			strip;
121 	bool			jit_mode;
122 	bool			in_place_update;
123 	bool			in_place_update_dry_run;
124 	bool			copy_kcore_dir;
125 	const char		*input_name;
126 	struct perf_data	output;
127 	u64			bytes_written;
128 	u64			aux_id;
129 	struct list_head	samples;
130 	struct itrace_synth_opts itrace_synth_opts;
131 	char			*event_copy;
132 	struct perf_file_section secs[HEADER_FEAT_BITS];
133 	struct guest_session	guest_session;
134 	struct strlist		*known_build_ids;
135 	const struct evsel	*mmap_evsel;
136 };
137 
138 struct event_entry {
139 	struct list_head node;
140 	u32		 tid;
141 	union perf_event event[];
142 };
143 
144 static int tool__inject_build_id(const struct perf_tool *tool,
145 				 struct perf_sample *sample,
146 				 struct machine *machine,
147 				 const struct evsel *evsel,
148 				 __u16 misc,
149 				 const char *filename,
150 				 struct dso *dso, u32 flags);
151 static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
152 				      struct perf_sample *sample,
153 				      struct machine *machine,
154 				      const struct evsel *evsel,
155 				      __u16 misc,
156 				      __u32 pid, __u32 tid,
157 				      __u64 start, __u64 len, __u64 pgoff,
158 				      struct dso *dso,
159 				      __u32 prot, __u32 flags,
160 				      const char *filename);
161 
162 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
163 {
164 	ssize_t size;
165 
166 	size = perf_data__write(&inject->output, buf, sz);
167 	if (size < 0)
168 		return -errno;
169 
170 	inject->bytes_written += size;
171 	return 0;
172 }
173 
174 static int perf_event__repipe_synth(const struct perf_tool *tool,
175 				    union perf_event *event)
176 
177 {
178 	struct perf_inject *inject = container_of(tool, struct perf_inject,
179 						  tool);
180 
181 	return output_bytes(inject, event, event->header.size);
182 }
183 
184 static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
185 				       union perf_event *event,
186 				       struct ordered_events *oe __maybe_unused)
187 {
188 	return perf_event__repipe_synth(tool, event);
189 }
190 
191 #ifdef HAVE_JITDUMP
192 static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
193 			       union perf_event *event __maybe_unused,
194 			       struct ordered_events *oe __maybe_unused)
195 {
196 	return 0;
197 }
198 #endif
199 
200 static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
201 					struct perf_session *session __maybe_unused,
202 					union perf_event *event)
203 {
204 	return perf_event__repipe_synth(tool, event);
205 }
206 
207 static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
208 					struct perf_session *session __maybe_unused,
209 					union perf_event *event,
210 					u64 data __maybe_unused,
211 					const char *str __maybe_unused)
212 {
213 	return perf_event__repipe_synth(tool, event);
214 }
215 
216 static int perf_event__repipe_attr(const struct perf_tool *tool,
217 				   union perf_event *event,
218 				   struct evlist **pevlist)
219 {
220 	struct perf_inject *inject = container_of(tool, struct perf_inject,
221 						  tool);
222 	int ret;
223 
224 	ret = perf_event__process_attr(tool, event, pevlist);
225 	if (ret)
226 		return ret;
227 
228 	/* If the output isn't a pipe then the attributes will be written as part of the header. */
229 	if (!inject->output.is_pipe)
230 		return 0;
231 
232 	return perf_event__repipe_synth(tool, event);
233 }
234 
235 static int perf_event__repipe_event_update(const struct perf_tool *tool,
236 					   union perf_event *event,
237 					   struct evlist **pevlist __maybe_unused)
238 {
239 	return perf_event__repipe_synth(tool, event);
240 }
241 
242 static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
243 {
244 	char buf[4096];
245 	ssize_t ssz;
246 	int ret;
247 
248 	while (size > 0) {
249 		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
250 		if (ssz < 0)
251 			return -errno;
252 		ret = output_bytes(inject, buf, ssz);
253 		if (ret)
254 			return ret;
255 		size -= ssz;
256 	}
257 
258 	return 0;
259 }
260 
261 static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
262 				       struct perf_session *session,
263 				       union perf_event *event)
264 {
265 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
266 	int ret;
267 
268 	inject->have_auxtrace = true;
269 
270 	if (!inject->output.is_pipe) {
271 		off_t offset;
272 
273 		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
274 		if (offset == -1)
275 			return -errno;
276 		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
277 						     event, offset);
278 		if (ret < 0)
279 			return ret;
280 	}
281 
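	/*
	 * With a single mmap of the input file, the AUX data directly follows
	 * the event in memory and both can be written out in one go;
	 * otherwise the AUX data has to be streamed from the input separately.
	 */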
282 	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
283 		ret = output_bytes(inject, event, event->header.size);
284 		if (ret < 0)
285 			return ret;
286 		ret = copy_bytes(inject, session->data,
287 				 event->auxtrace.size);
288 	} else {
289 		ret = output_bytes(inject, event,
290 				   event->header.size + event->auxtrace.size);
291 	}
292 	if (ret < 0)
293 		return ret;
294 
295 	return event->auxtrace.size;
296 }
297 
298 static int perf_event__repipe(const struct perf_tool *tool,
299 			      union perf_event *event,
300 			      struct perf_sample *sample __maybe_unused,
301 			      struct machine *machine __maybe_unused)
302 {
303 	return perf_event__repipe_synth(tool, event);
304 }
305 
306 static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
307 			    union perf_event *event __maybe_unused,
308 			    struct perf_sample *sample __maybe_unused,
309 			    struct machine *machine __maybe_unused)
310 {
311 	return 0;
312 }
313 
314 static int perf_event__drop_aux(const struct perf_tool *tool,
315 				union perf_event *event __maybe_unused,
316 				struct perf_sample *sample,
317 				struct machine *machine __maybe_unused)
318 {
319 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
320 
321 	if (!inject->aux_id)
322 		inject->aux_id = sample->id;
323 
324 	return 0;
325 }
326 
327 static union perf_event *
328 perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
329 				 union perf_event *event,
330 				 struct perf_sample *sample)
331 {
332 	size_t sz1 = sample->aux_sample.data - (void *)event;
333 	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
334 	union perf_event *ev;
335 
336 	if (inject->event_copy == NULL) {
337 		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
338 		if (!inject->event_copy)
339 			return ERR_PTR(-ENOMEM);
340 	}
341 	ev = (union perf_event *)inject->event_copy;
342 	if (sz1 > event->header.size || sz2 > event->header.size ||
343 	    sz1 + sz2 > event->header.size ||
344 	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
345 		return event;
346 
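	/*
	 * sz1 covers everything up to and including the aux_sample size u64,
	 * sz2 the bytes after the AUX data. Copy the two halves, shrink the
	 * header, and zero the size field (the u64 just before the cut).
	 */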
347 	memcpy(ev, event, sz1);
348 	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
349 	ev->header.size = sz1 + sz2;
350 	((u64 *)((void *)ev + sz1))[-1] = 0;
351 
352 	return ev;
353 }
354 
355 typedef int (*inject_handler)(const struct perf_tool *tool,
356 			      union perf_event *event,
357 			      struct perf_sample *sample,
358 			      struct evsel *evsel,
359 			      struct machine *machine);
360 
361 static int perf_event__repipe_sample(const struct perf_tool *tool,
362 				     union perf_event *event,
363 				     struct perf_sample *sample,
364 				     struct evsel *evsel,
365 				     struct machine *machine)
366 {
367 	struct perf_inject *inject = container_of(tool, struct perf_inject,
368 						  tool);
369 
370 	if (evsel && evsel->handler) {
371 		inject_handler f = evsel->handler;
372 		return f(tool, event, sample, evsel, machine);
373 	}
374 
375 	build_id__mark_dso_hit(tool, event, sample, evsel, machine);
376 
377 	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
378 		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
379 		if (IS_ERR(event))
380 			return PTR_ERR(event);
381 	}
382 
383 	return perf_event__repipe_synth(tool, event);
384 }
385 
386 static struct dso *findnew_dso(int pid, int tid, const char *filename,
387 			       const struct dso_id *id, struct machine *machine)
388 {
389 	struct thread *thread;
390 	struct nsinfo *nsi = NULL;
391 	struct nsinfo *nnsi;
392 	struct dso *dso;
393 	bool vdso;
394 
395 	thread = machine__findnew_thread(machine, pid, tid);
396 	if (thread == NULL) {
397 		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
398 		return NULL;
399 	}
400 
401 	vdso = is_vdso_map(filename);
402 	nsi = nsinfo__get(thread__nsinfo(thread));
403 
404 	if (vdso) {
405 		/* The vdso maps are always on the host and not the
406 		 * container.  Ensure that we don't use setns to look
407 		 * them up.
408 		 */
409 		nnsi = nsinfo__copy(nsi);
410 		if (nnsi) {
411 			nsinfo__put(nsi);
412 			nsinfo__clear_need_setns(nnsi);
413 			nsi = nnsi;
414 		}
415 		dso = machine__findnew_vdso(machine, thread);
416 	} else {
417 		dso = machine__findnew_dso_id(machine, filename, id);
418 	}
419 
420 	if (dso) {
421 		mutex_lock(dso__lock(dso));
422 		dso__set_nsinfo(dso, nsi);
423 		mutex_unlock(dso__lock(dso));
424 	} else
425 		nsinfo__put(nsi);
426 
427 	thread__put(thread);
428 	return dso;
429 }
430 
431 /*
432  * The evsel used for the sample ID for mmap events. Typically stashed when
433  * processing mmap events. If not stashed, search the evlist for the first mmap
434  * gathering event.
435  */
436 static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
437 {
438 	struct evsel *pos;
439 
440 	if (inject->mmap_evsel)
441 		return inject->mmap_evsel;
442 
443 	evlist__for_each_entry(inject->session->evlist, pos) {
444 		if (pos->core.attr.mmap) {
445 			inject->mmap_evsel = pos;
446 			return pos;
447 		}
448 	}
449 	pr_err("No mmap events found\n");
450 	return NULL;
451 }
452 
453 static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
454 					  union perf_event *event,
455 					  struct perf_sample *sample,
456 					  struct machine *machine,
457 					  __u32 pid, __u32 tid,
458 					  __u64 start, __u64 len, __u64 pgoff,
459 					  __u32 flags, __u32 prot,
460 					  const char *filename,
461 					  const struct dso_id *dso_id,
462 					  int (*perf_event_process)(const struct perf_tool *tool,
463 								    union perf_event *event,
464 								    struct perf_sample *sample,
465 								    struct machine *machine))
466 {
467 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
468 	struct dso *dso = NULL;
469 	bool dso_sought = false;
470 
471 #ifdef HAVE_JITDUMP
472 	if (inject->jit_mode) {
473 		u64 n = 0;
474 		int ret;
475 
476 		/* If this is a jitdump marker, inject JIT mmaps and generate ELF images. */
477 		ret = jit_process(inject->session, &inject->output, machine,
478 				  filename, pid, tid, &n);
479 		if (ret < 0)
480 			return ret;
481 		if (ret) {
482 			inject->bytes_written += n;
483 			return 0;
484 		}
485 	}
486 #endif
487 	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
488 		dso = findnew_dso(pid, tid, filename, dso_id, machine);
489 		dso_sought = true;
490 		if (dso) {
491 			/* mark it not to inject build-id */
492 			dso__set_hit(dso);
493 		}
494 	}
495 	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
496 		if (!dso_sought) {
497 			dso = findnew_dso(pid, tid, filename, dso_id, machine);
498 			dso_sought = true;
499 		}
500 
501 		if (dso && !dso__hit(dso)) {
502 			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);
503 
504 			if (evsel) {
505 				dso__set_hit(dso);
506 				tool__inject_build_id(tool, sample, machine, evsel,
507 						      /*misc=*/sample->cpumode,
508 						      filename, dso, flags);
509 			}
510 		}
511 	} else {
512 		int err;
513 
514 		/*
515 		 * Remember the evsel for lazy build id generation. It is used
516 		 * for the sample id header type.
517 		 */
518 		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
519 		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
520 		    !inject->mmap_evsel)
521 			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);
522 
523 		/* Create the thread, map, etc. Not done for the unordered inject all case. */
524 		err = perf_event_process(tool, event, sample, machine);
525 
526 		if (err) {
527 			dso__put(dso);
528 			return err;
529 		}
530 	}
531 	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
532 	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
533 		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);
534 
535 		if (evsel && !dso_sought) {
536 			dso = findnew_dso(pid, tid, filename, dso_id, machine);
537 			dso_sought = true;
538 		}
539 		if (evsel && dso &&
540 		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
541 						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
542 						 pid, tid, start, len, pgoff,
543 						 dso,
544 						 prot, flags,
545 						 filename)) {
546 			/* Injected mmap2 so no need to repipe. */
547 			dso__put(dso);
548 			return 0;
549 		}
550 	}
551 	dso__put(dso);
552 	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
553 		return 0;
554 
555 	return perf_event__repipe(tool, event, sample, machine);
556 }
557 
558 static int perf_event__repipe_mmap(const struct perf_tool *tool,
559 				union perf_event *event,
560 				struct perf_sample *sample,
561 				struct machine *machine)
562 {
563 	return perf_event__repipe_common_mmap(
564 		tool, event, sample, machine,
565 		event->mmap.pid, event->mmap.tid,
566 		event->mmap.start, event->mmap.len, event->mmap.pgoff,
567 		/*flags=*/0, PROT_EXEC,
568 		event->mmap.filename, /*dso_id=*/NULL,
569 		perf_event__process_mmap);
570 }
571 
572 static int perf_event__repipe_mmap2(const struct perf_tool *tool,
573 				union perf_event *event,
574 				struct perf_sample *sample,
575 				struct machine *machine)
576 {
577 	struct dso_id id = dso_id_empty;
578 
579 	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
580 		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
581 	} else {
582 		id.maj = event->mmap2.maj;
583 		id.min = event->mmap2.min;
584 		id.ino = event->mmap2.ino;
585 		id.ino_generation = event->mmap2.ino_generation;
586 		id.mmap2_valid = true;
587 		id.mmap2_ino_generation_valid = true;
588 	}
589 
590 	return perf_event__repipe_common_mmap(
591 		tool, event, sample, machine,
592 		event->mmap2.pid, event->mmap2.tid,
593 		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
594 		event->mmap2.flags, event->mmap2.prot,
595 		event->mmap2.filename, &id,
596 		perf_event__process_mmap2);
597 }
598 
599 static int perf_event__repipe_fork(const struct perf_tool *tool,
600 				   union perf_event *event,
601 				   struct perf_sample *sample,
602 				   struct machine *machine)
603 {
604 	int err;
605 
606 	err = perf_event__process_fork(tool, event, sample, machine);
607 	perf_event__repipe(tool, event, sample, machine);
608 
609 	return err;
610 }
611 
612 static int perf_event__repipe_comm(const struct perf_tool *tool,
613 				   union perf_event *event,
614 				   struct perf_sample *sample,
615 				   struct machine *machine)
616 {
617 	int err;
618 
619 	err = perf_event__process_comm(tool, event, sample, machine);
620 	perf_event__repipe(tool, event, sample, machine);
621 
622 	return err;
623 }
624 
625 static int perf_event__repipe_namespaces(const struct perf_tool *tool,
626 					 union perf_event *event,
627 					 struct perf_sample *sample,
628 					 struct machine *machine)
629 {
630 	int err = perf_event__process_namespaces(tool, event, sample, machine);
631 
632 	perf_event__repipe(tool, event, sample, machine);
633 
634 	return err;
635 }
636 
637 static int perf_event__repipe_exit(const struct perf_tool *tool,
638 				   union perf_event *event,
639 				   struct perf_sample *sample,
640 				   struct machine *machine)
641 {
642 	int err;
643 
644 	err = perf_event__process_exit(tool, event, sample, machine);
645 	perf_event__repipe(tool, event, sample, machine);
646 
647 	return err;
648 }
649 
650 #ifdef HAVE_LIBTRACEEVENT
651 static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
652 					   struct perf_session *session,
653 					   union perf_event *event)
654 {
655 	perf_event__repipe_synth(tool, event);
656 
657 	return perf_event__process_tracing_data(tool, session, event);
658 }
659 #endif
660 
661 static int dso__read_build_id(struct dso *dso)
662 {
663 	struct nscookie nsc;
664 	struct build_id bid = { .size = 0, };
665 
666 	if (dso__has_build_id(dso))
667 		return 0;
668 
669 	mutex_lock(dso__lock(dso));
670 	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
671 	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
672 		dso__set_build_id(dso, &bid);
673 	else if (dso__nsinfo(dso)) {
674 		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));
675 
676 		if (new_name && filename__read_build_id(new_name, &bid) > 0)
677 			dso__set_build_id(dso, &bid);
678 		free(new_name);
679 	}
680 	nsinfo__mountns_exit(&nsc);
681 	mutex_unlock(dso__lock(dso));
682 
683 	return dso__has_build_id(dso) ? 0 : -1;
684 }
685 
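/*
 * Parse the --known-build-ids argument: a list of "<hex build id> <dso
 * path>" pairs. Malformed entries (odd-length or non-hex build IDs, or
 * a missing path) are dropped from the resulting strlist.
 */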
686 static struct strlist *perf_inject__parse_known_build_ids(
687 	const char *known_build_ids_string)
688 {
689 	struct str_node *pos, *tmp;
690 	struct strlist *known_build_ids;
691 	int bid_len;
692 
693 	known_build_ids = strlist__new(known_build_ids_string, NULL);
694 	if (known_build_ids == NULL)
695 		return NULL;
696 	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
697 		const char *build_id, *dso_name;
698 
699 		build_id = skip_spaces(pos->s);
700 		dso_name = strchr(build_id, ' ');
701 		if (dso_name == NULL) {
702 			strlist__remove(known_build_ids, pos);
703 			continue;
704 		}
705 		bid_len = dso_name - pos->s;
706 		dso_name = skip_spaces(dso_name);
707 		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
708 			strlist__remove(known_build_ids, pos);
709 			continue;
710 		}
711 		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
712 			if (!isxdigit(build_id[2 * ix]) ||
713 			    !isxdigit(build_id[2 * ix + 1])) {
714 				strlist__remove(known_build_ids, pos);
715 				break;
716 			}
717 		}
718 	}
719 	return known_build_ids;
720 }
721 
722 static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
723 					       struct dso *dso)
724 {
725 	struct str_node *pos;
726 
727 	strlist__for_each_entry(pos, inject->known_build_ids) {
728 		struct build_id bid;
729 		const char *build_id, *dso_name;
730 		size_t bid_len;
731 
732 		build_id = skip_spaces(pos->s);
733 		dso_name = strchr(build_id, ' ');
734 		bid_len = dso_name - pos->s;
735 		if (bid_len > sizeof(bid.data))
736 			bid_len = sizeof(bid.data);
737 		dso_name = skip_spaces(dso_name);
738 		if (strcmp(dso__long_name(dso), dso_name))
739 			continue;
740 		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
741 			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
742 					hex(build_id[2 * ix + 1]));
743 		}
744 		bid.size = bid_len / 2;
745 		dso__set_build_id(dso, &bid);
746 		return true;
747 	}
748 	return false;
749 }
750 
751 static int tool__inject_build_id(const struct perf_tool *tool,
752 				 struct perf_sample *sample,
753 				 struct machine *machine,
754 				 const struct evsel *evsel,
755 				 __u16 misc,
756 				 const char *filename,
757 				 struct dso *dso, u32 flags)
758 {
759 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
760 	int err;
761 
762 	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
763 		return 0;
764 	if (is_no_dso_memory(filename))
765 		return 0;
766 
767 	if (inject->known_build_ids != NULL &&
768 	    perf_inject__lookup_known_build_id(inject, dso))
769 		return 1;
770 
771 	if (dso__read_build_id(dso) < 0) {
772 		pr_debug("no build_id found for %s\n", filename);
773 		return -1;
774 	}
775 
776 	err = perf_event__synthesize_build_id(tool, sample, machine,
777 					      perf_event__repipe,
778 					      evsel, misc, dso__bid(dso),
779 					      filename);
780 	if (err) {
781 		pr_err("Can't synthesize build_id event for %s\n", filename);
782 		return -1;
783 	}
784 
785 	return 0;
786 }
787 
788 static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
789 				       struct perf_sample *sample,
790 				       struct machine *machine,
791 				       const struct evsel *evsel,
792 				       __u16 misc,
793 				       __u32 pid, __u32 tid,
794 				       __u64 start, __u64 len, __u64 pgoff,
795 				       struct dso *dso,
796 				       __u32 prot, __u32 flags,
797 				       const char *filename)
798 {
799 	int err;
800 
801 	/* Return 1 so the caller simply repipes anonymous maps. */
802 	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
803 		return 1;
804 	if (is_no_dso_memory(filename))
805 		return 1;
806 
807 	if (dso__read_build_id(dso)) {
808 		pr_debug("no build_id found for %s\n", filename);
809 		return -1;
810 	}
811 
812 	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
813 						    perf_event__repipe,
814 						    evsel,
815 						    misc, pid, tid,
816 						    start, len, pgoff,
817 						    dso__bid(dso),
818 						    prot, flags,
819 						    filename);
820 	if (err) {
821 		pr_err("Can't synthesize build_id event for %s\n", filename);
822 		return -1;
823 	}
824 	return 0;
825 }
826 
827 static int mark_dso_hit(const struct perf_inject *inject,
828 			const struct perf_tool *tool,
829 			struct perf_sample *sample,
830 			struct machine *machine,
831 			const struct evsel *mmap_evsel,
832 			struct map *map, bool sample_in_dso)
833 {
834 	struct dso *dso;
835 	u16 misc = sample->cpumode;
836 
837 	if (!map)
838 		return 0;
839 
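	/*
	 * When the sample did not occur in this DSO (e.g. a callchain entry),
	 * the sample's cpumode does not describe the map, so recompute the
	 * user/kernel (and guest) bits from the map itself.
	 */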
840 	if (!sample_in_dso) {
841 		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
842 			PERF_RECORD_MISC_GUEST_USER;
843 
844 		if ((misc & guest_mask) != 0) {
845 			misc &= PERF_RECORD_MISC_HYPERVISOR;
846 			misc |= __map__is_kernel(map)
847 				? PERF_RECORD_MISC_GUEST_KERNEL
848 				: PERF_RECORD_MISC_GUEST_USER;
849 		} else {
850 			misc &= PERF_RECORD_MISC_HYPERVISOR;
851 			misc |= __map__is_kernel(map)
852 				? PERF_RECORD_MISC_KERNEL
853 				: PERF_RECORD_MISC_USER;
854 		}
855 	}
856 	dso = map__dso(map);
857 	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
858 		if (dso && !dso__hit(dso)) {
859 			dso__set_hit(dso);
860 			tool__inject_build_id(tool, sample, machine,
861 					     mmap_evsel, misc, dso__long_name(dso), dso,
862 					     map__flags(map));
863 		}
864 	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
865 		if (!map__hit(map)) {
866 			const struct build_id null_bid = { .size = 0 };
867 			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
868 			const char *filename = dso ? dso__long_name(dso) : "";
869 
870 			map__set_hit(map);
871 			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
872 								perf_event__repipe,
873 								mmap_evsel,
874 								misc,
875 								sample->pid, sample->tid,
876 								map__start(map),
877 								map__end(map) - map__start(map),
878 								map__pgoff(map),
879 								bid,
880 								map__prot(map),
881 								map__flags(map),
882 								filename);
883 		}
884 	}
885 	return 0;
886 }
887 
888 struct mark_dso_hit_args {
889 	const struct perf_inject *inject;
890 	const struct perf_tool *tool;
891 	struct perf_sample *sample;
892 	struct machine *machine;
893 	const struct evsel *mmap_evsel;
894 };
895 
896 static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
897 {
898 	struct mark_dso_hit_args *args = data;
899 	struct map *map = node->ms.map;
900 
901 	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
902 			    args->mmap_evsel, map, /*sample_in_dso=*/false);
903 }
904 
905 int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
906 			       struct perf_sample *sample,
907 			       struct evsel *evsel __maybe_unused,
908 			       struct machine *machine)
909 {
910 	struct addr_location al;
911 	struct thread *thread;
912 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
913 	struct mark_dso_hit_args args = {
914 		.inject = inject,
915 		.tool = tool,
916 		/*
917 		 * Use the parsed sample data of the sample event, which will
918 		 * have a later timestamp than the mmap event.
919 		 */
920 		.sample = sample,
921 		.machine = machine,
922 		.mmap_evsel = inject__mmap_evsel(inject),
923 	};
924 
925 	addr_location__init(&al);
926 	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
927 	if (thread == NULL) {
928 		pr_err("problem processing %d event, skipping it.\n",
929 		       event->header.type);
930 		goto repipe;
931 	}
932 
933 	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
934 		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
935 			     /*sample_in_dso=*/true);
936 	}
937 
938 	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
939 					/*symbols=*/false, mark_dso_hit_callback, &args);
940 
941 	thread__put(thread);
942 repipe:
943 	perf_event__repipe(tool, event, sample, machine);
944 	addr_location__exit(&al);
945 	return 0;
946 }
947 
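/*
 * For -s/--sched-stat: sched_switch events are stashed per tid on the
 * inject->samples list; perf_inject__sched_stat() later re-emits the
 * stashed switch event with the sched_stat sample's time and period.
 */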
948 static int perf_inject__sched_process_exit(const struct perf_tool *tool,
949 					   union perf_event *event __maybe_unused,
950 					   struct perf_sample *sample,
951 					   struct evsel *evsel __maybe_unused,
952 					   struct machine *machine __maybe_unused)
953 {
954 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
955 	struct event_entry *ent;
956 
957 	list_for_each_entry(ent, &inject->samples, node) {
958 		if (sample->tid == ent->tid) {
959 			list_del_init(&ent->node);
960 			free(ent);
961 			break;
962 		}
963 	}
964 
965 	return 0;
966 }
967 
968 static int perf_inject__sched_switch(const struct perf_tool *tool,
969 				     union perf_event *event,
970 				     struct perf_sample *sample,
971 				     struct evsel *evsel,
972 				     struct machine *machine)
973 {
974 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
975 	struct event_entry *ent;
976 
977 	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
978 
979 	ent = malloc(event->header.size + sizeof(struct event_entry));
980 	if (ent == NULL) {
981 		color_fprintf(stderr, PERF_COLOR_RED,
982 			     "Not enough memory to process sched switch event!\n");
983 		return -1;
984 	}
985 
986 	ent->tid = sample->tid;
987 	memcpy(&ent->event, event, event->header.size);
988 	list_add(&ent->node, &inject->samples);
989 	return 0;
990 }
991 
992 #ifdef HAVE_LIBTRACEEVENT
993 static int perf_inject__sched_stat(const struct perf_tool *tool,
994 				   union perf_event *event __maybe_unused,
995 				   struct perf_sample *sample,
996 				   struct evsel *evsel,
997 				   struct machine *machine)
998 {
999 	struct event_entry *ent;
1000 	union perf_event *event_sw;
1001 	struct perf_sample sample_sw;
1002 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1003 	u32 pid = evsel__intval(evsel, sample, "pid");
1004 
1005 	list_for_each_entry(ent, &inject->samples, node) {
1006 		if (pid == ent->tid)
1007 			goto found;
1008 	}
1009 
1010 	return 0;
1011 found:
1012 	event_sw = &ent->event[0];
1013 	evsel__parse_sample(evsel, event_sw, &sample_sw);
1014 
1015 	sample_sw.period = sample->period;
1016 	sample_sw.time	 = sample->time;
1017 	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
1018 				      evsel->core.attr.read_format, &sample_sw);
1019 	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
1020 	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
1021 }
1022 #endif
1023 
1024 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
1025 {
1026 	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
1027 		return NULL;
1028 	return &gs->vcpu[vcpu];
1029 }
1030 
1031 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
1032 {
1033 	ssize_t ret = writen(gs->tmp_fd, buf, sz);
1034 
1035 	return ret < 0 ? ret : 0;
1036 }
1037 
1038 static int guest_session__repipe(const struct perf_tool *tool,
1039 				 union perf_event *event,
1040 				 struct perf_sample *sample __maybe_unused,
1041 				 struct machine *machine __maybe_unused)
1042 {
1043 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1044 
1045 	return guest_session__output_bytes(gs, event, event->header.size);
1046 }
1047 
1048 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
1049 {
1050 	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
1051 	int hash;
1052 
1053 	if (!guest_tid)
1054 		return -ENOMEM;
1055 
1056 	guest_tid->tid = tid;
1057 	guest_tid->vcpu = vcpu;
1058 	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
1059 	hlist_add_head(&guest_tid->node, &gs->tids[hash]);
1060 
1061 	return 0;
1062 }
1063 
1064 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
1065 				 union perf_event *event,
1066 				 u64 offset __maybe_unused, void *data)
1067 {
1068 	struct guest_session *gs = data;
1069 	unsigned int vcpu;
1070 	struct guest_vcpu *guest_vcpu;
1071 	int ret;
1072 
1073 	if (event->header.type != PERF_RECORD_COMM ||
1074 	    event->comm.pid != gs->machine_pid)
1075 		return 0;
1076 
1077 	/*
1078 	 * The QEMU option -name debug-threads=on causes thread names to be
1079 	 * formatted as below, although this is not an ABI. libvirt also seems to
1080 	 * use it by default. Here we rely on it to tell us which thread is which VCPU.
1081 	 */
1082 	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
1083 	if (ret <= 0)
1084 		return ret;
1085 	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
1086 		 event->comm.tid, event->comm.comm, vcpu);
1087 	if (vcpu > INT_MAX) {
1088 		pr_err("Invalid VCPU %u\n", vcpu);
1089 		return -EINVAL;
1090 	}
1091 	guest_vcpu = guest_session__vcpu(gs, vcpu);
1092 	if (!guest_vcpu)
1093 		return -ENOMEM;
1094 	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
1095 		pr_err("Fatal error: Two threads found with the same VCPU\n");
1096 		return -EINVAL;
1097 	}
1098 	guest_vcpu->tid = event->comm.tid;
1099 
1100 	return guest_session__map_tid(gs, event->comm.tid, vcpu);
1101 }
1102 
1103 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
1104 {
1105 	return perf_session__peek_events(session, session->header.data_offset,
1106 					 session->header.data_size,
1107 					 host_peek_vm_comms_cb, gs);
1108 }
1109 
1110 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
1111 {
1112 	return evlist__id2sid(evlist, id);
1113 }
1114 
1115 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
1116 {
1117 	do {
1118 		gs->highest_id += 1;
1119 	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
1120 
1121 	return gs->highest_id;
1122 }
1123 
1124 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
1125 {
1126 	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
1127 	int hash;
1128 
1129 	if (!guest_id)
1130 		return -ENOMEM;
1131 
1132 	guest_id->id = id;
1133 	guest_id->host_id = host_id;
1134 	guest_id->vcpu = vcpu;
1135 	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
1136 	hlist_add_head(&guest_id->node, &gs->heads[hash]);
1137 
1138 	return 0;
1139 }
1140 
1141 static u64 evlist__find_highest_id(struct evlist *evlist)
1142 {
1143 	struct evsel *evsel;
1144 	u64 highest_id = 1;
1145 
1146 	evlist__for_each_entry(evlist, evsel) {
1147 		u32 j;
1148 
1149 		for (j = 0; j < evsel->core.ids; j++) {
1150 			u64 id = evsel->core.id[j];
1151 
1152 			if (id > highest_id)
1153 				highest_id = id;
1154 		}
1155 	}
1156 
1157 	return highest_id;
1158 }
1159 
1160 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
1161 {
1162 	struct evlist *evlist = gs->session->evlist;
1163 	struct evsel *evsel;
1164 	int ret;
1165 
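	/*
	 * Allocate a host-unique ID for every per-CPU guest ID; the guest CPU
	 * number is recorded as the VCPU for later TID/CPU remapping.
	 */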
1166 	evlist__for_each_entry(evlist, evsel) {
1167 		u32 j;
1168 
1169 		for (j = 0; j < evsel->core.ids; j++) {
1170 			struct perf_sample_id *sid;
1171 			u64 host_id;
1172 			u64 id;
1173 
1174 			id = evsel->core.id[j];
1175 			sid = evlist__id2sid(evlist, id);
1176 			if (!sid || sid->cpu.cpu == -1)
1177 				continue;
1178 			host_id = guest_session__allocate_new_id(gs, host_evlist);
1179 			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
1180 			if (ret)
1181 				return ret;
1182 		}
1183 	}
1184 
1185 	return 0;
1186 }
1187 
1188 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
1189 {
1190 	struct hlist_head *head;
1191 	struct guest_id *guest_id;
1192 	int hash;
1193 
1194 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
1195 	head = &gs->heads[hash];
1196 
1197 	hlist_for_each_entry(guest_id, head, node)
1198 		if (guest_id->id == id)
1199 			return guest_id;
1200 
1201 	return NULL;
1202 }
1203 
1204 static int process_attr(const struct perf_tool *tool, union perf_event *event,
1205 			struct perf_sample *sample __maybe_unused,
1206 			struct machine *machine __maybe_unused)
1207 {
1208 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1209 
1210 	return perf_event__process_attr(tool, event, &inject->session->evlist);
1211 }
1212 
1213 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
1214 {
1215 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1216 	struct perf_event_attr attr = evsel->core.attr;
1217 	u64 *id_array;
1218 	u32 *vcpu_array;
1219 	int ret = -ENOMEM;
1220 	u32 i;
1221 
1222 	id_array = calloc(evsel->core.ids, sizeof(*id_array));
1223 	if (!id_array)
1224 		return -ENOMEM;
1225 
1226 	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
1227 	if (!vcpu_array)
1228 		goto out;
1229 
1230 	for (i = 0; i < evsel->core.ids; i++) {
1231 		u64 id = evsel->core.id[i];
1232 		struct guest_id *guest_id = guest_session__lookup_id(gs, id);
1233 
1234 		if (!guest_id) {
1235 			pr_err("Failed to find guest id %"PRIu64"\n", id);
1236 			ret = -EINVAL;
1237 			goto out;
1238 		}
1239 		id_array[i] = guest_id->host_id;
1240 		vcpu_array[i] = guest_id->vcpu;
1241 	}
1242 
1243 	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
1244 	attr.exclude_host = 1;
1245 	attr.exclude_guest = 0;
1246 
1247 	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
1248 					  id_array, process_attr);
1249 	if (ret)
1250 		pr_err("Failed to add guest attr.\n");
1251 
1252 	for (i = 0; i < evsel->core.ids; i++) {
1253 		struct perf_sample_id *sid;
1254 		u32 vcpu = vcpu_array[i];
1255 
1256 		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
1257 		/* Guest event is per-thread from the host point of view */
1258 		sid->cpu.cpu = -1;
1259 		sid->tid = gs->vcpu[vcpu].tid;
1260 		sid->machine_pid = gs->machine_pid;
1261 		sid->vcpu.cpu = vcpu;
1262 	}
1263 out:
1264 	free(vcpu_array);
1265 	free(id_array);
1266 	return ret;
1267 }
1268 
1269 static int guest_session__add_attrs(struct guest_session *gs)
1270 {
1271 	struct evlist *evlist = gs->session->evlist;
1272 	struct evsel *evsel;
1273 	int ret;
1274 
1275 	evlist__for_each_entry(evlist, evsel) {
1276 		ret = guest_session__add_attr(gs, evsel);
1277 		if (ret)
1278 			return ret;
1279 	}
1280 
1281 	return 0;
1282 }
1283 
1284 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1285 {
1286 	struct perf_session *session = inject->session;
1287 	struct evlist *evlist = session->evlist;
1288 	struct machine *machine = &session->machines.host;
1289 	size_t from = evlist->core.nr_entries - new_cnt;
1290 
1291 	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1292 						 evlist, machine, from);
1293 }
1294 
1295 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1296 {
1297 	struct hlist_head *head;
1298 	struct guest_tid *guest_tid;
1299 	int hash;
1300 
1301 	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1302 	head = &gs->tids[hash];
1303 
1304 	hlist_for_each_entry(guest_tid, head, node)
1305 		if (guest_tid->tid == tid)
1306 			return guest_tid;
1307 
1308 	return NULL;
1309 }
1310 
1311 static bool dso__is_in_kernel_space(struct dso *dso)
1312 {
1313 	if (dso__is_vdso(dso))
1314 		return false;
1315 
1316 	return dso__is_kcore(dso) ||
1317 	       dso__kernel(dso) ||
1318 	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1319 }
1320 
1321 static u64 evlist__first_id(struct evlist *evlist)
1322 {
1323 	struct evsel *evsel;
1324 
1325 	evlist__for_each_entry(evlist, evsel) {
1326 		if (evsel->core.ids)
1327 			return evsel->core.id[0];
1328 	}
1329 	return 0;
1330 }
1331 
1332 static int process_build_id(const struct perf_tool *tool,
1333 			    union perf_event *event,
1334 			    struct perf_sample *sample __maybe_unused,
1335 			    struct machine *machine __maybe_unused)
1336 {
1337 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1338 
1339 	return perf_event__process_build_id(tool, inject->session, event);
1340 }
1341 
1342 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1343 {
1344 	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1345 	struct perf_sample synth_sample = {
1346 		.pid	   = -1,
1347 		.tid	   = -1,
1348 		.time	   = -1,
1349 		.stream_id = -1,
1350 		.cpu	   = -1,
1351 		.period	   = 1,
1352 		.cpumode   = dso__is_in_kernel_space(dso)
1353 		? PERF_RECORD_MISC_GUEST_KERNEL
1354 		: PERF_RECORD_MISC_GUEST_USER,
1355 	};
1356 
1357 	if (!machine)
1358 		return -ENOMEM;
1359 
1360 	dso__set_hit(dso);
1361 
1362 	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
1363 					       process_build_id, inject__mmap_evsel(inject),
1364 					       /*misc=*/synth_sample.cpumode,
1365 					       dso__bid(dso), dso__long_name(dso));
1366 }
1367 
1368 static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
1369 {
1370 	struct guest_session *gs = data;
1371 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1372 
1373 	if (!dso__has_build_id(dso))
1374 		return 0;
1375 
1376 	return synthesize_build_id(inject, dso, gs->machine_pid);
1377 
1378 }
1379 
1380 static int guest_session__add_build_ids(struct guest_session *gs)
1381 {
1382 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1383 
1384 	/* Build IDs will be put in the Build ID feature section */
1385 	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
1386 
1387 	return dsos__for_each_dso(&gs->session->machines.host.dsos,
1388 				  guest_session__add_build_ids_cb,
1389 				  gs);
1390 }
1391 
1392 static int guest_session__ksymbol_event(const struct perf_tool *tool,
1393 					union perf_event *event,
1394 					struct perf_sample *sample __maybe_unused,
1395 					struct machine *machine __maybe_unused)
1396 {
1397 	struct guest_session *gs = container_of(tool, struct guest_session, tool);
1398 
1399 	/* Only support out-of-line i.e. no BPF support */
1400 	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1401 		return 0;
1402 
1403 	return guest_session__output_bytes(gs, event, event->header.size);
1404 }
1405 
1406 static int guest_session__start(struct guest_session *gs, const char *name, bool force)
1407 {
1408 	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
1409 	struct perf_session *session;
1410 	int ret;
1411 
1412 	/* Only these events will be injected */
1413 	gs->tool.mmap		= guest_session__repipe;
1414 	gs->tool.mmap2		= guest_session__repipe;
1415 	gs->tool.comm		= guest_session__repipe;
1416 	gs->tool.fork		= guest_session__repipe;
1417 	gs->tool.exit		= guest_session__repipe;
1418 	gs->tool.lost		= guest_session__repipe;
1419 	gs->tool.context_switch	= guest_session__repipe;
1420 	gs->tool.ksymbol	= guest_session__ksymbol_event;
1421 	gs->tool.text_poke	= guest_session__repipe;
1422 	/*
1423 	 * Processing a build ID creates a struct dso with that build ID. Later,
1424 	 * all guest dsos are iterated and the build IDs processed into the host
1425 	 * session where they will be output to the Build ID feature section
1426 	 * when the perf.data file header is written.
1427 	 */
1428 	gs->tool.build_id	= perf_event__process_build_id;
1429 	/* Process the id index to know what VCPU an ID belongs to */
1430 	gs->tool.id_index	= perf_event__process_id_index;
1431 
1432 	gs->tool.ordered_events	= true;
1433 	gs->tool.ordering_requires_timestamps = true;
1434 
1435 	gs->data.path	= name;
1436 	gs->data.force	= force;
1437 	gs->data.mode	= PERF_DATA_MODE_READ;
1438 
1439 	session = perf_session__new(&gs->data, &gs->tool);
1440 	if (IS_ERR(session))
1441 		return PTR_ERR(session);
1442 	gs->session = session;
1443 
1444 	/*
1445 	 * Initial events have zeroed ID samples. Get the default ID sample size
1446 	 * used for removing them.
1447 	 */
1448 	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
1449 	/* And default ID for adding back a host-compatible ID sample */
1450 	gs->dflt_id = evlist__first_id(session->evlist);
1451 	if (!gs->dflt_id) {
1452 		pr_err("Guest data has no sample IDs\n");
1453 		return -EINVAL;
1454 	}
1455 
1456 	/* Temporary file for guest events */
1457 	gs->tmp_file_name = strdup(tmp_file_name);
1458 	if (!gs->tmp_file_name)
1459 		return -ENOMEM;
1460 	gs->tmp_fd = mkstemp(gs->tmp_file_name);
1461 	if (gs->tmp_fd < 0)
1462 		return -errno;
1463 
1464 	if (zstd_init(&gs->session->zstd_data, 0) < 0)
1465 		pr_warning("Guest session decompression initialization failed.\n");
1466 
1467 	/*
1468 	 * perf does not support processing 2 sessions simultaneously, so output
1469 	 * guest events to a temporary file.
1470 	 */
1471 	ret = perf_session__process_events(gs->session);
1472 	if (ret)
1473 		return ret;
1474 
1475 	if (lseek(gs->tmp_fd, 0, SEEK_SET))
1476 		return -errno;
1477 
1478 	return 0;
1479 }
1480 
1481 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1482 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1483 {
1484 	struct hlist_node *pos, *n;
1485 	size_t i;
1486 
1487 	for (i = 0; i < hlist_sz; ++i) {
1488 		hlist_for_each_safe(pos, n, &heads[i]) {
1489 			hlist_del(pos);
1490 			free(pos);
1491 		}
1492 	}
1493 }
1494 
1495 static void guest_session__exit(struct guest_session *gs)
1496 {
1497 	if (gs->session) {
1498 		perf_session__delete(gs->session);
1499 		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
1500 		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
1501 	}
1502 	if (gs->tmp_file_name) {
1503 		if (gs->tmp_fd >= 0)
1504 			close(gs->tmp_fd);
1505 		unlink(gs->tmp_file_name);
1506 		zfree(&gs->tmp_file_name);
1507 	}
1508 	zfree(&gs->vcpu);
1509 	zfree(&gs->perf_data_file);
1510 }
1511 
1512 static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
1513 {
1514 	tc->time_shift		= time_conv->time_shift;
1515 	tc->time_mult		= time_conv->time_mult;
1516 	tc->time_zero		= time_conv->time_zero;
1517 	tc->time_cycles		= time_conv->time_cycles;
1518 	tc->time_mask		= time_conv->time_mask;
1519 	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
1520 	tc->cap_user_time_short	= time_conv->cap_user_time_short;
1521 }
1522 
1523 static void guest_session__get_tc(struct guest_session *gs)
1524 {
1525 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1526 
1527 	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
1528 	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
1529 }
1530 
1531 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
1532 {
1533 	u64 tsc;
1534 
1535 	if (!guest_time) {
1536 		*host_time = 0;
1537 		return;
1538 	}
1539 
1540 	if (gs->guest_tc.cap_user_time_zero)
1541 		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
1542 	else
1543 		tsc = guest_time;
1544 
1545 	/*
1546 	 * This is the correct order of operations for x86 if the TSC Offset and
1547 	 * Multiplier values are used.
1548 	 */
1549 	tsc -= gs->time_offset;
1550 	tsc /= gs->time_scale;
1551 
1552 	if (gs->host_tc.cap_user_time_zero)
1553 		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
1554 	else
1555 		*host_time = tsc;
1556 }
1557 
1558 static int guest_session__fetch(struct guest_session *gs)
1559 {
1560 	void *buf;
1561 	struct perf_event_header *hdr;
1562 	size_t hdr_sz = sizeof(*hdr);
1563 	ssize_t ret;
1564 
1565 	buf = gs->ev.event_buf;
1566 	if (!buf) {
1567 		buf = malloc(PERF_SAMPLE_MAX_SIZE);
1568 		if (!buf)
1569 			return -ENOMEM;
1570 		gs->ev.event_buf = buf;
1571 	}
1572 	hdr = buf;
1573 	ret = readn(gs->tmp_fd, buf, hdr_sz);
1574 	if (ret < 0)
1575 		return ret;
1576 
1577 	if (!ret) {
1578 		/* Zero size means EOF */
1579 		hdr->size = 0;
1580 		return 0;
1581 	}
1582 
1583 	buf += hdr_sz;
1584 
1585 	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
1586 	if (ret < 0)
1587 		return ret;
1588 
1589 	gs->ev.event = (union perf_event *)gs->ev.event_buf;
1590 	gs->ev.sample.time = 0;
1591 
1592 	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
1593 		pr_err("Unexpected type fetching guest event\n");
1594 		return 0;
1595 	}
1596 
1597 	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
1598 	if (ret) {
1599 		pr_err("Parse failed fetching guest event\n");
1600 		return ret;
1601 	}
1602 
1603 	if (!gs->have_tc) {
1604 		guest_session__get_tc(gs);
1605 		gs->have_tc = true;
1606 	}
1607 
1608 	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
1609 
1610 	return 0;
1611 }
1612 
1613 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1614 				    const struct perf_sample *sample)
1615 {
1616 	struct evsel *evsel;
1617 	void *array;
1618 	int ret;
1619 
1620 	evsel = evlist__id2evsel(evlist, sample->id);
1621 	array = ev;
1622 
1623 	if (!evsel) {
1624 		pr_err("No evsel for id %"PRIu64"\n", sample->id);
1625 		return -EINVAL;
1626 	}
1627 
1628 	array += ev->header.size;
1629 	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1630 	if (ret < 0)
1631 		return ret;
1632 
1633 	if (ret & 7) {
1634 		pr_err("Bad id sample size %d\n", ret);
1635 		return -EINVAL;
1636 	}
1637 
1638 	ev->header.size += ret;
1639 
1640 	return 0;
1641 }
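/*
 * The synthesized id sample is appended directly after the event payload
 * and folded into header.size, so the record remains self-describing.
 * Resulting layout (both parts are multiples of 8 bytes):
 *
 *	+----------------+-------------------+
 *	| original event | id sample trailer |
 *	+----------------+-------------------+
 *	<------------ ev->header.size ------->
 */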
1642 
1643 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
1644 {
1645 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1646 	int ret;
1647 
1648 	if (!gs->ready)
1649 		return 0;
1650 
1651 	while (1) {
1652 		struct perf_sample *sample;
1653 		struct guest_id *guest_id;
1654 		union perf_event *ev;
1655 		u16 id_hdr_size;
1656 		u8 cpumode;
1657 		u64 id;
1658 
1659 		if (!gs->fetched) {
1660 			ret = guest_session__fetch(gs);
1661 			if (ret)
1662 				return ret;
1663 			gs->fetched = true;
1664 		}
1665 
1666 		ev = gs->ev.event;
1667 		sample = &gs->ev.sample;
1668 
1669 		if (!ev->header.size)
1670 			return 0; /* EOF */
1671 
1672 		if (sample->time > timestamp)
1673 			return 0;
1674 
1675 		/* Change cpumode to guest */
1676 		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1677 		if (cpumode & PERF_RECORD_MISC_USER)
1678 			cpumode = PERF_RECORD_MISC_GUEST_USER;
1679 		else
1680 			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
1681 		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
1682 		ev->header.misc |= cpumode;
1683 
1684 		id = sample->id;
1685 		if (!id) {
1686 			id = gs->dflt_id;
1687 			id_hdr_size = gs->dflt_id_hdr_size;
1688 		} else {
1689 			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
1690 
1691 			id_hdr_size = evsel__id_hdr_size(evsel);
1692 		}
1693 
1694 		if (id_hdr_size & 7) {
1695 			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
1696 			return -EINVAL;
1697 		}
1698 
1699 		if (ev->header.size & 7) {
1700 			pr_err("Bad event size %u\n", ev->header.size);
1701 			return -EINVAL;
1702 		}
1703 
1704 		/* Remove guest id sample */
1705 		ev->header.size -= id_hdr_size;
1706 
1707 		if (ev->header.size & 7) {
1708 			pr_err("Bad raw event size %u\n", ev->header.size);
1709 			return -EINVAL;
1710 		}
1711 
1712 		guest_id = guest_session__lookup_id(gs, id);
1713 		if (!guest_id) {
1714 			pr_err("Guest event with unknown id %llu\n",
1715 			       (unsigned long long)id);
1716 			return -EINVAL;
1717 		}
1718 
1719 		/* Change to host ID to avoid conflicting ID values */
1720 		sample->id = guest_id->host_id;
1721 		sample->stream_id = guest_id->host_id;
1722 
1723 		if (sample->cpu != (u32)-1) {
1724 			if (sample->cpu >= gs->vcpu_cnt) {
1725 				pr_err("Guest event with unknown VCPU %u\n",
1726 				       sample->cpu);
1727 				return -EINVAL;
1728 			}
1729 			/* Change to host CPU instead of guest VCPU */
1730 			sample->cpu = gs->vcpu[sample->cpu].cpu;
1731 		}
1732 
1733 		/* New id sample with new ID and CPU */
1734 		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
1735 		if (ret)
1736 			return ret;
1737 
1738 		if (ev->header.size & 7) {
1739 			pr_err("Bad new event size %u\n", ev->header.size);
1740 			return -EINVAL;
1741 		}
1742 
1743 		gs->fetched = false;
1744 
1745 		ret = output_bytes(inject, ev, ev->header.size);
1746 		if (ret)
1747 			return ret;
1748 	}
1749 }
1750 
1751 static int guest_session__flush_events(struct guest_session *gs)
1752 {
1753 	return guest_session__inject_events(gs, -1);
1754 }
1755 
1756 static int host__repipe(const struct perf_tool *tool,
1757 			union perf_event *event,
1758 			struct perf_sample *sample,
1759 			struct machine *machine)
1760 {
1761 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1762 	int ret;
1763 
1764 	ret = guest_session__inject_events(&inject->guest_session, sample->time);
1765 	if (ret)
1766 		return ret;
1767 
1768 	return perf_event__repipe(tool, event, sample, machine);
1769 }
1770 
1771 static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
1772 			       union perf_event *event)
1773 {
1774 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1775 	struct guest_session *gs = &inject->guest_session;
1776 	int ret;
1777 
1778 	/*
1779 	 * Peek through host COMM events to find QEMU threads and the VCPU they
1780 	 * are running.
1781 	 */
1782 	ret = host_peek_vm_comms(session, gs);
1783 	if (ret)
1784 		return ret;
1785 
1786 	if (!gs->vcpu_cnt) {
1787 		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
1788 		return -EINVAL;
1789 	}
1790 
1791 	/*
1792 	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
1793 	 */
1794 	gs->highest_id = evlist__find_highest_id(session->evlist);
1795 	ret = guest_session__map_ids(gs, session->evlist);
1796 	if (ret)
1797 		return ret;
1798 
1799 	ret = guest_session__add_attrs(gs);
1800 	if (ret)
1801 		return ret;
1802 
1803 	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
1804 	if (ret) {
1805 		pr_err("Failed to synthesize id_index\n");
1806 		return ret;
1807 	}
1808 
1809 	ret = guest_session__add_build_ids(gs);
1810 	if (ret) {
1811 		pr_err("Failed to add guest build IDs\n");
1812 		return ret;
1813 	}
1814 
1815 	gs->ready = true;
1816 
1817 	ret = guest_session__inject_events(gs, 0);
1818 	if (ret)
1819 		return ret;
1820 
1821 	return perf_event__repipe_op2_synth(tool, session, event);
1822 }
1823 
1824 /*
1825  * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1826  * which flushes host events to file up until the last flush time. Then inject
1827  * guest events up to the same time. Finally write out the FINISHED_ROUND event
1828  * itself.
1829  */
1830 static int host__finished_round(const struct perf_tool *tool,
1831 				union perf_event *event,
1832 				struct ordered_events *oe)
1833 {
1834 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1835 	int ret = perf_event__process_finished_round(tool, event, oe);
1836 	u64 timestamp = ordered_events__last_flush_time(oe);
1837 
1838 	if (ret)
1839 		return ret;
1840 
1841 	ret = guest_session__inject_events(&inject->guest_session, timestamp);
1842 	if (ret)
1843 		return ret;
1844 
1845 	return perf_event__repipe_oe_synth(tool, event, oe);
1846 }
1847 
1848 static int host__context_switch(const struct perf_tool *tool,
1849 				union perf_event *event,
1850 				struct perf_sample *sample,
1851 				struct machine *machine)
1852 {
1853 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1854 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1855 	struct guest_session *gs = &inject->guest_session;
1856 	u32 pid = event->context_switch.next_prev_pid;
1857 	u32 tid = event->context_switch.next_prev_tid;
1858 	struct guest_tid *guest_tid;
1859 	u32 vcpu;
1860 
1861 	if (out || pid != gs->machine_pid)
1862 		goto out;
1863 
1864 	guest_tid = guest_session__lookup_tid(gs, tid);
1865 	if (!guest_tid)
1866 		goto out;
1867 
1868 	if (sample->cpu == (u32)-1) {
1869 		pr_err("Switch event does not have CPU\n");
1870 		return -EINVAL;
1871 	}
1872 
1873 	vcpu = guest_tid->vcpu;
1874 	if (vcpu >= gs->vcpu_cnt)
1875 		return -EINVAL;
1876 
1877 	/* Guest is switching in, record which CPU the VCPU is now running on */
1878 	gs->vcpu[vcpu].cpu = sample->cpu;
1879 out:
1880 	return host__repipe(tool, event, sample, machine);
1881 }
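/*
 * Switch-in events of the QEMU machine PID keep gs->vcpu[] current, so
 * that guest_session__inject_events() can later remap a guest sample's
 * VCPU number to the host CPU it was running on at that moment:
 *
 *	sample->cpu = gs->vcpu[sample->cpu].cpu;	// VCPU -> host CPU
 */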
1882 
1883 static void sig_handler(int sig __maybe_unused)
1884 {
1885 	session_done = 1;
1886 }
1887 
1888 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1889 {
1890 	struct perf_event_attr *attr = &evsel->core.attr;
1891 	const char *name = evsel__name(evsel);
1892 
1893 	if (!(attr->sample_type & sample_type)) {
1894 		pr_err("Samples for %s event do not have %s attribute set.\n",
1895 			name, sample_msg);
1896 		return -EINVAL;
1897 	}
1898 
1899 	return 0;
1900 }
1901 
1902 static int drop_sample(const struct perf_tool *tool __maybe_unused,
1903 		       union perf_event *event __maybe_unused,
1904 		       struct perf_sample *sample __maybe_unused,
1905 		       struct evsel *evsel __maybe_unused,
1906 		       struct machine *machine __maybe_unused)
1907 {
1908 	return 0;
1909 }
1910 
1911 static void strip_init(struct perf_inject *inject)
1912 {
1913 	struct evlist *evlist = inject->session->evlist;
1914 	struct evsel *evsel;
1915 
1916 	inject->tool.context_switch = perf_event__drop;
1917 
1918 	evlist__for_each_entry(evlist, evsel)
1919 		evsel->handler = drop_sample;
1920 }
1921 
1922 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
1923 {
1924 	struct perf_inject *inject = opt->value;
1925 	const char *args;
1926 	char *dry_run;
1927 
1928 	if (unset)
1929 		return 0;
1930 
1931 	inject->itrace_synth_opts.set = true;
1932 	inject->itrace_synth_opts.vm_time_correlation = true;
1933 	inject->in_place_update = true;
1934 
1935 	if (!str)
1936 		return 0;
1937 
1938 	dry_run = skip_spaces(str);
1939 	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
1940 		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
1941 		inject->in_place_update_dry_run = true;
1942 		args = dry_run + strlen("dry-run");
1943 	} else {
1944 		args = str;
1945 	}
1946 
1947 	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
1948 
1949 	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
1950 }
1951 
1952 static int parse_guest_data(const struct option *opt, const char *str, int unset)
1953 {
1954 	struct perf_inject *inject = opt->value;
1955 	struct guest_session *gs = &inject->guest_session;
1956 	char *tok;
1957 	char *s;
1958 
1959 	if (unset)
1960 		return 0;
1961 
1962 	if (!str)
1963 		goto bad_args;
1964 
1965 	s = strdup(str);
1966 	if (!s)
1967 		return -ENOMEM;
1968 
1969 	gs->perf_data_file = strsep(&s, ",");
1970 	if (!gs->perf_data_file)
1971 		goto bad_args;
1972 
1973 	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
1974 	if (gs->copy_kcore_dir)
1975 		inject->output.is_dir = true;
1976 
1977 	tok = strsep(&s, ",");
1978 	if (!tok)
1979 		goto bad_args;
1980 	gs->machine_pid = strtoul(tok, NULL, 0);
1981 	if (!inject->guest_session.machine_pid)
1982 		goto bad_args;
1983 
1984 	gs->time_scale = 1;
1985 
1986 	tok = strsep(&s, ",");
1987 	if (!tok)
1988 		goto out;
1989 	gs->time_offset = strtoull(tok, NULL, 0);
1990 
1991 	tok = strsep(&s, ",");
1992 	if (!tok)
1993 		goto out;
1994 	gs->time_scale = strtod(tok, NULL);
1995 	if (!gs->time_scale)
1996 		goto bad_args;
1997 out:
1998 	return 0;
1999 
2000 bad_args:
2001 	pr_err("--guest-data option requires guest perf.data file name, "
2002 	       "guest machine PID, and optionally guest timestamp offset, "
2003 	       "and guest timestamp scale factor, separated by commas.\n");
2004 	return -1;
2005 }
2006 
2007 static int save_section_info_cb(struct perf_file_section *section,
2008 				struct perf_header *ph __maybe_unused,
2009 				int feat, int fd __maybe_unused, void *data)
2010 {
2011 	struct perf_inject *inject = data;
2012 
2013 	inject->secs[feat] = *section;
2014 	return 0;
2015 }
2016 
2017 static int save_section_info(struct perf_inject *inject)
2018 {
2019 	struct perf_header *header = &inject->session->header;
2020 	int fd = perf_data__fd(inject->session->data);
2021 
2022 	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
2023 }
2024 
2025 static bool keep_feat(int feat)
2026 {
2027 	switch (feat) {
2028 	/* Keep original information that describes the machine or software */
2029 	case HEADER_TRACING_DATA:
2030 	case HEADER_HOSTNAME:
2031 	case HEADER_OSRELEASE:
2032 	case HEADER_VERSION:
2033 	case HEADER_ARCH:
2034 	case HEADER_NRCPUS:
2035 	case HEADER_CPUDESC:
2036 	case HEADER_CPUID:
2037 	case HEADER_TOTAL_MEM:
2038 	case HEADER_CPU_TOPOLOGY:
2039 	case HEADER_NUMA_TOPOLOGY:
2040 	case HEADER_PMU_MAPPINGS:
2041 	case HEADER_CACHE:
2042 	case HEADER_MEM_TOPOLOGY:
2043 	case HEADER_CLOCKID:
2044 	case HEADER_BPF_PROG_INFO:
2045 	case HEADER_BPF_BTF:
2046 	case HEADER_CPU_PMU_CAPS:
2047 	case HEADER_CLOCK_DATA:
2048 	case HEADER_HYBRID_TOPOLOGY:
2049 	case HEADER_PMU_CAPS:
2050 		return true;
2051 	/* Information that can be updated */
2052 	case HEADER_BUILD_ID:
2053 	case HEADER_CMDLINE:
2054 	case HEADER_EVENT_DESC:
2055 	case HEADER_BRANCH_STACK:
2056 	case HEADER_GROUP_DESC:
2057 	case HEADER_AUXTRACE:
2058 	case HEADER_STAT:
2059 	case HEADER_SAMPLE_TIME:
2060 	case HEADER_DIR_FORMAT:
2061 	case HEADER_COMPRESSED:
2062 	default:
2063 		return false;
2064 	};
2065 }
2066 
2067 static int read_file(int fd, u64 offs, void *buf, size_t sz)
2068 {
2069 	ssize_t ret = preadn(fd, buf, sz, offs);
2070 
2071 	if (ret < 0)
2072 		return -errno;
2073 	if ((size_t)ret != sz)
2074 		return -EINVAL;
2075 	return 0;
2076 }
2077 
2078 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
2079 {
2080 	int fd = perf_data__fd(inject->session->data);
2081 	u64 offs = inject->secs[feat].offset;
2082 	size_t sz = inject->secs[feat].size;
2083 	void *buf = malloc(sz);
2084 	int ret;
2085 
2086 	if (!buf)
2087 		return -ENOMEM;
2088 
2089 	ret = read_file(fd, offs, buf, sz);
2090 	if (ret)
2091 		goto out_free;
2092 
2093 	ret = fw->write(fw, buf, sz);
2094 out_free:
2095 	free(buf);
2096 	return ret;
2097 }
2098 
2099 struct inject_fc {
2100 	struct feat_copier fc;
2101 	struct perf_inject *inject;
2102 };
2103 
2104 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
2105 {
2106 	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
2107 	struct perf_inject *inject = inj_fc->inject;
2108 	int ret;
2109 
2110 	if (!inject->secs[feat].offset ||
2111 	    !keep_feat(feat))
2112 		return 0;
2113 
2114 	ret = feat_copy(inject, feat, fw);
2115 	if (ret < 0)
2116 		return ret;
2117 
2118 	return 1; /* Feature section copied */
2119 }
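/*
 * Return convention for the feat_copier callback: 1 means the feature
 * section was copied verbatim from the input file, 0 means it was
 * skipped and the header writer should regenerate it. A caller-side
 * sketch (hypothetical; the real logic lives in the header writing
 * code, and synthesize_feature() is a placeholder):
 *
 *	if (fc->copy(fc, feat, fw) != 1)
 *		synthesize_feature(feat);	// regenerate from session
 */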
2120 
2121 static int copy_kcore_dir(struct perf_inject *inject)
2122 {
2123 	char *cmd;
2124 	int ret;
2125 
2126 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
2127 		       inject->input_name, inject->output.path);
2128 	if (ret < 0)
2129 		return ret;
2130 	pr_debug("%s\n", cmd);
2131 	ret = system(cmd);
2132 	free(cmd);
2133 	return ret;
2134 }
2135 
2136 static int guest_session__copy_kcore_dir(struct guest_session *gs)
2137 {
2138 	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
2139 	char *cmd;
2140 	int ret;
2141 
2142 	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
2143 		       gs->perf_data_file, inject->output.path, gs->machine_pid);
2144 	if (ret < 0)
2145 		return ret;
2146 	pr_debug("%s\n", cmd);
2147 	ret = system(cmd);
2148 	free(cmd);
2149 	return ret;
2150 }
2151 
2152 static int output_fd(struct perf_inject *inject)
2153 {
2154 	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
2155 }
2156 
2157 static int __cmd_inject(struct perf_inject *inject)
2158 {
2159 	int ret = -EINVAL;
2160 	struct guest_session *gs = &inject->guest_session;
2161 	struct perf_session *session = inject->session;
2162 	int fd = output_fd(inject);
2163 	u64 output_data_offset = perf_session__data_offset(session->evlist);
2164 	/*
2165 	 * Pipe input hasn't loaded the attributes and will handle them as
2166 	 * events. So that the attributes don't overlap the data, write the
2167 	 * attributes after the data.
2168 	 */
2169 	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;
2170 
2171 	signal(SIGINT, sig_handler);
2172 
2173 	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
2174 	    inject->itrace_synth_opts.set) {
2175 		inject->tool.mmap	  = perf_event__repipe_mmap;
2176 		inject->tool.mmap2	  = perf_event__repipe_mmap2;
2177 		inject->tool.fork	  = perf_event__repipe_fork;
2178 #ifdef HAVE_LIBTRACEEVENT
2179 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
2180 #endif
2181 	}
2182 
2183 	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2184 	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2185 		inject->tool.sample = perf_event__inject_buildid;
2186 	} else if (inject->sched_stat) {
2187 		struct evsel *evsel;
2188 
2189 		evlist__for_each_entry(session->evlist, evsel) {
2190 			const char *name = evsel__name(evsel);
2191 
2192 			if (!strcmp(name, "sched:sched_switch")) {
2193 				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
2194 					return -EINVAL;
2195 
2196 				evsel->handler = perf_inject__sched_switch;
2197 			} else if (!strcmp(name, "sched:sched_process_exit"))
2198 				evsel->handler = perf_inject__sched_process_exit;
2199 #ifdef HAVE_LIBTRACEEVENT
2200 			else if (!strncmp(name, "sched:sched_stat_", 17))
2201 				evsel->handler = perf_inject__sched_stat;
2202 #endif
2203 		}
2204 	} else if (inject->itrace_synth_opts.vm_time_correlation) {
2205 		session->itrace_synth_opts = &inject->itrace_synth_opts;
2206 		memset(&inject->tool, 0, sizeof(inject->tool));
2207 		inject->tool.id_index	    = perf_event__process_id_index;
2208 		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2209 		inject->tool.auxtrace	    = perf_event__process_auxtrace;
2210 		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
2211 		inject->tool.ordered_events = true;
2212 		inject->tool.ordering_requires_timestamps = true;
2213 	} else if (inject->itrace_synth_opts.set) {
2214 		session->itrace_synth_opts = &inject->itrace_synth_opts;
2215 		inject->itrace_synth_opts.inject = true;
2216 		inject->tool.comm	    = perf_event__repipe_comm;
2217 		inject->tool.namespaces	    = perf_event__repipe_namespaces;
2218 		inject->tool.exit	    = perf_event__repipe_exit;
2219 		inject->tool.id_index	    = perf_event__process_id_index;
2220 		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2221 		inject->tool.auxtrace	    = perf_event__process_auxtrace;
2222 		inject->tool.aux	    = perf_event__drop_aux;
2223 		inject->tool.itrace_start   = perf_event__drop_aux;
2224 		inject->tool.aux_output_hw_id = perf_event__drop_aux;
2225 		inject->tool.ordered_events = true;
2226 		inject->tool.ordering_requires_timestamps = true;
2227 		/* Allow space in the header for new attributes */
2228 		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
2229 		if (inject->strip)
2230 			strip_init(inject);
2231 	} else if (gs->perf_data_file) {
2232 		char *name = gs->perf_data_file;
2233 
2234 		/*
2235 		 * Not strictly necessary, but keep these events in order wrt
2236 		 * guest events.
2237 		 */
2238 		inject->tool.mmap		= host__repipe;
2239 		inject->tool.mmap2		= host__repipe;
2240 		inject->tool.comm		= host__repipe;
2241 		inject->tool.fork		= host__repipe;
2242 		inject->tool.exit		= host__repipe;
2243 		inject->tool.lost		= host__repipe;
2244 		inject->tool.context_switch	= host__repipe;
2245 		inject->tool.ksymbol		= host__repipe;
2246 		inject->tool.text_poke		= host__repipe;
2247 		/*
2248 		 * Once the host session has initialized, set up sample ID
2249 		 * mapping and feed in guest attrs, build IDs and initial
2250 		 * events.
2251 		 */
2252 		inject->tool.finished_init	= host__finished_init;
2253 		/* Obey finished round ordering */
2254 		inject->tool.finished_round	= host__finished_round;
2255 		/* Keep track of which CPU a VCPU is running on */
2256 		inject->tool.context_switch	= host__context_switch;
2257 		/*
2258 		 * Must order events to be able to obey finished round
2259 		 * ordering.
2260 		 */
2261 		inject->tool.ordered_events	= true;
2262 		inject->tool.ordering_requires_timestamps = true;
2263 		/* Set up a separate session to process guest perf.data file */
2264 		ret = guest_session__start(gs, name, session->data->force);
2265 		if (ret) {
2266 			pr_err("Failed to process %s, error %d\n", name, ret);
2267 			return ret;
2268 		}
2269 		/* Allow space in the header for guest attributes */
2270 		output_data_offset += gs->session->header.data_offset;
2271 		output_data_offset = roundup(output_data_offset, 4096);
2272 	}
2273 
2274 	if (!inject->itrace_synth_opts.set)
2275 		auxtrace_index__free(&session->auxtrace_index);
2276 
2277 	if (!inject->output.is_pipe && !inject->in_place_update)
2278 		lseek(fd, output_data_offset, SEEK_SET);
2279 
2280 	ret = perf_session__process_events(session);
2281 	if (ret)
2282 		return ret;
2283 
2284 	if (gs->session) {
2285 		/*
2286 		 * Remaining guest events have later timestamps. Flush them
2287 		 * out to file.
2288 		 */
2289 		ret = guest_session__flush_events(gs);
2290 		if (ret) {
2291 			pr_err("Failed to flush guest events\n");
2292 			return ret;
2293 		}
2294 	}
2295 
2296 	if (!inject->output.is_pipe && !inject->in_place_update) {
2297 		struct inject_fc inj_fc = {
2298 			.fc.copy = feat_copy_cb,
2299 			.inject = inject,
2300 		};
2301 
2302 		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2303 		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
2304 			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
2305 		/*
2306 		 * Keep all buildids when there is unprocessed AUX data because
2307 		 * it is not known which ones the AUX trace hits.
2308 		 */
2309 		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
2310 		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
2311 			perf_session__dsos_hit_all(session);
2312 		/*
2313 		 * The AUX areas have been removed and replaced with
2314 		 * synthesized hardware events, so clear the feature flag.
2315 		 */
2316 		if (inject->itrace_synth_opts.set) {
2317 			perf_header__clear_feat(&session->header,
2318 						HEADER_AUXTRACE);
2319 			if (inject->itrace_synth_opts.last_branch ||
2320 			    inject->itrace_synth_opts.add_last_branch)
2321 				perf_header__set_feat(&session->header,
2322 						      HEADER_BRANCH_STACK);
2323 		}
2324 		session->header.data_offset = output_data_offset;
2325 		session->header.data_size = inject->bytes_written;
2326 		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
2327 					    write_attrs_after_data);
2328 
2329 		if (inject->copy_kcore_dir) {
2330 			ret = copy_kcore_dir(inject);
2331 			if (ret) {
2332 				pr_err("Failed to copy kcore\n");
2333 				return ret;
2334 			}
2335 		}
2336 		if (gs->copy_kcore_dir) {
2337 			ret = guest_session__copy_kcore_dir(gs);
2338 			if (ret) {
2339 				pr_err("Failed to copy guest kcore\n");
2340 				return ret;
2341 			}
2342 		}
2343 	}
2344 
2345 	return ret;
2346 }
2347 
2348 int cmd_inject(int argc, const char **argv)
2349 {
2350 	struct perf_inject inject = {
2351 		.input_name  = "-",
2352 		.samples = LIST_HEAD_INIT(inject.samples),
2353 		.output = {
2354 			.path = "-",
2355 			.mode = PERF_DATA_MODE_WRITE,
2356 			.use_stdio = true,
2357 		},
2358 	};
2359 	struct perf_data data = {
2360 		.mode = PERF_DATA_MODE_READ,
2361 		.use_stdio = true,
2362 	};
2363 	int ret;
2364 	const char *known_build_ids = NULL;
2365 	bool build_ids = false;
2366 	bool build_id_all = false;
2367 	bool mmap2_build_ids = false;
2368 	bool mmap2_build_id_all = false;
2369 
2370 	struct option options[] = {
2371 		OPT_BOOLEAN('b', "build-ids", &build_ids,
2372 			    "Inject build-ids into the output stream"),
2373 		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
2374 			    "Inject build-ids of all DSOs into the output stream"),
2375 		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
2376 			    "Drop unused mmap events, make others mmap2 with build IDs"),
2377 		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
2378 			    "Rewrite all mmap events as mmap2 events with build IDs"),
2379 		OPT_STRING(0, "known-build-ids", &known_build_ids,
2380 			   "buildid path [,buildid path...]",
2381 			   "build-ids to use for given paths"),
2382 		OPT_STRING('i', "input", &inject.input_name, "file",
2383 			   "input file name"),
2384 		OPT_STRING('o', "output", &inject.output.path, "file",
2385 			   "output file name"),
2386 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2387 			    "Merge sched-stat and sched-switch for getting events "
2388 			    "where and how long tasks slept"),
2389 #ifdef HAVE_JITDUMP
2390 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2391 #endif
2392 		OPT_INCR('v', "verbose", &verbose,
2393 			 "be more verbose (show build ids, etc)"),
2394 		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2395 			   "file", "vmlinux pathname"),
2396 		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2397 			    "don't load vmlinux even if found"),
2398 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2399 			   "kallsyms pathname"),
2400 		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2401 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2402 				    NULL, "opts", "Instruction Tracing options\n"
2403 				    ITRACE_HELP,
2404 				    itrace_parse_synth_opts),
2405 		OPT_BOOLEAN(0, "strip", &inject.strip,
2406 			    "strip non-synthesized events (use with --itrace)"),
2407 		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2408 				    "correlate time between VM guests and the host",
2409 				    parse_vm_time_correlation),
2410 		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2411 				    "inject events from a guest perf.data file",
2412 				    parse_guest_data),
2413 		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2414 			   "guest mount directory under which every guest os"
2415 			   " instance has a subdir"),
2416 		OPT_END()
2417 	};
2418 	const char * const inject_usage[] = {
2419 		"perf inject [<options>]",
2420 		NULL
2421 	};
2422 	bool ordered_events;
2423 
2424 	if (!inject.itrace_synth_opts.set) {
2425 		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
2426 		symbol_conf.lazy_load_kernel_maps = true;
2427 	}
2428 
2429 #ifndef HAVE_JITDUMP
2430 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2431 #endif
2432 	argc = parse_options(argc, argv, options, inject_usage, 0);
2433 
2434 	/*
2435 	 * Any (unrecognized) arguments left?
2436 	 */
2437 	if (argc)
2438 		usage_with_options(inject_usage, options);
2439 
2440 	if (inject.strip && !inject.itrace_synth_opts.set) {
2441 		pr_err("--strip option requires --itrace option\n");
2442 		return -1;
2443 	}
2444 
2445 	if (symbol__validate_sym_arguments())
2446 		return -1;
2447 
2448 	if (inject.in_place_update) {
2449 		if (!strcmp(inject.input_name, "-")) {
2450 			pr_err("Input file name required for in-place updating\n");
2451 			return -1;
2452 		}
2453 		if (strcmp(inject.output.path, "-")) {
2454 			pr_err("Output file name must not be specified for in-place updating\n");
2455 			return -1;
2456 		}
2457 		if (!data.force && !inject.in_place_update_dry_run) {
2458 			pr_err("The input file would be updated in place, "
2459 				"the --force option is required.\n");
2460 			return -1;
2461 		}
2462 		if (!inject.in_place_update_dry_run)
2463 			data.in_place_update = true;
2464 	} else {
2465 		if (strcmp(inject.output.path, "-") && !inject.strip &&
2466 		    has_kcore_dir(inject.input_name)) {
2467 			inject.output.is_dir = true;
2468 			inject.copy_kcore_dir = true;
2469 		}
2470 		if (perf_data__open(&inject.output)) {
2471 			perror("failed to create output file");
2472 			return -1;
2473 		}
2474 	}
2475 	if (mmap2_build_ids)
2476 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
2477 	if (mmap2_build_id_all)
2478 		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
2479 	if (build_ids)
2480 		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
2481 	if (build_id_all)
2482 		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;
2483 
2484 	data.path = inject.input_name;
2485 
2486 	ordered_events = inject.jit_mode || inject.sched_stat ||
2487 		inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2488 		inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
2489 	perf_tool__init(&inject.tool, ordered_events);
2490 	inject.tool.sample		= perf_event__repipe_sample;
2491 	inject.tool.read		= perf_event__repipe_sample;
2492 	inject.tool.mmap		= perf_event__repipe;
2493 	inject.tool.mmap2		= perf_event__repipe;
2494 	inject.tool.comm		= perf_event__repipe;
2495 	inject.tool.namespaces		= perf_event__repipe;
2496 	inject.tool.cgroup		= perf_event__repipe;
2497 	inject.tool.fork		= perf_event__repipe;
2498 	inject.tool.exit		= perf_event__repipe;
2499 	inject.tool.lost		= perf_event__repipe;
2500 	inject.tool.lost_samples	= perf_event__repipe;
2501 	inject.tool.aux			= perf_event__repipe;
2502 	inject.tool.itrace_start	= perf_event__repipe;
2503 	inject.tool.aux_output_hw_id	= perf_event__repipe;
2504 	inject.tool.context_switch	= perf_event__repipe;
2505 	inject.tool.throttle		= perf_event__repipe;
2506 	inject.tool.unthrottle		= perf_event__repipe;
2507 	inject.tool.ksymbol		= perf_event__repipe;
2508 	inject.tool.bpf			= perf_event__repipe;
2509 	inject.tool.text_poke		= perf_event__repipe;
2510 	inject.tool.attr		= perf_event__repipe_attr;
2511 	inject.tool.event_update	= perf_event__repipe_event_update;
2512 	inject.tool.tracing_data	= perf_event__repipe_op2_synth;
2513 	inject.tool.finished_round	= perf_event__repipe_oe_synth;
2514 	inject.tool.build_id		= perf_event__repipe_op2_synth;
2515 	inject.tool.id_index		= perf_event__repipe_op2_synth;
2516 	inject.tool.auxtrace_info	= perf_event__repipe_op2_synth;
2517 	inject.tool.auxtrace_error	= perf_event__repipe_op2_synth;
2518 	inject.tool.time_conv		= perf_event__repipe_op2_synth;
2519 	inject.tool.thread_map		= perf_event__repipe_op2_synth;
2520 	inject.tool.cpu_map		= perf_event__repipe_op2_synth;
2521 	inject.tool.stat_config		= perf_event__repipe_op2_synth;
2522 	inject.tool.stat		= perf_event__repipe_op2_synth;
2523 	inject.tool.stat_round		= perf_event__repipe_op2_synth;
2524 	inject.tool.feature		= perf_event__repipe_op2_synth;
2525 	inject.tool.finished_init	= perf_event__repipe_op2_synth;
2526 	inject.tool.compressed		= perf_event__repipe_op4_synth;
2527 	inject.tool.auxtrace		= perf_event__repipe_auxtrace;
2528 	inject.tool.bpf_metadata	= perf_event__repipe_op2_synth;
2529 	inject.tool.dont_split_sample_group = true;
2530 	inject.tool.merge_deferred_callchains = false;
2531 	inject.session = __perf_session__new(&data, &inject.tool,
2532 					     /*trace_event_repipe=*/inject.output.is_pipe,
2533 					     /*host_env=*/NULL);
2534 
2535 	if (IS_ERR(inject.session)) {
2536 		ret = PTR_ERR(inject.session);
2537 		goto out_close_output;
2538 	}
2539 
2540 	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2541 		pr_warning("Decompression initialization failed.\n");
2542 
2543 	/* Save original section info before feature bits change */
2544 	ret = save_section_info(&inject);
2545 	if (ret)
2546 		goto out_delete;
2547 
2548 	if (inject.output.is_pipe) {
2549 		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2550 		if (ret < 0) {
2551 			pr_err("Couldn't write a new pipe header.\n");
2552 			goto out_delete;
2553 		}
2554 
2555 		/*
2556 		 * If the input is already a pipe then the features and
2557 		 * attributes don't need synthesizing, they will be present in
2558 		 * the input.
2559 		 */
2560 		if (!data.is_pipe) {
2561 			ret = perf_event__synthesize_for_pipe(&inject.tool,
2562 							      inject.session,
2563 							      &inject.output,
2564 							      perf_event__repipe);
2565 			if (ret < 0)
2566 				goto out_delete;
2567 		}
2568 	}
2569 
2570 	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
2571 	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
2572 		/*
2573 		 * Make sure the mmap records are ordered correctly, in
2574 		 * particular so that build-id processing sees jitted code
2575 		 * mmaps. We cannot generate the buildid hit list and
2576 		 * inject the jit mmaps at the same time for now.
2577 		 */
2578 		inject.tool.ordering_requires_timestamps = true;
2579 	}
2580 	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
2581 		inject.known_build_ids =
2582 			perf_inject__parse_known_build_ids(known_build_ids);
2583 
2584 		if (inject.known_build_ids == NULL) {
2585 			pr_err("Couldn't parse known build ids.\n");
2586 			goto out_delete;
2587 		}
2588 	}
2589 
2590 #ifdef HAVE_JITDUMP
2591 	if (inject.jit_mode) {
2592 		inject.tool.mmap2	   = perf_event__repipe_mmap2;
2593 		inject.tool.mmap	   = perf_event__repipe_mmap;
2594 		inject.tool.ordering_requires_timestamps = true;
2595 		/*
2596 		 * JIT MMAP injection injects all MMAP events in one go, so it
2597 		 * does not obey finished_round semantics.
2598 		 */
2599 		inject.tool.finished_round = perf_event__drop_oe;
2600 	}
2601 #endif
2602 	ret = symbol__init(perf_session__env(inject.session));
2603 	if (ret < 0)
2604 		goto out_delete;
2605 
2606 	ret = __cmd_inject(&inject);
2607 
2608 	guest_session__exit(&inject.guest_session);
2609 
2610 out_delete:
2611 	strlist__delete(inject.known_build_ids);
2612 	zstd_fini(&(inject.session->zstd_data));
2613 	perf_session__delete(inject.session);
2614 out_close_output:
2615 	if (!inject.in_place_update)
2616 		perf_data__close(&inject.output);
2617 	free(inject.itrace_synth_opts.vm_tm_corr_args);
2618 	free(inject.event_copy);
2619 	free(inject.guest_session.ev.event_buf);
2620 	return ret;
2621 }
2622