xref: /linux/tools/perf/util/session.c (revision 80b549be27de0f11124c66eaeb5307c7b4582edd)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <errno.h>
3 #include <signal.h>
4 #include <inttypes.h>
5 #include <linux/err.h>
6 #include <linux/kernel.h>
7 #include <linux/zalloc.h>
8 #include <api/fs/fs.h>
9 
10 #include <byteswap.h>
11 #include <unistd.h>
12 #include <sys/types.h>
13 #include <sys/mman.h>
14 #include <perf/cpumap.h>
15 #include <perf/event.h>
16 
17 #include "map_symbol.h"
18 #include "branch.h"
19 #include "debug.h"
20 #include "dwarf-regs.h"
21 #include "env.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "memswap.h"
25 #include "map.h"
26 #include "symbol.h"
27 #include "session.h"
28 #include "tool.h"
29 #include "perf_regs.h"
30 #include "asm/bug.h"
31 #include "auxtrace.h"
32 #include "thread.h"
33 #include "thread-stack.h"
34 #include "sample-raw.h"
35 #include "stat.h"
36 #include "tsc.h"
37 #include "ui/progress.h"
38 #include "util.h"
39 #include "arch/common.h"
40 #include "units.h"
41 #include "annotate.h"
42 #include "perf.h"
43 #include <internal/lib.h>
44 
45 static int perf_session__deliver_event(struct perf_session *session,
46 				       union perf_event *event,
47 				       const struct perf_tool *tool,
48 				       u64 file_offset,
49 				       const char *file_path);
50 
51 static int perf_session__open(struct perf_session *session)
52 {
53 	struct perf_data *data = session->data;
54 
55 	if (perf_session__read_header(session) < 0) {
56 		pr_err("incompatible file format (rerun with -v to learn more)\n");
57 		return -1;
58 	}
59 
60 	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) {
61 		/* Auxiliary events may reference exited threads, so hold onto dead ones. */
62 		symbol_conf.keep_exited_threads = true;
63 	}
64 
65 	if (perf_data__is_pipe(data))
66 		return 0;
67 
68 	if (perf_header__has_feat(&session->header, HEADER_STAT))
69 		return 0;
70 
71 	if (!evlist__valid_sample_type(session->evlist)) {
72 		pr_err("non matching sample_type\n");
73 		return -1;
74 	}
75 
76 	if (!evlist__valid_sample_id_all(session->evlist)) {
77 		pr_err("non matching sample_id_all\n");
78 		return -1;
79 	}
80 
81 	if (!evlist__valid_read_format(session->evlist)) {
82 		pr_err("non matching read_format\n");
83 		return -1;
84 	}
85 
86 	return 0;
87 }
88 
89 void perf_session__set_id_hdr_size(struct perf_session *session)
90 {
91 	u16 id_hdr_size = evlist__id_hdr_size(session->evlist);
92 
93 	machines__set_id_hdr_size(&session->machines, id_hdr_size);
94 }
95 
96 int perf_session__create_kernel_maps(struct perf_session *session)
97 {
98 	int ret = machine__create_kernel_maps(&session->machines.host);
99 
100 	if (ret >= 0)
101 		ret = machines__create_guest_kernel_maps(&session->machines);
102 	return ret;
103 }
104 
105 static void perf_session__destroy_kernel_maps(struct perf_session *session)
106 {
107 	machines__destroy_kernel_maps(&session->machines);
108 }
109 
110 static bool perf_session__has_comm_exec(struct perf_session *session)
111 {
112 	struct evsel *evsel;
113 
114 	evlist__for_each_entry(session->evlist, evsel) {
115 		if (evsel->core.attr.comm_exec)
116 			return true;
117 	}
118 
119 	return false;
120 }
121 
122 static void perf_session__set_comm_exec(struct perf_session *session)
123 {
124 	bool comm_exec = perf_session__has_comm_exec(session);
125 
126 	machines__set_comm_exec(&session->machines, comm_exec);
127 }
128 
129 static int ordered_events__deliver_event(struct ordered_events *oe,
130 					 struct ordered_event *event)
131 {
132 	struct perf_session *session = container_of(oe, struct perf_session,
133 						    ordered_events);
134 	int ret = perf_session__deliver_event(session, event->event,
135 					       session->tool, event->file_offset,
136 					       event->file_path);
137 
138 	if (ret) {
139 		pr_err("%#" PRIx64 " [%#x]: ordered event processing failed (%d) for event of type: %s (%d)\n",
140 			event->file_offset, event->event->header.size, ret,
141 			perf_event__name(event->event->header.type),
142 			event->event->header.type);
143 	}
144 	return ret;
145 }
146 
147 struct perf_session *__perf_session__new(struct perf_data *data,
148 					 struct perf_tool *tool,
149 					 bool trace_event_repipe,
150 					 struct perf_env *host_env)
151 {
152 	int ret = -ENOMEM;
153 	struct perf_session *session = zalloc(sizeof(*session));
154 
155 	if (!session)
156 		goto out;
157 
158 	session->trace_event_repipe = trace_event_repipe;
159 	session->tool   = tool;
160 	session->decomp_data.zstd_decomp = &session->zstd_data;
161 	session->active_decomp = &session->decomp_data;
162 	INIT_LIST_HEAD(&session->auxtrace_index);
163 	machines__init(&session->machines);
164 	ordered_events__init(&session->ordered_events,
165 			     ordered_events__deliver_event, NULL);
166 
167 	perf_env__init(&session->header.env);
168 	if (data) {
169 		ret = perf_data__open(data);
170 		if (ret < 0)
171 			goto out_delete;
172 
173 		session->data = data;
174 
175 		if (perf_data__is_read(data)) {
176 			ret = perf_session__open(session);
177 			if (ret < 0)
178 				goto out_delete;
179 
180 			/*
181 			 * set session attributes that are present in perf.data
182 			 * but not in pipe-mode.
183 			 */
184 			if (!data->is_pipe) {
185 				perf_session__set_id_hdr_size(session);
186 				perf_session__set_comm_exec(session);
187 			}
188 
189 			evlist__init_trace_event_sample_raw(session->evlist, &session->header.env);
190 
191 			/* Open the directory data. */
192 			if (data->is_dir) {
193 				ret = perf_data__open_dir(data);
194 				if (ret)
195 					goto out_delete;
196 			}
197 
198 			if (!symbol_conf.kallsyms_name &&
199 			    !symbol_conf.vmlinux_name)
200 				symbol_conf.kallsyms_name = perf_data__kallsyms_name(data);
201 		}
202 	} else {
203 		assert(host_env != NULL);
204 		session->machines.host.env = host_env;
205 	}
206 	if (session->evlist)
207 		session->evlist->session = session;
208 
209 	session->machines.host.single_address_space =
210 		perf_env__single_address_space(session->machines.host.env);
211 
212 	if (!data || perf_data__is_write(data)) {
213 		/*
214 		 * In O_RDONLY mode this will be performed when reading the
215 		 * kernel MMAP event, in perf_event__process_mmap().
216 		 */
217 		if (perf_session__create_kernel_maps(session) < 0)
218 			pr_warning("Cannot read kernel map\n");
219 	}
220 
221 	/*
222 	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
223 	 * processed, so evlist__sample_id_all is not meaningful here.
224 	 */
225 	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
226 	    tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
227 		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
228 		tool->ordered_events = false;
229 	}
230 
231 	return session;
232 
233  out_delete:
234 	perf_session__delete(session);
235  out:
236 	return ERR_PTR(ret);
237 }
238 
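/* Walk the chain of decompressed event buffers, munmap()ing each one. */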
239 static void perf_decomp__release_events(struct decomp *next)
240 {
241 	struct decomp *decomp;
242 	size_t mmap_len;
243 
244 	do {
245 		decomp = next;
246 		if (decomp == NULL)
247 			break;
248 		next = decomp->next;
249 		mmap_len = decomp->mmap_len;
250 		munmap(decomp, mmap_len);
251 	} while (1);
252 }
253 
254 void perf_session__delete(struct perf_session *session)
255 {
256 	if (session == NULL)
257 		return;
258 	auxtrace__free(session);
259 	auxtrace_index__free(&session->auxtrace_index);
260 	debuginfo_cache__delete();
261 	perf_session__destroy_kernel_maps(session);
262 	perf_decomp__release_events(session->decomp_data.decomp);
263 	perf_env__exit(&session->header.env);
264 	machines__exit(&session->machines);
265 	if (session->data) {
266 		if (perf_data__is_read(session->data))
267 			evlist__delete(session->evlist);
268 		perf_data__close(session->data);
269 	}
270 #ifdef HAVE_LIBTRACEEVENT
271 	trace_event__cleanup(&session->tevent);
272 #endif
273 	free(session);
274 }
275 
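/*
 * The perf_event__*_swap() helpers below byte-swap events that were
 * recorded on a machine of the opposite endianness. swap_sample_id_all()
 * swaps the trailing sample_id_all block that follows the type-specific
 * fields.
 */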
276 static void swap_sample_id_all(union perf_event *event, void *data)
277 {
278 	void *end = (void *) event + event->header.size;
279 	int size = end - data;
280 
281 	BUG_ON(size % sizeof(u64));
282 	mem_bswap_64(data, size);
283 }
284 
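/* Byte-swap everything that follows the event header as an array of u64s. */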
285 static void perf_event__all64_swap(union perf_event *event,
286 				   bool sample_id_all __maybe_unused)
287 {
288 	struct perf_event_header *hdr = &event->header;
289 	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
290 }
291 
292 static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
293 {
294 	event->comm.pid = bswap_32(event->comm.pid);
295 	event->comm.tid = bswap_32(event->comm.tid);
296 
297 	if (sample_id_all) {
298 		void *data = &event->comm.comm;
299 
300 		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
301 		swap_sample_id_all(event, data);
302 	}
303 }
304 
305 static void perf_event__mmap_swap(union perf_event *event,
306 				  bool sample_id_all)
307 {
308 	event->mmap.pid	  = bswap_32(event->mmap.pid);
309 	event->mmap.tid	  = bswap_32(event->mmap.tid);
310 	event->mmap.start = bswap_64(event->mmap.start);
311 	event->mmap.len	  = bswap_64(event->mmap.len);
312 	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
313 
314 	if (sample_id_all) {
315 		void *data = &event->mmap.filename;
316 
317 		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
318 		swap_sample_id_all(event, data);
319 	}
320 }
321 
322 static void perf_event__mmap2_swap(union perf_event *event,
323 				  bool sample_id_all)
324 {
325 	event->mmap2.pid   = bswap_32(event->mmap2.pid);
326 	event->mmap2.tid   = bswap_32(event->mmap2.tid);
327 	event->mmap2.start = bswap_64(event->mmap2.start);
328 	event->mmap2.len   = bswap_64(event->mmap2.len);
329 	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
330 
331 	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
332 		event->mmap2.maj   = bswap_32(event->mmap2.maj);
333 		event->mmap2.min   = bswap_32(event->mmap2.min);
334 		event->mmap2.ino   = bswap_64(event->mmap2.ino);
335 		event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
336 	}
337 
338 	if (sample_id_all) {
339 		void *data = &event->mmap2.filename;
340 
341 		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
342 		swap_sample_id_all(event, data);
343 	}
344 }

345 static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
346 {
347 	event->fork.pid	 = bswap_32(event->fork.pid);
348 	event->fork.tid	 = bswap_32(event->fork.tid);
349 	event->fork.ppid = bswap_32(event->fork.ppid);
350 	event->fork.ptid = bswap_32(event->fork.ptid);
351 	event->fork.time = bswap_64(event->fork.time);
352 
353 	if (sample_id_all)
354 		swap_sample_id_all(event, &event->fork + 1);
355 }
356 
357 static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
358 {
359 	event->read.pid		 = bswap_32(event->read.pid);
360 	event->read.tid		 = bswap_32(event->read.tid);
361 	event->read.value	 = bswap_64(event->read.value);
362 	event->read.time_enabled = bswap_64(event->read.time_enabled);
363 	event->read.time_running = bswap_64(event->read.time_running);
364 	event->read.id		 = bswap_64(event->read.id);
365 
366 	if (sample_id_all)
367 		swap_sample_id_all(event, &event->read + 1);
368 }
369 
370 static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
371 {
372 	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
373 	event->aux.aux_size   = bswap_64(event->aux.aux_size);
374 	event->aux.flags      = bswap_64(event->aux.flags);
375 
376 	if (sample_id_all)
377 		swap_sample_id_all(event, &event->aux + 1);
378 }
379 
380 static void perf_event__itrace_start_swap(union perf_event *event,
381 					  bool sample_id_all)
382 {
383 	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
384 	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);
385 
386 	if (sample_id_all)
387 		swap_sample_id_all(event, &event->itrace_start + 1);
388 }
389 
390 static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
391 {
392 	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
393 		event->context_switch.next_prev_pid =
394 				bswap_32(event->context_switch.next_prev_pid);
395 		event->context_switch.next_prev_tid =
396 				bswap_32(event->context_switch.next_prev_tid);
397 	}
398 
399 	if (sample_id_all)
400 		swap_sample_id_all(event, &event->context_switch + 1);
401 }
402 
403 static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
404 {
405 	event->text_poke.addr    = bswap_64(event->text_poke.addr);
406 	event->text_poke.old_len = bswap_16(event->text_poke.old_len);
407 	event->text_poke.new_len = bswap_16(event->text_poke.new_len);
408 
409 	if (sample_id_all) {
410 		size_t len = sizeof(event->text_poke.old_len) +
411 			     sizeof(event->text_poke.new_len) +
412 			     event->text_poke.old_len +
413 			     event->text_poke.new_len;
414 		void *data = &event->text_poke.old_len;
415 
416 		data += PERF_ALIGN(len, sizeof(u64));
417 		swap_sample_id_all(event, data);
418 	}
419 }
420 
421 static void perf_event__throttle_swap(union perf_event *event,
422 				      bool sample_id_all)
423 {
424 	event->throttle.time	  = bswap_64(event->throttle.time);
425 	event->throttle.id	  = bswap_64(event->throttle.id);
426 	event->throttle.stream_id = bswap_64(event->throttle.stream_id);
427 
428 	if (sample_id_all)
429 		swap_sample_id_all(event, &event->throttle + 1);
430 }
431 
432 static void perf_event__namespaces_swap(union perf_event *event,
433 					bool sample_id_all)
434 {
435 	u64 i;
436 
437 	event->namespaces.pid		= bswap_32(event->namespaces.pid);
438 	event->namespaces.tid		= bswap_32(event->namespaces.tid);
439 	event->namespaces.nr_namespaces	= bswap_64(event->namespaces.nr_namespaces);
440 
441 	for (i = 0; i < event->namespaces.nr_namespaces; i++) {
442 		struct perf_ns_link_info *ns = &event->namespaces.link_info[i];
443 
444 		ns->dev = bswap_64(ns->dev);
445 		ns->ino = bswap_64(ns->ino);
446 	}
447 
448 	if (sample_id_all)
449 		swap_sample_id_all(event, &event->namespaces.link_info[i]);
450 }
451 
452 static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
453 {
454 	event->cgroup.id = bswap_64(event->cgroup.id);
455 
456 	if (sample_id_all) {
457 		void *data = &event->cgroup.path;
458 
459 		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
460 		swap_sample_id_all(event, data);
461 	}
462 }
463 
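/*
 * Reverse the bit order within a byte: swap the two nibbles, then the bit
 * pairs within each nibble, then adjacent bits, e.g. 0x01 becomes 0x80.
 */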
464 static u8 revbyte(u8 b)
465 {
466 	int rev = (b >> 4) | ((b & 0xf) << 4);
467 	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
468 	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
469 	return (u8) rev;
470 }
471 
472 /*
473  * XXX this is a hack in an attempt to carry the flags bitfield
474  * through the endian village. The ABI says:
475  *
476  * Bit-fields are allocated from right to left (least to most significant)
477  * on little-endian implementations and from left to right (most to least
478  * significant) on big-endian implementations.
479  *
480  * The above seems to be byte specific, so we need to reverse each
481  * byte of the bitfield. The 'Internet' also says this might be
482  * implementation specific and we probably need a proper fix that carries
483  * the perf_event_attr bitfield flags in a separate data file FEAT_
484  * section. Though this seems to work for now.
485  */
486 static void swap_bitfield(u8 *p, unsigned len)
487 {
488 	unsigned i;
489 
490 	for (i = 0; i < len; i++) {
491 		*p = revbyte(*p);
492 		p++;
493 	}
494 }
495 
496 /* exported for swapping attributes in file header */
497 void perf_event__attr_swap(struct perf_event_attr *attr)
498 {
499 	attr->type		= bswap_32(attr->type);
500 	attr->size		= bswap_32(attr->size);
501 
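/*
 * Only swap fields that the on-file attr->size says are present; files
 * written by older tools may carry a smaller perf_event_attr.
 */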
502 #define bswap_safe(f, n) 					\
503 	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
504 		       sizeof(attr->f) * (n)))
505 #define bswap_field(f, sz) 			\
506 do { 						\
507 	if (bswap_safe(f, 0))			\
508 		attr->f = bswap_##sz(attr->f);	\
509 } while(0)
510 #define bswap_field_16(f) bswap_field(f, 16)
511 #define bswap_field_32(f) bswap_field(f, 32)
512 #define bswap_field_64(f) bswap_field(f, 64)
513 
514 	bswap_field_64(config);
515 	bswap_field_64(sample_period);
516 	bswap_field_64(sample_type);
517 	bswap_field_64(read_format);
518 	bswap_field_32(wakeup_events);
519 	bswap_field_32(bp_type);
520 	bswap_field_64(bp_addr);
521 	bswap_field_64(bp_len);
522 	bswap_field_64(branch_sample_type);
523 	bswap_field_64(sample_regs_user);
524 	bswap_field_32(sample_stack_user);
525 	bswap_field_32(aux_watermark);
526 	bswap_field_16(sample_max_stack);
527 	bswap_field_32(aux_sample_size);
528 
529 	/*
530 	 * The fields after read_format are bitfields. Check read_format
531 	 * because we are unable to use offsetof on a bitfield.
532 	 */
533 	if (bswap_safe(read_format, 1))
534 		swap_bitfield((u8 *) (&attr->read_format + 1),
535 			      sizeof(u64));
536 #undef bswap_field_64
537 #undef bswap_field_32
538 #undef bswap_field
539 #undef bswap_safe
540 }
541 
542 static void perf_event__hdr_attr_swap(union perf_event *event,
543 				      bool sample_id_all __maybe_unused)
544 {
545 	size_t size;
546 
547 	perf_event__attr_swap(&event->attr.attr);
548 
549 	size = event->header.size;
550 	size -= perf_record_header_attr_id(event) - (void *)event;
551 	mem_bswap_64(perf_record_header_attr_id(event), size);
552 }
553 
554 static void perf_event__event_update_swap(union perf_event *event,
555 					  bool sample_id_all __maybe_unused)
556 {
557 	event->event_update.type = bswap_64(event->event_update.type);
558 	event->event_update.id   = bswap_64(event->event_update.id);
559 }
560 
561 static void perf_event__event_type_swap(union perf_event *event,
562 					bool sample_id_all __maybe_unused)
563 {
564 	event->event_type.event_type.event_id =
565 		bswap_64(event->event_type.event_type.event_id);
566 }
567 
568 static void perf_event__tracing_data_swap(union perf_event *event,
569 					  bool sample_id_all __maybe_unused)
570 {
571 	event->tracing_data.size = bswap_32(event->tracing_data.size);
572 }
573 
574 static void perf_event__auxtrace_info_swap(union perf_event *event,
575 					   bool sample_id_all __maybe_unused)
576 {
577 	size_t size;
578 
579 	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
580 
581 	size = event->header.size;
582 	size -= (void *)&event->auxtrace_info.priv - (void *)event;
583 	mem_bswap_64(event->auxtrace_info.priv, size);
584 }
585 
586 static void perf_event__auxtrace_swap(union perf_event *event,
587 				      bool sample_id_all __maybe_unused)
588 {
589 	event->auxtrace.size      = bswap_64(event->auxtrace.size);
590 	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
591 	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
592 	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
593 	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
594 	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
595 }
596 
597 static void perf_event__auxtrace_error_swap(union perf_event *event,
598 					    bool sample_id_all __maybe_unused)
599 {
600 	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
601 	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
602 	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
603 	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
604 	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
605 	event->auxtrace_error.fmt  = bswap_32(event->auxtrace_error.fmt);
606 	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
607 	if (event->auxtrace_error.fmt)
608 		event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
609 	if (event->auxtrace_error.fmt >= 2) {
610 		event->auxtrace_error.machine_pid = bswap_32(event->auxtrace_error.machine_pid);
611 		event->auxtrace_error.vcpu = bswap_32(event->auxtrace_error.vcpu);
612 	}
613 }
614 
615 static void perf_event__thread_map_swap(union perf_event *event,
616 					bool sample_id_all __maybe_unused)
617 {
618 	unsigned i;
619 
620 	event->thread_map.nr = bswap_64(event->thread_map.nr);
621 
622 	for (i = 0; i < event->thread_map.nr; i++)
623 		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
624 }
625 
626 static void perf_event__cpu_map_swap(union perf_event *event,
627 				     bool sample_id_all __maybe_unused)
628 {
629 	struct perf_record_cpu_map_data *data = &event->cpu_map.data;
630 
631 	data->type = bswap_16(data->type);
632 
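	/*
	 * On file, a cpu map is encoded either as an explicit list of cpus,
	 * as a bitmask made of 32-bit or 64-bit words, or as a contiguous
	 * range of cpus.
	 */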
633 	switch (data->type) {
634 	case PERF_CPU_MAP__CPUS:
635 		data->cpus_data.nr = bswap_16(data->cpus_data.nr);
636 
637 		for (unsigned i = 0; i < data->cpus_data.nr; i++)
638 			data->cpus_data.cpu[i] = bswap_16(data->cpus_data.cpu[i]);
639 		break;
640 	case PERF_CPU_MAP__MASK:
641 		data->mask32_data.long_size = bswap_16(data->mask32_data.long_size);
642 
643 		switch (data->mask32_data.long_size) {
644 		case 4:
645 			data->mask32_data.nr = bswap_16(data->mask32_data.nr);
646 			for (unsigned i = 0; i < data->mask32_data.nr; i++)
647 				data->mask32_data.mask[i] = bswap_32(data->mask32_data.mask[i]);
648 			break;
649 		case 8:
650 			data->mask64_data.nr = bswap_16(data->mask64_data.nr);
651 			for (unsigned i = 0; i < data->mask64_data.nr; i++)
652 				data->mask64_data.mask[i] = bswap_64(data->mask64_data.mask[i]);
653 			break;
654 		default:
655 			pr_err("cpu_map swap: unsupported long size\n");
656 		}
657 		break;
658 	case PERF_CPU_MAP__RANGE_CPUS:
659 		data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
660 		data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu);
661 		break;
662 	default:
663 		break;
664 	}
665 }
666 
667 static void perf_event__stat_config_swap(union perf_event *event,
668 					 bool sample_id_all __maybe_unused)
669 {
670 	u64 size;
671 
672 	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
673 	size += 1; /* the extra byte makes mem_bswap_64() also swap the nr field itself */
674 	mem_bswap_64(&event->stat_config.nr, size);
675 }
676 
677 static void perf_event__stat_swap(union perf_event *event,
678 				  bool sample_id_all __maybe_unused)
679 {
680 	event->stat.id     = bswap_64(event->stat.id);
681 	event->stat.thread = bswap_32(event->stat.thread);
682 	event->stat.cpu    = bswap_32(event->stat.cpu);
683 	event->stat.val    = bswap_64(event->stat.val);
684 	event->stat.ena    = bswap_64(event->stat.ena);
685 	event->stat.run    = bswap_64(event->stat.run);
686 }
687 
688 static void perf_event__stat_round_swap(union perf_event *event,
689 					bool sample_id_all __maybe_unused)
690 {
691 	event->stat_round.type = bswap_64(event->stat_round.type);
692 	event->stat_round.time = bswap_64(event->stat_round.time);
693 }
694 
695 static void perf_event__time_conv_swap(union perf_event *event,
696 				       bool sample_id_all __maybe_unused)
697 {
698 	event->time_conv.time_shift = bswap_64(event->time_conv.time_shift);
699 	event->time_conv.time_mult  = bswap_64(event->time_conv.time_mult);
700 	event->time_conv.time_zero  = bswap_64(event->time_conv.time_zero);
701 
702 	if (event_contains(event->time_conv, time_cycles)) {
703 		event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles);
704 		event->time_conv.time_mask = bswap_64(event->time_conv.time_mask);
705 	}
706 }
707 
708 static void
709 perf_event__schedstat_cpu_swap(union perf_event *event __maybe_unused,
710 			       bool sample_id_all __maybe_unused)
711 {
712 	/* FIXME */
713 }
714 
715 static void
716 perf_event__schedstat_domain_swap(union perf_event *event __maybe_unused,
717 				  bool sample_id_all __maybe_unused)
718 {
719 	/* FIXME */
720 }
721 
722 typedef void (*perf_event__swap_op)(union perf_event *event,
723 				    bool sample_id_all);
724 
725 static perf_event__swap_op perf_event__swap_ops[] = {
726 	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
727 	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
728 	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
729 	[PERF_RECORD_FORK]		  = perf_event__task_swap,
730 	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
731 	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
732 	[PERF_RECORD_READ]		  = perf_event__read_swap,
733 	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
734 	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
735 	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
736 	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
737 	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
738 	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
739 	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
740 	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
741 	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
742 	[PERF_RECORD_CGROUP]		  = perf_event__cgroup_swap,
743 	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
744 	[PERF_RECORD_AUX_OUTPUT_HW_ID]	  = perf_event__all64_swap,
745 	[PERF_RECORD_CALLCHAIN_DEFERRED]  = perf_event__all64_swap,
746 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
747 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
748 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
749 	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
750 	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
751 	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
752 	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
753 	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
754 	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
755 	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
756 	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
757 	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
758 	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
759 	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
760 	[PERF_RECORD_TIME_CONV]		  = perf_event__time_conv_swap,
761 	[PERF_RECORD_SCHEDSTAT_CPU]	  = perf_event__schedstat_cpu_swap,
762 	[PERF_RECORD_SCHEDSTAT_DOMAIN]	  = perf_event__schedstat_domain_swap,
763 	[PERF_RECORD_HEADER_MAX]	  = NULL,
764 };
765 
766 /*
767  * When perf record finishes a pass over all the buffers, it records this
768  * pseudo event.
769  * We record the max timestamp t found in pass n.
770  * Assuming these timestamps are monotonic across cpus, we know that if
771  * a buffer still has events with timestamps below t, they will all be
772  * available and read in pass n + 1.
773  * Hence when we start to read pass n + 2, we can safely flush every
774  * event with a timestamp below t.
775  *
776  *    ============ PASS n =================
777  *       CPU 0         |   CPU 1
778  *                     |
779  *    cnt1 timestamps  |   cnt2 timestamps
780  *          1          |         2
781  *          2          |         3
782  *          -          |         4  <--- max recorded
783  *
784  *    ============ PASS n + 1 ==============
785  *       CPU 0         |   CPU 1
786  *                     |
787  *    cnt1 timestamps  |   cnt2 timestamps
788  *          3          |         5
789  *          4          |         6
790  *          5          |         7 <---- max recorded
791  *
792  *      Flush every event below timestamp 4
793  *
794  *    ============ PASS n + 2 ==============
795  *       CPU 0         |   CPU 1
796  *                     |
797  *    cnt1 timestamps  |   cnt2 timestamps
798  *          6          |         8
799  *          7          |         9
800  *          -          |         10
801  *
802  *      Flush every event below timestamp 7
803  *      etc...
804  */
805 int perf_event__process_finished_round(const struct perf_tool *tool __maybe_unused,
806 				       union perf_event *event __maybe_unused,
807 				       struct ordered_events *oe)
808 {
809 	if (dump_trace)
810 		fprintf(stdout, "\n");
811 	return ordered_events__flush(oe, OE_FLUSH__ROUND);
812 }
813 
814 int perf_session__queue_event(struct perf_session *s, union perf_event *event,
815 			      u64 timestamp, u64 file_offset, const char *file_path)
816 {
817 	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset, file_path);
818 }
819 
820 static void callchain__lbr_callstack_printf(struct perf_sample *sample)
821 {
822 	struct ip_callchain *callchain = sample->callchain;
823 	struct branch_stack *lbr_stack = sample->branch_stack;
824 	struct branch_entry *entries = perf_sample__branch_entries(sample);
825 	u64 kernel_callchain_nr = callchain->nr;
826 	unsigned int i;
827 
828 	for (i = 0; i < kernel_callchain_nr; i++) {
829 		if (callchain->ips[i] == PERF_CONTEXT_USER)
830 			break;
831 	}
832 
833 	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
834 		u64 total_nr;
835 		/*
836 		 * The LBR callstack can only capture the user call chain;
837 		 * i is the number of kernel call chain entries and
838 		 * 1 accounts for PERF_CONTEXT_USER.
839 		 *
840 		 * The user call chain is stored in the LBR registers,
841 		 * which come in pairs: the caller is stored in the
842 		 * "from" register, while the callee is stored in the
843 		 * "to" register.
844 		 * For example, given the call stack
845 		 * "A"->"B"->"C"->"D",
846 		 * the LBR registers will be recorded as
847 		 * "C"->"D", "B"->"C", "A"->"B".
848 		 * So only the first "to" register and all "from"
849 		 * registers are needed to construct the whole stack.
850 		 */
851 		total_nr = i + 1 + lbr_stack->nr + 1;
852 		kernel_callchain_nr = i + 1;
853 
854 		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
855 
856 		for (i = 0; i < kernel_callchain_nr; i++)
857 			printf("..... %2d: %016" PRIx64 "\n",
858 			       i, callchain->ips[i]);
859 
860 		printf("..... %2d: %016" PRIx64 "\n",
861 		       (int)(kernel_callchain_nr), entries[0].to);
862 		for (i = 0; i < lbr_stack->nr; i++)
863 			printf("..... %2d: %016" PRIx64 "\n",
864 			       (int)(i + kernel_callchain_nr + 1), entries[i].from);
865 	}
866 }
867 
868 static const char *callchain_context_str(u64 ip)
869 {
870 	switch (ip) {
871 	case PERF_CONTEXT_HV:
872 		return " (PERF_CONTEXT_HV)";
873 	case PERF_CONTEXT_KERNEL:
874 		return " (PERF_CONTEXT_KERNEL)";
875 	case PERF_CONTEXT_USER:
876 		return " (PERF_CONTEXT_USER)";
877 	case PERF_CONTEXT_GUEST:
878 		return " (PERF_CONTEXT_GUEST)";
879 	case PERF_CONTEXT_GUEST_KERNEL:
880 		return " (PERF_CONTEXT_GUEST_KERNEL)";
881 	case PERF_CONTEXT_GUEST_USER:
882 		return " (PERF_CONTEXT_GUEST_USER)";
883 	case PERF_CONTEXT_USER_DEFERRED:
884 		return " (PERF_CONTEXT_USER_DEFERRED)";
885 	default:
886 		return "";
887 	}
888 }
889 
890 static void callchain__printf(struct evsel *evsel,
891 			      struct perf_sample *sample)
892 {
893 	unsigned int i;
894 	struct ip_callchain *callchain = sample->callchain;
895 
896 	if (evsel__has_branch_callstack(evsel))
897 		callchain__lbr_callstack_printf(sample);
898 
899 	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
900 
901 	for (i = 0; i < callchain->nr; i++)
902 		printf("..... %2d: %016" PRIx64 "%s\n",
903 		       i, callchain->ips[i],
904 		       callchain_context_str(callchain->ips[i]));
905 
906 	if (sample->deferred_callchain)
907 		printf("...... (deferred)\n");
908 }
909 
910 static void branch_stack__printf(struct perf_sample *sample,
911 				 struct evsel *evsel)
912 {
913 	struct branch_entry *entries = perf_sample__branch_entries(sample);
914 	bool callstack = evsel__has_branch_callstack(evsel);
915 	u64 *branch_stack_cntr = sample->branch_stack_cntr;
916 	uint64_t i;
917 
918 	if (!callstack) {
919 		printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
920 	} else {
921 		/* The reason for adding 1 to nr is that expanding the
922 		 * branch stack generates nr + 1 callstack records. e.g., for
923 		 *         B()->C()
924 		 *         A()->B()
925 		 * the final callstack should be:
926 		 *         C()
927 		 *         B()
928 		 *         A()
929 		 */
930 		printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
931 	}
932 
933 	for (i = 0; i < sample->branch_stack->nr; i++) {
934 		struct branch_entry *e = &entries[i];
935 
936 		if (!callstack) {
937 			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n",
938 				i, e->from, e->to,
939 				(unsigned short)e->flags.cycles,
940 				e->flags.mispred ? "M" : " ",
941 				e->flags.predicted ? "P" : " ",
942 				e->flags.abort ? "A" : " ",
943 				e->flags.in_tx ? "T" : " ",
944 				(unsigned)e->flags.reserved,
945 				get_branch_type(e),
946 				e->flags.spec ? branch_spec_desc(e->flags.spec) : "");
947 		} else {
948 			if (i == 0) {
949 				printf("..... %2"PRIu64": %016" PRIx64 "\n"
950 				       "..... %2"PRIu64": %016" PRIx64 "\n",
951 						i, e->to, i+1, e->from);
952 			} else {
953 				printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
954 			}
955 		}
956 	}
957 
958 	if (branch_stack_cntr) {
959 		unsigned int br_cntr_width, br_cntr_nr;
960 
961 		perf_env__find_br_cntr_info(evsel__env(evsel), &br_cntr_nr, &br_cntr_width);
962 		printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
963 			sample->branch_stack->nr, br_cntr_width, br_cntr_nr);
964 		for (i = 0; i < sample->branch_stack->nr; i++)
965 			printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
966 	}
967 }
968 
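/* regs[] is packed: it holds one value per set bit in mask, in bit order. */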
969 static void regs_dump__printf(u64 mask, u64 *regs, uint16_t e_machine, uint32_t e_flags)
970 {
971 	unsigned rid, i = 0;
972 
973 	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
974 		u64 val = regs[i++];
975 
976 		printf(".... %-5s 0x%016" PRIx64 "\n",
977 		       perf_reg_name(rid, e_machine, e_flags), val);
978 	}
979 }
980 
981 static const char *regs_abi[] = {
982 	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
983 	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
984 	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
985 };
986 
987 static inline const char *regs_dump_abi(struct regs_dump *d)
988 {
989 	if (d->abi > PERF_SAMPLE_REGS_ABI_64)
990 		return "unknown";
991 
992 	return regs_abi[d->abi];
993 }
994 
995 static void regs__printf(const char *type, struct regs_dump *regs,
996 			 uint16_t e_machine, uint32_t e_flags)
997 {
998 	u64 mask = regs->mask;
999 
1000 	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
1001 	       type,
1002 	       mask,
1003 	       regs_dump_abi(regs));
1004 
1005 	regs_dump__printf(mask, regs->regs, e_machine, e_flags);
1006 }
1007 
1008 static void regs_user__printf(struct perf_sample *sample, uint16_t e_machine, uint32_t e_flags)
1009 {
1010 	struct regs_dump *user_regs;
1011 
1012 	if (!sample->user_regs)
1013 		return;
1014 
1015 	user_regs = perf_sample__user_regs(sample);
1016 
1017 	if (user_regs->regs)
1018 		regs__printf("user", user_regs, e_machine, e_flags);
1019 }
1020 
1021 static void regs_intr__printf(struct perf_sample *sample, uint16_t e_machine, uint32_t e_flags)
1022 {
1023 	struct regs_dump *intr_regs;
1024 
1025 	if (!sample->intr_regs)
1026 		return;
1027 
1028 	intr_regs = perf_sample__intr_regs(sample);
1029 
1030 	if (intr_regs->regs)
1031 		regs__printf("intr", intr_regs, e_machine, e_flags);
1032 }
1033 
1034 static void stack_user__printf(struct stack_dump *dump)
1035 {
1036 	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
1037 	       dump->size, dump->offset);
1038 }
1039 
1040 static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
1041 {
1042 	u64 sample_type = __evlist__combined_sample_type(evlist);
1043 
1044 	if (event->header.type != PERF_RECORD_SAMPLE &&
1045 	    !evlist__sample_id_all(evlist)) {
1046 		fputs("-1 -1 ", stdout);
1047 		return;
1048 	}
1049 
1050 	if (sample_type & PERF_SAMPLE_CPU)
1051 		printf("%u ", sample->cpu);
1052 
1053 	if (sample_type & PERF_SAMPLE_TIME)
1054 		printf("%" PRIu64 " ", sample->time);
1055 }
1056 
1057 static void sample_read__printf(struct perf_sample *sample, u64 read_format)
1058 {
1059 	printf("... sample_read:\n");
1060 
1061 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1062 		printf("...... time enabled %016" PRIx64 "\n",
1063 		       sample->read.time_enabled);
1064 
1065 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1066 		printf("...... time running %016" PRIx64 "\n",
1067 		       sample->read.time_running);
1068 
1069 	if (read_format & PERF_FORMAT_GROUP) {
1070 		struct sample_read_value *value = sample->read.group.values;
1071 
1072 		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
1073 
1074 		sample_read_group__for_each(value, sample->read.group.nr, read_format) {
1075 			printf("..... id %016" PRIx64
1076 			       ", value %016" PRIx64,
1077 			       value->id, value->value);
1078 			if (read_format & PERF_FORMAT_LOST)
1079 				printf(", lost %" PRIu64, value->lost);
1080 			printf("\n");
1081 		}
1082 	} else {
1083 		printf("..... id %016" PRIx64 ", value %016" PRIx64,
1084 			sample->read.one.id, sample->read.one.value);
1085 		if (read_format & PERF_FORMAT_LOST)
1086 			printf(", lost %" PRIu64, sample->read.one.lost);
1087 		printf("\n");
1088 	}
1089 }
1090 
1091 static void dump_event(struct evlist *evlist, union perf_event *event,
1092 		       u64 file_offset, struct perf_sample *sample,
1093 		       const char *file_path)
1094 {
1095 	if (!dump_trace)
1096 		return;
1097 
1098 	printf("\n%#" PRIx64 "@%s [%#x]: event: %d\n",
1099 	       file_offset, file_path, event->header.size, event->header.type);
1100 
1101 	trace_event(event);
1102 	if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
1103 		evlist->trace_event_sample_raw(evlist, event, sample);
1104 
1105 	if (sample)
1106 		evlist__print_tstamp(evlist, event, sample);
1107 
1108 	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
1109 	       event->header.size, perf_event__name(event->header.type));
1110 }
1111 
1112 char *get_page_size_name(u64 size, char *str)
1113 {
1114 	if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
1115 		snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");
1116 
1117 	return str;
1118 }
1119 
1120 static void dump_sample(struct machine *machine, struct evsel *evsel, union perf_event *event,
1121 			struct perf_sample *sample)
1122 {
1123 	u64 sample_type;
1124 	char str[PAGE_SIZE_NAME_LEN];
1125 	uint16_t e_machine = EM_NONE;
1126 	uint32_t e_flags = 0;
1127 
1128 	if (!dump_trace)
1129 		return;
1130 
1131 	sample_type = evsel->core.attr.sample_type;
1132 
1133 	if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR)) {
1134 		struct thread *thread = machine__find_thread(machine, sample->pid, sample->tid);
1135 
1136 		e_machine = thread__e_machine(thread, machine, &e_flags);
1137 	}
1138 
1139 	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
1140 	       event->header.misc, sample->pid, sample->tid, sample->ip,
1141 	       sample->period, sample->addr);
1142 
1143 	if (evsel__has_callchain(evsel))
1144 		callchain__printf(evsel, sample);
1145 
1146 	if (evsel__has_br_stack(evsel))
1147 		branch_stack__printf(sample, evsel);
1148 
1149 	if (sample_type & PERF_SAMPLE_REGS_USER)
1150 		regs_user__printf(sample, e_machine, e_flags);
1151 
1152 	if (sample_type & PERF_SAMPLE_REGS_INTR)
1153 		regs_intr__printf(sample, e_machine, e_flags);
1154 
1155 	if (sample_type & PERF_SAMPLE_STACK_USER)
1156 		stack_user__printf(&sample->user_stack);
1157 
1158 	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1159 		printf("... weight: %" PRIu64 "", sample->weight);
1160 		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
1161 			printf(",0x%"PRIx16"", sample->ins_lat);
1162 			printf(",0x%"PRIx16"", sample->weight3);
1163 		}
1164 		printf("\n");
1165 	}
1166 
1167 	if (sample_type & PERF_SAMPLE_DATA_SRC)
1168 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
1169 
1170 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1171 		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
1172 
1173 	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
1174 		printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
1175 
1176 	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
1177 		printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str));
1178 
1179 	if (sample_type & PERF_SAMPLE_TRANSACTION)
1180 		printf("... transaction: %" PRIx64 "\n", sample->transaction);
1181 
1182 	if (sample_type & PERF_SAMPLE_READ)
1183 		sample_read__printf(sample, evsel->core.attr.read_format);
1184 }
1185 
1186 static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
1187 				    struct perf_sample *sample)
1188 {
1189 	if (!dump_trace)
1190 		return;
1191 
1192 	printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
1193 	       event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
1194 
1195 	if (evsel__has_callchain(evsel))
1196 		callchain__printf(evsel, sample);
1197 }
1198 
1199 static void dump_read(struct evsel *evsel, union perf_event *event)
1200 {
1201 	struct perf_record_read *read_event = &event->read;
1202 	u64 read_format;
1203 
1204 	if (!dump_trace)
1205 		return;
1206 
1207 	printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
1208 	       evsel__name(evsel), event->read.value);
1209 
1210 	if (!evsel)
1211 		return;
1212 
1213 	read_format = evsel->core.attr.read_format;
1214 
1215 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1216 		printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
1217 
1218 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1219 		printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
1220 
1221 	if (read_format & PERF_FORMAT_ID)
1222 		printf("... id           : %" PRI_lu64 "\n", read_event->id);
1223 
1224 	if (read_format & PERF_FORMAT_LOST)
1225 		printf("... lost         : %" PRI_lu64 "\n", read_event->lost);
1226 }
1227 
1228 static struct machine *machines__find_for_cpumode(struct machines *machines,
1229 					       union perf_event *event,
1230 					       struct perf_sample *sample)
1231 {
1232 	if (perf_guest &&
1233 	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
1234 	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
1235 		u32 pid;
1236 
1237 		if (sample->machine_pid)
1238 			pid = sample->machine_pid;
1239 		else if (event->header.type == PERF_RECORD_MMAP
1240 		    || event->header.type == PERF_RECORD_MMAP2)
1241 			pid = event->mmap.pid;
1242 		else
1243 			pid = sample->pid;
1244 
1245 		/*
1246 		 * Guest code machine is created as needed and does not use
1247 		 * DEFAULT_GUEST_KERNEL_ID.
1248 		 */
1249 		if (symbol_conf.guest_code)
1250 			return machines__findnew(machines, pid);
1251 
1252 		return machines__find_guest(machines, pid);
1253 	}
1254 
1255 	return &machines->host;
1256 }
1257 
1258 static int deliver_sample_value(struct evlist *evlist,
1259 				const struct perf_tool *tool,
1260 				union perf_event *event,
1261 				struct perf_sample *sample,
1262 				struct sample_read_value *v,
1263 				struct machine *machine,
1264 				bool per_thread)
1265 {
1266 	struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
1267 	struct evsel *saved_evsel = sample->evsel;
1268 	u64 *storage = NULL;
1269 	int ret;
1270 
1271 	if (sid) {
1272 		storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread);
1273 	}
1274 
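	/*
	 * PERF_SAMPLE_READ values are running totals, so derive this
	 * sample's period as the delta from the value previously seen
	 * for this id (and thread, in the per-thread case).
	 */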
1275 	if (storage) {
1276 		sample->id     = v->id;
1277 		sample->period = v->value - *storage;
1278 		*storage       = v->value;
1279 	}
1280 
1281 	if (!storage || sid->evsel == NULL) {
1282 		++evlist->stats.nr_unknown_id;
1283 		return 0;
1284 	}
1285 
1286 	/*
1287 	 * There's no reason to deliver a sample
1288 	 * with a zero period, bail out.
1289 	 */
1290 	if (!sample->period)
1291 		return 0;
1292 
1293 	sample->evsel = container_of(sid->evsel, struct evsel, core);
1294 	ret = tool->sample(tool, event, sample, sample->evsel, machine);
1295 	sample->evsel = saved_evsel;
1296 	return ret;
1297 }
1298 
1299 static int deliver_sample_group(struct evlist *evlist,
1300 				const struct perf_tool *tool,
1301 				union  perf_event *event,
1302 				struct perf_sample *sample,
1303 				struct machine *machine,
1304 				u64 read_format,
1305 				bool per_thread)
1306 {
1307 	int ret = -EINVAL;
1308 	struct sample_read_value *v = sample->read.group.values;
1309 
1310 	if (tool->dont_split_sample_group)
1311 		return deliver_sample_value(evlist, tool, event, sample, v, machine,
1312 					    per_thread);
1313 
1314 	sample_read_group__for_each(v, sample->read.group.nr, read_format) {
1315 		ret = deliver_sample_value(evlist, tool, event, sample, v,
1316 					   machine, per_thread);
1317 		if (ret)
1318 			break;
1319 	}
1320 
1321 	return ret;
1322 }
1323 
1324 static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool *tool,
1325 				  union  perf_event *event, struct perf_sample *sample,
1326 				  struct evsel *evsel, struct machine *machine)
1327 {
1328 	/* We know evsel != NULL. */
1329 	u64 sample_type = evsel->core.attr.sample_type;
1330 	u64 read_format = evsel->core.attr.read_format;
1331 	bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core);
1332 
1333 	/* Standard sample delivery. */
1334 	if (!(sample_type & PERF_SAMPLE_READ))
1335 		return tool->sample(tool, event, sample, evsel, machine);
1336 
1337 	/* For PERF_SAMPLE_READ we have either single or group mode. */
1338 	if (read_format & PERF_FORMAT_GROUP)
1339 		return deliver_sample_group(evlist, tool, event, sample,
1340 					    machine, read_format, per_thread);
1341 	else
1342 		return deliver_sample_value(evlist, tool, event, sample,
1343 					    &sample->read.one, machine,
1344 					    per_thread);
1345 }
1346 
1347 /*
1348  * Samples with deferred callchains should wait for the next matching
1349  * PERF_RECORD_CALLCHAIN_DEFERRED entries.  Keep the events in a list and
1350  * deliver them once it finds the callchains.
1351  */
1352 struct deferred_event {
1353 	struct list_head list;
1354 	union perf_event *event;
1355 };
1356 
1357 /*
1358  * This is called when a deferred callchain record comes up.  Find all matching
1359  * samples, merge the callchains and process them.
1360  */
1361 static int evlist__deliver_deferred_callchain(struct evlist *evlist,
1362 					      const struct perf_tool *tool,
1363 					      union  perf_event *event,
1364 					      struct perf_sample *sample,
1365 					      struct machine *machine)
1366 {
1367 	struct deferred_event *de, *tmp;
1368 	int ret = 0;
1369 
1370 	if (!tool->merge_deferred_callchains) {
1371 		struct evsel *saved_evsel = sample->evsel;
1372 
1373 		sample->evsel = evlist__id2evsel(evlist, sample->id);
1374 		ret = tool->callchain_deferred(tool, event, sample,
1375 					       sample->evsel, machine);
1376 		sample->evsel = saved_evsel;
1377 		return ret;
1378 	}
1379 
1380 	list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
1381 		struct perf_sample orig_sample;
1382 
1383 		perf_sample__init(&orig_sample, /*all=*/false);
1384 		ret = evlist__parse_sample(evlist, de->event, &orig_sample);
1385 		if (ret < 0) {
1386 			pr_err("failed to parse original sample\n");
1387 			perf_sample__exit(&orig_sample);
1388 			break;
1389 		}
1390 
1391 		if (sample->tid != orig_sample.tid) {
1392 			perf_sample__exit(&orig_sample);
1393 			continue;
1394 		}
1395 
1396 		if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
1397 			sample__merge_deferred_callchain(&orig_sample, sample);
1398 		else
1399 			orig_sample.deferred_callchain = false;
1400 
1401 		orig_sample.evsel = evlist__id2evsel(evlist, orig_sample.id);
1402 		ret = evlist__deliver_sample(evlist, tool, de->event,
1403 					     &orig_sample, orig_sample.evsel, machine);
1404 
1405 		perf_sample__exit(&orig_sample);
1406 		list_del(&de->list);
1407 		free(de->event);
1408 		free(de);
1409 
1410 		if (ret)
1411 			break;
1412 	}
1413 	return ret;
1414 }
1415 
1416 /*
1417  * This is called at the end of the data processing for the session.  Flush the
1418  * remaining samples as there's no hope for matching deferred callchains.
1419  */
1420 static int session__flush_deferred_samples(struct perf_session *session,
1421 					   const struct perf_tool *tool)
1422 {
1423 	struct evlist *evlist = session->evlist;
1424 	struct machine *machine = &session->machines.host;
1425 	struct deferred_event *de, *tmp;
1426 	int ret = 0;
1427 
1428 	list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
1429 		struct perf_sample sample;
1430 
1431 		perf_sample__init(&sample, /*all=*/false);
1432 		ret = evlist__parse_sample(evlist, de->event, &sample);
1433 		if (ret < 0) {
1434 			pr_err("failed to parse original sample\n");
1435 			perf_sample__exit(&sample);
1436 			break;
1437 		}
1438 
1439 		sample.evsel = evlist__id2evsel(evlist, sample.id);
1440 		ret = evlist__deliver_sample(evlist, tool, de->event,
1441 					     &sample, sample.evsel, machine);
1442 
1443 		perf_sample__exit(&sample);
1444 		list_del(&de->list);
1445 		free(de->event);
1446 		free(de);
1447 
1448 		if (ret)
1449 			break;
1450 	}
1451 	return ret;
1452 }
1453 
1454 static int machines__deliver_event(struct machines *machines,
1455 				   struct evlist *evlist,
1456 				   union perf_event *event,
1457 				   struct perf_sample *sample,
1458 				   const struct perf_tool *tool, u64 file_offset,
1459 				   const char *file_path)
1460 {
1461 	struct evsel *evsel;
1462 	struct machine *machine;
1463 
1464 	dump_event(evlist, event, file_offset, sample, file_path);
1465 
1466 	if (!sample->evsel)
1467 		sample->evsel = evlist__id2evsel(evlist, sample->id);
1468 	else
1469 		assert(sample->evsel == evlist__id2evsel(evlist, sample->id));
1470 
1471 	evsel = sample->evsel;
1472 	machine = machines__find_for_cpumode(machines, event, sample);
1473 
1474 	switch (event->header.type) {
1475 	case PERF_RECORD_SAMPLE:
1476 		if (evsel == NULL) {
1477 			++evlist->stats.nr_unknown_id;
1478 			return 0;
1479 		}
1480 		if (machine == NULL) {
1481 			++evlist->stats.nr_unprocessable_samples;
1482 			dump_sample(machine, evsel, event, sample);
1483 			return 0;
1484 		}
1485 		dump_sample(machine, evsel, event, sample);
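		/*
		 * A sample with a deferred callchain is stashed on
		 * evlist->deferred_samples until the matching
		 * PERF_RECORD_CALLCHAIN_DEFERRED record is processed.
		 */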
1486 		if (sample->deferred_callchain && tool->merge_deferred_callchains) {
1487 			struct deferred_event *de = malloc(sizeof(*de));
1488 			size_t sz = event->header.size;
1489 
1490 			if (de == NULL)
1491 				return -ENOMEM;
1492 
1493 			de->event = malloc(sz);
1494 			if (de->event == NULL) {
1495 				free(de);
1496 				return -ENOMEM;
1497 			}
1498 			memcpy(de->event, event, sz);
1499 			list_add_tail(&de->list, &evlist->deferred_samples);
1500 			return 0;
1501 		}
1502 		return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
1503 	case PERF_RECORD_MMAP:
1504 		return tool->mmap(tool, event, sample, machine);
1505 	case PERF_RECORD_MMAP2:
1506 		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
1507 			++evlist->stats.nr_proc_map_timeout;
1508 		return tool->mmap2(tool, event, sample, machine);
1509 	case PERF_RECORD_COMM:
1510 		return tool->comm(tool, event, sample, machine);
1511 	case PERF_RECORD_NAMESPACES:
1512 		return tool->namespaces(tool, event, sample, machine);
1513 	case PERF_RECORD_CGROUP:
1514 		return tool->cgroup(tool, event, sample, machine);
1515 	case PERF_RECORD_FORK:
1516 		return tool->fork(tool, event, sample, machine);
1517 	case PERF_RECORD_EXIT:
1518 		return tool->exit(tool, event, sample, machine);
1519 	case PERF_RECORD_LOST:
1520 		if (tool->lost == perf_event__process_lost)
1521 			evlist->stats.total_lost += event->lost.lost;
1522 		return tool->lost(tool, event, sample, machine);
1523 	case PERF_RECORD_LOST_SAMPLES:
1524 		if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)
1525 			evlist->stats.total_dropped_samples += event->lost_samples.lost;
1526 		else if (tool->lost_samples == perf_event__process_lost_samples)
1527 			evlist->stats.total_lost_samples += event->lost_samples.lost;
1528 		return tool->lost_samples(tool, event, sample, machine);
1529 	case PERF_RECORD_READ:
1530 		dump_read(evsel, event);
1531 		return tool->read(tool, event, sample, evsel, machine);
1532 	case PERF_RECORD_THROTTLE:
1533 		return tool->throttle(tool, event, sample, machine);
1534 	case PERF_RECORD_UNTHROTTLE:
1535 		return tool->unthrottle(tool, event, sample, machine);
1536 	case PERF_RECORD_AUX:
1537 		if (tool->aux == perf_event__process_aux) {
1538 			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
1539 				evlist->stats.total_aux_lost += 1;
1540 			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
1541 				evlist->stats.total_aux_partial += 1;
1542 			if (event->aux.flags & PERF_AUX_FLAG_COLLISION)
1543 				evlist->stats.total_aux_collision += 1;
1544 		}
1545 		return tool->aux(tool, event, sample, machine);
1546 	case PERF_RECORD_ITRACE_START:
1547 		return tool->itrace_start(tool, event, sample, machine);
1548 	case PERF_RECORD_SWITCH:
1549 	case PERF_RECORD_SWITCH_CPU_WIDE:
1550 		return tool->context_switch(tool, event, sample, machine);
1551 	case PERF_RECORD_KSYMBOL:
1552 		return tool->ksymbol(tool, event, sample, machine);
1553 	case PERF_RECORD_BPF_EVENT:
1554 		return tool->bpf(tool, event, sample, machine);
1555 	case PERF_RECORD_TEXT_POKE:
1556 		return tool->text_poke(tool, event, sample, machine);
1557 	case PERF_RECORD_AUX_OUTPUT_HW_ID:
1558 		return tool->aux_output_hw_id(tool, event, sample, machine);
1559 	case PERF_RECORD_CALLCHAIN_DEFERRED:
1560 		dump_deferred_callchain(evsel, event, sample);
1561 		return evlist__deliver_deferred_callchain(evlist, tool, event,
1562 							  sample, machine);
1563 	default:
1564 		++evlist->stats.nr_unknown_events;
1565 		return -1;
1566 	}
1567 }
1568 
1569 static int perf_session__deliver_event(struct perf_session *session,
1570 				       union perf_event *event,
1571 				       const struct perf_tool *tool,
1572 				       u64 file_offset,
1573 				       const char *file_path)
1574 {
1575 	struct perf_sample sample;
1576 	int ret;
1577 
1578 	perf_sample__init(&sample, /*all=*/false);
1579 	ret = evlist__parse_sample(session->evlist, event, &sample);
1580 	if (ret) {
1581 		pr_err("Can't parse sample, err = %d\n", ret);
1582 		goto out;
1583 	}
1584 
1585 	ret = auxtrace__process_event(session, event, &sample, tool);
1586 	if (ret < 0)
1587 		goto out;
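	/* A positive return means auxtrace consumed the event. */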
1588 	if (ret > 0) {
1589 		ret = 0;
1590 		goto out;
1591 	}
1592 
1593 	ret = machines__deliver_event(&session->machines, session->evlist,
1594 				      event, &sample, tool, file_offset, file_path);
1595 
1596 	if (dump_trace && sample.aux_sample.size)
1597 		auxtrace__dump_auxtrace_sample(session, &sample);
1598 out:
1599 	perf_sample__exit(&sample);
1600 	return ret;
1601 }
1602 
1603 static s64 perf_session__process_user_event(struct perf_session *session,
1604 					    union perf_event *event,
1605 					    u64 file_offset,
1606 					    const char *file_path)
1607 {
1608 	struct ordered_events *oe = &session->ordered_events;
1609 	const struct perf_tool *tool = session->tool;
1610 	struct perf_sample sample;
1611 	int fd = perf_data__fd(session->data);
1612 	s64 err;
1613 
1614 	perf_sample__init(&sample, /*all=*/true);
1615 	if ((event->header.type != PERF_RECORD_COMPRESSED &&
1616 	     event->header.type != PERF_RECORD_COMPRESSED2) ||
1617 	    perf_tool__compressed_is_stub(tool))
1618 		dump_event(session->evlist, event, file_offset, &sample, file_path);
1619 
1620 	/* These events are processed right away */
1621 	switch (event->header.type) {
1622 	case PERF_RECORD_HEADER_ATTR:
1623 		err = tool->attr(tool, event, &session->evlist);
1624 		if (err == 0) {
1625 			perf_session__set_id_hdr_size(session);
1626 			perf_session__set_comm_exec(session);
1627 		}
1628 		break;
1629 	case PERF_RECORD_EVENT_UPDATE:
1630 		err = tool->event_update(tool, event, &session->evlist);
1631 		break;
1632 	case PERF_RECORD_HEADER_EVENT_TYPE:
1633 		/*
1634 		 * Deprecated, but we need to handle it for the sake
1635 		 * of old data files created in pipe mode.
1636 		 */
1637 		err = 0;
1638 		break;
1639 	case PERF_RECORD_HEADER_TRACING_DATA:
1640 		/*
1641 		 * Set up for reading in the middle of the mmapped data,
1642 		 * but only when we are in 'file' mode. The 'pipe' fd is
1643 		 * already in the proper place.
1644 		 */
1645 		if (!perf_data__is_pipe(session->data))
1646 			lseek(fd, file_offset, SEEK_SET);
1647 		err = tool->tracing_data(tool, session, event);
1648 		break;
1649 	case PERF_RECORD_HEADER_BUILD_ID:
1650 		err = tool->build_id(tool, session, event);
1651 		break;
1652 	case PERF_RECORD_FINISHED_ROUND:
1653 		err = tool->finished_round(tool, event, oe);
1654 		break;
1655 	case PERF_RECORD_ID_INDEX:
1656 		err = tool->id_index(tool, session, event);
1657 		break;
1658 	case PERF_RECORD_AUXTRACE_INFO:
1659 		err = tool->auxtrace_info(tool, session, event);
1660 		break;
1661 	case PERF_RECORD_AUXTRACE:
1662 		/*
1663 		 * Setup for reading amidst mmap, but only when we
1664 		 * are in 'file' mode. The 'pipe' fd is already in the
1665 		 * proper place.
1666 		 */
1667 		if (!perf_data__is_pipe(session->data))
1668 			lseek(fd, file_offset + event->header.size, SEEK_SET);
1669 		err = tool->auxtrace(tool, session, event);
1670 		break;
1671 	case PERF_RECORD_AUXTRACE_ERROR:
1672 		perf_session__auxtrace_error_inc(session, event);
1673 		err = tool->auxtrace_error(tool, session, event);
1674 		break;
1675 	case PERF_RECORD_THREAD_MAP:
1676 		err = tool->thread_map(tool, session, event);
1677 		break;
1678 	case PERF_RECORD_CPU_MAP:
1679 		err = tool->cpu_map(tool, session, event);
1680 		break;
1681 	case PERF_RECORD_STAT_CONFIG:
1682 		err = tool->stat_config(tool, session, event);
1683 		break;
1684 	case PERF_RECORD_STAT:
1685 		err = tool->stat(tool, session, event);
1686 		break;
1687 	case PERF_RECORD_STAT_ROUND:
1688 		err = tool->stat_round(tool, session, event);
1689 		break;
1690 	case PERF_RECORD_TIME_CONV:
1691 		session->time_conv = event->time_conv;
1692 		err = tool->time_conv(tool, session, event);
1693 		break;
1694 	case PERF_RECORD_HEADER_FEATURE:
1695 		err = tool->feature(tool, session, event);
1696 		break;
1697 	case PERF_RECORD_COMPRESSED:
1698 	case PERF_RECORD_COMPRESSED2:
1699 		err = tool->compressed(tool, session, event, file_offset, file_path);
1700 		if (err)
1701 			dump_event(session->evlist, event, file_offset, &sample, file_path);
1702 		break;
1703 	case PERF_RECORD_FINISHED_INIT:
1704 		err = tool->finished_init(tool, session, event);
1705 		break;
1706 	case PERF_RECORD_BPF_METADATA:
1707 		err = tool->bpf_metadata(tool, session, event);
1708 		break;
1709 	case PERF_RECORD_SCHEDSTAT_CPU:
1710 		err = tool->schedstat_cpu(tool, session, event);
1711 		break;
1712 	case PERF_RECORD_SCHEDSTAT_DOMAIN:
1713 		err = tool->schedstat_domain(tool, session, event);
1714 		break;
1715 	default:
1716 		err = -EINVAL;
1717 		break;
1718 	}
1719 	perf_sample__exit(&sample);
1720 	return err;
1721 }
1722 
1723 int perf_session__deliver_synth_event(struct perf_session *session,
1724 				      union perf_event *event,
1725 				      struct perf_sample *sample)
1726 {
1727 	struct evlist *evlist = session->evlist;
1728 	const struct perf_tool *tool = session->tool;
1729 
1730 	events_stats__inc(&evlist->stats, event->header.type);
1731 
1732 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
1733 		return perf_session__process_user_event(session, event, 0, NULL);
1734 
1735 	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL);
1736 }
1737 
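/*
 * Usage sketch (hypothetical caller): register a synthesized event
 * source with the session so that later samples carrying 'id' can be
 * resolved. The attr size must match exactly or -EINVAL is returned:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_DUMMY,
 *		.size = sizeof(attr),
 *	};
 *
 *	err = perf_session__deliver_synth_attr_event(session, &attr, id);
 */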
1738 int perf_session__deliver_synth_attr_event(struct perf_session *session,
1739 					   const struct perf_event_attr *attr,
1740 					   u64 id)
1741 {
1742 	union {
1743 		struct {
1744 			struct perf_record_header_attr attr;
1745 			u64 ids[1];
1746 		} attr_id;
1747 		union perf_event ev;
1748 	} ev = {
1749 		.attr_id.attr.header.type = PERF_RECORD_HEADER_ATTR,
1750 		.attr_id.attr.header.size = sizeof(ev.attr_id),
1751 		.attr_id.ids[0] = id,
1752 	};
1753 
1754 	if (attr->size != sizeof(ev.attr_id.attr.attr)) {
1755 		pr_debug("Unexpected perf_event_attr size\n");
1756 		return -EINVAL;
1757 	}
1758 	ev.attr_id.attr.attr = *attr;
1759 	return perf_session__deliver_synth_event(session, &ev.ev, NULL);
1760 }
1761 
1762 static void event_swap(union perf_event *event, bool sample_id_all)
1763 {
1764 	perf_event__swap_op swap;
1765 
1766 	swap = perf_event__swap_ops[event->header.type];
1767 	if (swap)
1768 		swap(event, sample_id_all);
1769 }
1770 
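/*
 * Example (sketch): peek at the event stored at 'offset' without
 * disturbing normal processing; this only works for non-pipe data:
 *
 *	char buf[PERF_SAMPLE_MAX_SIZE];
 *	union perf_event *ev;
 *
 *	if (perf_session__peek_event(session, offset, buf, sizeof(buf),
 *				     &ev, NULL) == 0)
 *		pr_debug("event type %u\n", ev->header.type);
 */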
1771 int perf_session__peek_event(struct perf_session *session, off_t file_offset,
1772 			     void *buf, size_t buf_sz,
1773 			     union perf_event **event_ptr,
1774 			     struct perf_sample *sample)
1775 {
1776 	union perf_event *event;
1777 	size_t hdr_sz, rest;
1778 	int fd;
1779 
1780 	if (session->one_mmap && !session->header.needs_swap) {
1781 		event = file_offset - session->one_mmap_offset +
1782 			session->one_mmap_addr;
1783 		goto out_parse_sample;
1784 	}
1785 
1786 	if (perf_data__is_pipe(session->data))
1787 		return -1;
1788 
1789 	fd = perf_data__fd(session->data);
1790 	hdr_sz = sizeof(struct perf_event_header);
1791 
1792 	if (buf_sz < hdr_sz)
1793 		return -1;
1794 
1795 	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
1796 	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
1797 		return -1;
1798 
1799 	event = (union perf_event *)buf;
1800 
1801 	if (session->header.needs_swap)
1802 		perf_event_header__bswap(&event->header);
1803 
1804 	if (event->header.size < hdr_sz || event->header.size > buf_sz)
1805 		return -1;
1806 
1807 	buf += hdr_sz;
1808 	rest = event->header.size - hdr_sz;
1809 
1810 	if (readn(fd, buf, rest) != (ssize_t)rest)
1811 		return -1;
1812 
1813 	if (session->header.needs_swap)
1814 		event_swap(event, evlist__sample_id_all(session->evlist));
1815 
1816 out_parse_sample:
1817 
1818 	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
1819 	    evlist__parse_sample(session->evlist, event, sample))
1820 		return -1;
1821 
1822 	*event_ptr = event;
1823 
1824 	return 0;
1825 }
1826 
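/*
 * Sketch of a peek_events_cb_t callback (hypothetical), counting the
 * events in [offset, offset + size):
 *
 *	static int count_cb(struct perf_session *s, union perf_event *ev,
 *			    u64 offset, void *data)
 *	{
 *		(*(u64 *)data)++;
 *		return 0;
 *	}
 *
 *	u64 nr = 0;
 *	err = perf_session__peek_events(session, offset, size, count_cb, &nr);
 */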
1827 int perf_session__peek_events(struct perf_session *session, u64 offset,
1828 			      u64 size, peek_events_cb_t cb, void *data)
1829 {
1830 	u64 max_offset = offset + size;
1831 	char buf[PERF_SAMPLE_MAX_SIZE];
1832 	union perf_event *event;
1833 	int err;
1834 
1835 	do {
1836 		err = perf_session__peek_event(session, offset, buf,
1837 					       PERF_SAMPLE_MAX_SIZE, &event,
1838 					       NULL);
1839 		if (err)
1840 			return err;
1841 
1842 		err = cb(session, event, offset, data);
1843 		if (err)
1844 			return err;
1845 
1846 		offset += event->header.size;
1847 		if (event->header.type == PERF_RECORD_AUXTRACE)
1848 			offset += event->auxtrace.size;
1849 
1850 	} while (offset < max_offset);
1851 
1852 	return err;
1853 }
1854 
1855 static s64 perf_session__process_event(struct perf_session *session,
1856 				       union perf_event *event, u64 file_offset,
1857 				       const char *file_path)
1858 {
1859 	struct evlist *evlist = session->evlist;
1860 	const struct perf_tool *tool = session->tool;
1861 	int ret;
1862 
1863 	if (session->header.needs_swap)
1864 		event_swap(event, evlist__sample_id_all(evlist));
1865 
1866 	if (event->header.type >= PERF_RECORD_HEADER_MAX) {
1867 		/* perf does not support unaligned events, stop here. */
1868 		if (event->header.size % sizeof(u64))
1869 			return -EINVAL;
1870 
1871 		/* This perf is outdated and does not support the latest event type. */
1872 		ui__warning("Unsupported header type %u, please consider updating perf.\n",
1873 			    event->header.type);
1874 		/* Skip unsupported event by returning its size. */
1875 		return event->header.size;
1876 	}
1877 
1878 	events_stats__inc(&evlist->stats, event->header.type);
1879 
1880 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
1881 		return perf_session__process_user_event(session, event, file_offset, file_path);
1882 
1883 	if (tool->ordered_events) {
1884 		u64 timestamp = -1ULL;
1885 
1886 		ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
1887 		if (ret && ret != -1)
1888 			return ret;
1889 
1890 		ret = perf_session__queue_event(session, event, timestamp, file_offset, file_path);
1891 		if (ret != -ETIME)
1892 			return ret;
1893 	}
1894 
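	/*
	 * Unordered processing, or events that cannot be queued because
	 * they carry no usable timestamp (-ETIME above), are delivered
	 * right away.
	 */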
1895 	return perf_session__deliver_event(session, event, tool, file_offset, file_path);
1896 }
1897 
1898 void perf_event_header__bswap(struct perf_event_header *hdr)
1899 {
1900 	hdr->type = bswap_32(hdr->type);
1901 	hdr->misc = bswap_16(hdr->misc);
1902 	hdr->size = bswap_16(hdr->size);
1903 }
1904 
1905 struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
1906 {
1907 	return machine__findnew_thread(&session->machines.host, -1, pid);
1908 }
1909 
1910 int perf_session__register_idle_thread(struct perf_session *session)
1911 {
1912 	struct thread *thread = machine__idle_thread(&session->machines.host);
1913 
1914 	/* machine__idle_thread() got the thread, so put it */
1915 	thread__put(thread);
1916 	return thread ? 0 : -1;
1917 }
1918 
1919 static void
1920 perf_session__warn_order(const struct perf_session *session)
1921 {
1922 	const struct ordered_events *oe = &session->ordered_events;
1923 	struct evsel *evsel;
1924 	bool should_warn = true;
1925 
1926 	evlist__for_each_entry(session->evlist, evsel) {
1927 		if (evsel->core.attr.write_backward)
1928 			should_warn = false;
1929 	}
1930 
1931 	if (!should_warn)
1932 		return;
1933 	if (oe->nr_unordered_events != 0)
1934 		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
1935 }
1936 
1937 static void perf_session__warn_about_errors(const struct perf_session *session)
1938 {
1939 	const struct events_stats *stats = &session->evlist->stats;
1940 
1941 	if (session->tool->lost == perf_event__process_lost &&
1942 	    stats->nr_events[PERF_RECORD_LOST] != 0) {
1943 		ui__warning("Processed %d events and lost %d chunks!\n\n"
1944 			    "Check IO/CPU overload!\n\n",
1945 			    stats->nr_events[0],
1946 			    stats->nr_events[PERF_RECORD_LOST]);
1947 	}
1948 
1949 	if (session->tool->lost_samples == perf_event__process_lost_samples) {
1950 		double drop_rate;
1951 
1952 		drop_rate = (double)stats->total_lost_samples /
1953 			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
1954 		if (drop_rate > 0.05) {
1955 			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
1956 				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
1957 				    drop_rate * 100.0);
1958 		}
1959 	}
1960 
1961 	if (session->tool->aux == perf_event__process_aux &&
1962 	    stats->total_aux_lost != 0) {
1963 		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
1964 			    stats->total_aux_lost,
1965 			    stats->nr_events[PERF_RECORD_AUX]);
1966 	}
1967 
1968 	if (session->tool->aux == perf_event__process_aux &&
1969 	    stats->total_aux_partial != 0) {
1970 		bool vmm_exclusive = false;
1971 
1972 		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
1973 		                       &vmm_exclusive);
1974 
1975 		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
1976 		            "Are you running a KVM guest in the background?%s\n\n",
1977 			    stats->total_aux_partial,
1978 			    stats->nr_events[PERF_RECORD_AUX],
1979 			    vmm_exclusive ?
1980 			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
1981 			    "will reduce the gaps to only the guest's timeslices." :
1982 			    "");
1983 	}
1984 
1985 	if (session->tool->aux == perf_event__process_aux &&
1986 	    stats->total_aux_collision != 0) {
1987 		ui__warning("AUX data collisions detected %" PRIu64 " times out of %u!\n\n",
1988 			    stats->total_aux_collision,
1989 			    stats->nr_events[PERF_RECORD_AUX]);
1990 	}
1991 
1992 	if (stats->nr_unknown_events != 0) {
1993 		ui__warning("Found %u unknown events!\n\n"
1994 			    "Is this an older tool processing a perf.data "
1995 			    "file generated by a more recent tool?\n\n"
1996 			    "If that is not the case, consider "
1997 			    "reporting to linux-kernel@vger.kernel.org.\n\n",
1998 			    stats->nr_unknown_events);
1999 	}
2000 
2001 	if (stats->nr_unknown_id != 0) {
2002 		ui__warning("%u samples with id not present in the header\n",
2003 			    stats->nr_unknown_id);
2004 	}
2005 
2006 	if (stats->nr_invalid_chains != 0) {
2007 		ui__warning("Found invalid callchains!\n\n"
2008 			    "%u out of %u events were discarded for this reason.\n\n"
2009 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
2010 			    stats->nr_invalid_chains,
2011 			    stats->nr_events[PERF_RECORD_SAMPLE]);
2012 	}
2013 
2014 	if (stats->nr_unprocessable_samples != 0) {
2015 		ui__warning("%u unprocessable samples recorded.\n"
2016 			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
2017 			    stats->nr_unprocessable_samples);
2018 	}
2019 
2020 	perf_session__warn_order(session);
2021 
2022 	events_stats__auxtrace_error_warn(stats);
2023 
2024 	if (stats->nr_proc_map_timeout != 0) {
2025 		ui__warning("%d map information files for pre-existing threads were\n"
2026 			    "not processed; if there are samples for their addresses,\n"
2027 			    "they will not be resolved. You may find out which threads\n"
2028 			    "these are by running with -v and redirecting the output\n"
2029 			    "to a file.\n"
2030 			    "Was the time limit to process the proc maps too short?\n"
2031 			    "Increase it with --proc-map-timeout\n",
2032 			    stats->nr_proc_map_timeout);
2033 	}
2034 }
2035 
2036 static int perf_session__flush_thread_stack(struct thread *thread,
2037 					    void *p __maybe_unused)
2038 {
2039 	return thread_stack__flush(thread);
2040 }
2041 
2042 static int perf_session__flush_thread_stacks(struct perf_session *session)
2043 {
2044 	return machines__for_each_thread(&session->machines,
2045 					 perf_session__flush_thread_stack,
2046 					 NULL);
2047 }
2048 
2049 volatile sig_atomic_t session_done;
2050 
2051 static int __perf_session__process_decomp_events(struct perf_session *session);
2052 
2053 static int __perf_session__process_pipe_events(struct perf_session *session)
2054 {
2055 	struct ordered_events *oe = &session->ordered_events;
2056 	const struct perf_tool *tool = session->tool;
2057 	struct ui_progress prog;
2058 	union perf_event *event;
2059 	uint32_t size, cur_size = 0;
2060 	void *buf = NULL;
2061 	s64 skip = 0;
2062 	u64 head;
2063 	ssize_t err;
2064 	void *p;
2065 	bool update_prog = false;
2066 
2067 	/*
2068 	 * If the input is a file saving piped data (by redirection), it has
2069 	 * a file name other than "-", so we can get the total size and show
2070 	 * the progress.
2071 	 */
2072 	if (strcmp(session->data->path, "-") && session->data->file.size) {
2073 		ui_progress__init_size(&prog, session->data->file.size,
2074 				       "Processing events...");
2075 		update_prog = true;
2076 	}
2077 
2078 	head = 0;
2079 	cur_size = sizeof(union perf_event);
2080 
2081 	buf = malloc(cur_size);
2082 	if (!buf)
2083 		return -errno;
2084 	ordered_events__set_copy_on_queue(oe, true);
2085 more:
2086 	event = buf;
2087 	err = perf_data__read(session->data, event,
2088 			      sizeof(struct perf_event_header));
2089 	if (err <= 0) {
2090 		if (err == 0)
2091 			goto done;
2092 
2093 		pr_err("failed to read event header\n");
2094 		goto out_err;
2095 	}
2096 
2097 	if (session->header.needs_swap)
2098 		perf_event_header__bswap(&event->header);
2099 
2100 	size = event->header.size;
2101 	if (size < sizeof(struct perf_event_header)) {
2102 		pr_err("bad event header size\n");
2103 		goto out_err;
2104 	}
2105 
2106 	if (size > cur_size) {
2107 		void *new = realloc(buf, size);
2108 		if (!new) {
2109 			pr_err("failed to allocate memory to read event\n");
2110 			goto out_err;
2111 		}
2112 		buf = new;
2113 		cur_size = size;
2114 		event = buf;
2115 	}
2116 	p = event;
2117 	p += sizeof(struct perf_event_header);
2118 
2119 	if (size - sizeof(struct perf_event_header)) {
2120 		err = perf_data__read(session->data, p,
2121 				      size - sizeof(struct perf_event_header));
2122 		if (err <= 0) {
2123 			if (err == 0) {
2124 				pr_err("unexpected end of event stream\n");
2125 				goto done;
2126 			}
2127 
2128 			pr_err("failed to read event data\n");
2129 			goto out_err;
2130 		}
2131 	}
2132 
2133 	if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
2134 		pr_err("%#" PRIx64 " [%#x]: piped event processing failed for event of type: %s (%d)\n",
2135 			head, event->header.size,
2136 			perf_event__name(event->header.type),
2137 			event->header.type);
2138 		err = -EINVAL;
2139 		goto out_err;
2140 	}
2141 
2142 	head += size;
2143 
2144 	if (skip > 0)
2145 		head += skip;
2146 
2147 	err = __perf_session__process_decomp_events(session);
2148 	if (err)
2149 		goto out_err;
2150 
2151 	if (update_prog)
2152 		ui_progress__update(&prog, size);
2153 
2154 	if (!session_done())
2155 		goto more;
2156 done:
2157 	/* do the final flush for ordered samples */
2158 	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
2159 	if (err)
2160 		goto out_err;
2161 	err = session__flush_deferred_samples(session, tool);
2162 	if (err)
2163 		goto out_err;
2164 	err = auxtrace__flush_events(session, tool);
2165 	if (err)
2166 		goto out_err;
2167 	err = perf_session__flush_thread_stacks(session);
2168 out_err:
2169 	free(buf);
2170 	if (update_prog)
2171 		ui_progress__finish();
2172 	if (!tool->no_warn)
2173 		perf_session__warn_about_errors(session);
2174 	ordered_events__free(&session->ordered_events);
2175 	auxtrace__free_events(session);
2176 	return err;
2177 }
2178 
2179 static union perf_event *
2180 prefetch_event(char *buf, u64 head, size_t mmap_size,
2181 	       bool needs_swap, union perf_event *error)
2182 {
2183 	union perf_event *event;
2184 	u16 event_size;
2185 
2186 	/*
2187 	 * Ensure we have enough space remaining to read
2188 	 * the event size from its header.
2189 	 */
2190 	if (head + sizeof(event->header) > mmap_size)
2191 		return NULL;
2192 
2193 	event = (union perf_event *)(buf + head);
2194 	if (needs_swap)
2195 		perf_event_header__bswap(&event->header);
2196 
2197 	event_size = event->header.size;
2198 	if (head + event_size <= mmap_size)
2199 		return event;
2200 
2201 	/* We're not fetching the event so swap back again */
2202 	if (needs_swap)
2203 		perf_event_header__bswap(&event->header);
2204 
2205 	/* Check if the event fits into the next mmapped buf. */
2206 	if (event_size <= mmap_size - head % page_size) {
2207 		/* Remap buf and fetch again. */
2208 		return NULL;
2209 	}
2210 
2211 	/* Invalid input. Event size should never exceed mmap_size. */
2212 	pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
2213 		 " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
2214 
2215 	return error;
2216 }
2217 
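/*
 * Both wrappers below return NULL when the next event does not fit in
 * the current window and a remap is needed. They differ in how an
 * impossibly large event is reported: fetch_mmaped_event() turns it
 * into a hard error via ERR_PTR(-EINVAL), while fetch_decomp_event()
 * returns NULL so the caller just stops walking the decompressed
 * buffer.
 */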
2218 static union perf_event *
2219 fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
2220 {
2221 	return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL));
2222 }
2223 
2224 static union perf_event *
2225 fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
2226 {
2227 	return prefetch_event(buf, head, mmap_size, needs_swap, NULL);
2228 }
2229 
2230 static int __perf_session__process_decomp_events(struct perf_session *session)
2231 {
2232 	s64 skip;
2233 	u64 size;
2234 	struct decomp *decomp = session->active_decomp->decomp_last;
2235 
2236 	if (!decomp)
2237 		return 0;
2238 
2239 	while (decomp->head < decomp->size && !session_done()) {
2240 		union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data,
2241 							     session->header.needs_swap);
2242 
2243 		if (!event)
2244 			break;
2245 
2246 		size = event->header.size;
2247 
2248 		if (size < sizeof(struct perf_event_header) ||
2249 		    (skip = perf_session__process_event(session, event, decomp->file_pos,
2250 							decomp->file_path)) < 0) {
2251 			pr_err("%#" PRIx64 " [%#x]: decompress event processing failed for event of type: %s (%d)\n",
2252 				decomp->file_pos + decomp->head, event->header.size,
2253 				perf_event__name(event->header.type),
2254 				event->header.type);
2255 			return -EINVAL;
2256 		}
2257 
2258 		if (skip)
2259 			size += skip;
2260 
2261 		decomp->head += size;
2262 	}
2263 
2264 	return 0;
2265 }
2266 
2267 /*
2268  * On 64-bit we can mmap the data file in one go, so there is no need
2269  * for tiny mmap slices. On 32-bit we use a ring of 128 slices of 32MB.
2270  */
2271 #if BITS_PER_LONG == 64
2272 #define MMAP_SIZE ULLONG_MAX
2273 #define NUM_MMAPS 1
2274 #else
2275 #define MMAP_SIZE (32 * 1024 * 1024ULL)
2276 #define NUM_MMAPS 128
2277 #endif
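
/*
 * NUM_MMAPS must remain a power of two: reader__mmap() cycles through
 * the mmaps[] ring with "& (ARRAY_SIZE(rd->mmaps) - 1)".
 */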
2278 
2279 struct reader;
2280 
2281 typedef s64 (*reader_cb_t)(struct perf_session *session,
2282 			   union perf_event *event,
2283 			   u64 file_offset,
2284 			   const char *file_path);
2285 
2286 struct reader {
2287 	int		 fd;
2288 	const char	 *path;
2289 	u64		 data_size;
2290 	u64		 data_offset;
2291 	reader_cb_t	 process;
2292 	bool		 in_place_update;
2293 	char		 *mmaps[NUM_MMAPS];
2294 	size_t		 mmap_size;
2295 	int		 mmap_idx;
2296 	char		 *mmap_cur;
2297 	u64		 file_pos;
2298 	u64		 file_offset;
2299 	u64		 head;
2300 	u64		 size;
2301 	bool		 done;
2302 	struct zstd_data   zstd_data;
2303 	struct decomp_data decomp_data;
2304 };
2305 
2306 static int
2307 reader__init(struct reader *rd, bool *one_mmap)
2308 {
2309 	u64 data_size = rd->data_size;
2310 	char **mmaps = rd->mmaps;
2311 
2312 	rd->head = rd->data_offset;
2313 	data_size += rd->data_offset;
2314 
2315 	rd->mmap_size = MMAP_SIZE;
2316 	if (rd->mmap_size > data_size) {
2317 		rd->mmap_size = data_size;
2318 		if (one_mmap)
2319 			*one_mmap = true;
2320 	}
2321 
2322 	memset(mmaps, 0, sizeof(rd->mmaps));
2323 
2324 	if (zstd_init(&rd->zstd_data, 0))
2325 		return -1;
2326 	rd->decomp_data.zstd_decomp = &rd->zstd_data;
2327 
2328 	return 0;
2329 }
2330 
2331 static void
2332 reader__release_decomp(struct reader *rd)
2333 {
2334 	perf_decomp__release_events(rd->decomp_data.decomp);
2335 	zstd_fini(&rd->zstd_data);
2336 }
2337 
2338 static int
2339 reader__mmap(struct reader *rd, struct perf_session *session)
2340 {
2341 	int mmap_prot, mmap_flags;
2342 	char *buf, **mmaps = rd->mmaps;
2343 	u64 page_offset;
2344 
2345 	mmap_prot  = PROT_READ;
2346 	mmap_flags = MAP_SHARED;
2347 
2348 	if (rd->in_place_update) {
2349 		mmap_prot  |= PROT_WRITE;
2350 	} else if (session->header.needs_swap) {
2351 		mmap_prot  |= PROT_WRITE;
2352 		mmap_flags = MAP_PRIVATE;
2353 	}
2354 
2355 	if (mmaps[rd->mmap_idx]) {
2356 		munmap(mmaps[rd->mmap_idx], rd->mmap_size);
2357 		mmaps[rd->mmap_idx] = NULL;
2358 	}
2359 
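	/*
	 * mmap(2) requires a page-aligned file offset: round 'head' down
	 * to a page boundary, advance the file offset by that amount and
	 * keep the remainder in 'head'.
	 */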
2360 	page_offset = page_size * (rd->head / page_size);
2361 	rd->file_offset += page_offset;
2362 	rd->head -= page_offset;
2363 
2364 	buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd,
2365 		   rd->file_offset);
2366 	if (buf == MAP_FAILED) {
2367 		pr_err("failed to mmap file\n");
2368 		return -errno;
2369 	}
2370 	mmaps[rd->mmap_idx] = rd->mmap_cur = buf;
2371 	rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1);
2372 	rd->file_pos = rd->file_offset + rd->head;
2373 	if (session->one_mmap) {
2374 		session->one_mmap_addr = buf;
2375 		session->one_mmap_offset = rd->file_offset;
2376 	}
2377 
2378 	return 0;
2379 }
2380 
2381 enum {
2382 	READER_OK,
2383 	READER_NODATA,
2384 };
2385 
2386 static int
2387 reader__read_event(struct reader *rd, struct perf_session *session,
2388 		   struct ui_progress *prog)
2389 {
2390 	u64 size;
2391 	int err = READER_OK;
2392 	union perf_event *event;
2393 	s64 skip;
2394 
2395 	event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur,
2396 				   session->header.needs_swap);
2397 	if (IS_ERR(event))
2398 		return PTR_ERR(event);
2399 
2400 	if (!event)
2401 		return READER_NODATA;
2402 
2403 	size = event->header.size;
2404 
2405 	skip = -EINVAL;
2406 
2407 	if (size < sizeof(struct perf_event_header) ||
2408 	    (skip = rd->process(session, event, rd->file_pos, rd->path)) < 0) {
2409 		errno = -skip;
2410 		pr_err("%#" PRIx64 " [%#x]: processing failed for event of type: %s (%d) [%m]\n",
2411 		       rd->file_offset + rd->head, event->header.size,
2412 		       perf_event__name(event->header.type),
2413 		       event->header.type);
2414 		err = skip;
2415 		goto out;
2416 	}
2417 
2418 	if (skip)
2419 		size += skip;
2420 
2421 	rd->size += size;
2422 	rd->head += size;
2423 	rd->file_pos += size;
2424 
2425 	err = __perf_session__process_decomp_events(session);
2426 	if (err)
2427 		goto out;
2428 
2429 	ui_progress__update(prog, size);
2430 
2431 out:
2432 	return err;
2433 }
2434 
2435 static inline bool
2436 reader__eof(struct reader *rd)
2437 {
2438 	return (rd->file_pos >= rd->data_size + rd->data_offset);
2439 }
2440 
2441 static int
2442 reader__process_events(struct reader *rd, struct perf_session *session,
2443 		       struct ui_progress *prog)
2444 {
2445 	int err;
2446 
2447 	err = reader__init(rd, &session->one_mmap);
2448 	if (err)
2449 		goto out;
2450 
2451 	session->active_decomp = &rd->decomp_data;
2452 
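	/*
	 * Map a window of the file, then consume events from it until
	 * reader__read_event() reports READER_NODATA, i.e. the next event
	 * crosses the window boundary and a remap is needed.
	 */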
2453 remap:
2454 	err = reader__mmap(rd, session);
2455 	if (err)
2456 		goto out;
2457 
2458 more:
2459 	err = reader__read_event(rd, session, prog);
2460 	if (err < 0)
2461 		goto out;
2462 	else if (err == READER_NODATA)
2463 		goto remap;
2464 
2465 	if (session_done())
2466 		goto out;
2467 
2468 	if (!reader__eof(rd))
2469 		goto more;
2470 
2471 out:
2472 	session->active_decomp = &session->decomp_data;
2473 	return err;
2474 }
2475 
2476 static s64 process_simple(struct perf_session *session,
2477 			  union perf_event *event,
2478 			  u64 file_offset,
2479 			  const char *file_path)
2480 {
2481 	return perf_session__process_event(session, event, file_offset, file_path);
2482 }
2483 
2484 static int __perf_session__process_events(struct perf_session *session)
2485 {
2486 	struct reader rd = {
2487 		.fd		= perf_data__fd(session->data),
2488 		.path		= session->data->file.path,
2489 		.data_size	= session->header.data_size,
2490 		.data_offset	= session->header.data_offset,
2491 		.process	= process_simple,
2492 		.in_place_update = session->data->in_place_update,
2493 	};
2494 	struct ordered_events *oe = &session->ordered_events;
2495 	const struct perf_tool *tool = session->tool;
2496 	struct ui_progress prog;
2497 	int err;
2498 
2499 	if (rd.data_size == 0)
2500 		return -1;
2501 
2502 	ui_progress__init_size(&prog, rd.data_size, "Processing events...");
2503 
2504 	err = reader__process_events(&rd, session, &prog);
2505 	if (err)
2506 		goto out_err;
2507 	/* do the final flush for ordered samples */
2508 	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
2509 	if (err)
2510 		goto out_err;
2511 	err = auxtrace__flush_events(session, tool);
2512 	if (err)
2513 		goto out_err;
2514 	err = session__flush_deferred_samples(session, tool);
2515 	if (err)
2516 		goto out_err;
2517 	err = perf_session__flush_thread_stacks(session);
2518 out_err:
2519 	ui_progress__finish();
2520 	if (!tool->no_warn)
2521 		perf_session__warn_about_errors(session);
2522 	/*
2523 	 * We may be switching the perf.data output, so make ordered_events
2524 	 * reusable.
2525 	 */
2526 	ordered_events__reinit(&session->ordered_events);
2527 	auxtrace__free_events(session);
2528 	reader__release_decomp(&rd);
2529 	session->one_mmap = false;
2530 	return err;
2531 }
2532 
2533 /*
2534  * Process 2 MB of data from each reader in sequence,
2535  * because that is the way the ordered events sorting works
2536  * most efficiently.
2537  */
2538 #define READER_MAX_SIZE (2 * 1024 * 1024)
2539 
2540 /*
2541  * This function reads, merges and processes directory data.
2542  * It assumes version 1 of the directory data, where each
2543  * data file holds per-cpu data, already sorted by the kernel.
2544  */
2545 static int __perf_session__process_dir_events(struct perf_session *session)
2546 {
2547 	struct perf_data *data = session->data;
2548 	const struct perf_tool *tool = session->tool;
2549 	int i, ret, readers, nr_readers;
2550 	struct ui_progress prog;
2551 	u64 total_size = perf_data__size(session->data);
2552 	struct reader *rd;
2553 
2554 	ui_progress__init_size(&prog, total_size, "Processing events...");
2555 
2556 	nr_readers = 1;
2557 	for (i = 0; i < data->dir.nr; i++) {
2558 		if (data->dir.files[i].size)
2559 			nr_readers++;
2560 	}
2561 
2562 	rd = calloc(nr_readers, sizeof(struct reader));
2563 	if (!rd)
2564 		return -ENOMEM;
2565 
2566 	rd[0] = (struct reader) {
2567 		.fd		 = perf_data__fd(session->data),
2568 		.path		 = session->data->file.path,
2569 		.data_size	 = session->header.data_size,
2570 		.data_offset	 = session->header.data_offset,
2571 		.process	 = process_simple,
2572 		.in_place_update = session->data->in_place_update,
2573 	};
2574 	ret = reader__init(&rd[0], NULL);
2575 	if (ret)
2576 		goto out_err;
2577 	ret = reader__mmap(&rd[0], session);
2578 	if (ret)
2579 		goto out_err;
2580 	readers = 1;
2581 
2582 	for (i = 0; i < data->dir.nr; i++) {
2583 		if (!data->dir.files[i].size)
2584 			continue;
2585 		rd[readers] = (struct reader) {
2586 			.fd		 = perf_data_file__fd(&data->dir.files[i]),
2587 			.path		 = data->dir.files[i].path,
2588 			.data_size	 = data->dir.files[i].size,
2589 			.data_offset	 = 0,
2590 			.process	 = process_simple,
2591 			.in_place_update = session->data->in_place_update,
2592 		};
2593 		ret = reader__init(&rd[readers], NULL);
2594 		if (ret)
2595 			goto out_err;
2596 		ret = reader__mmap(&rd[readers], session);
2597 		if (ret)
2598 			goto out_err;
2599 		readers++;
2600 	}
2601 
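	/*
	 * Round-robin over all readers, consuming up to READER_MAX_SIZE
	 * bytes from each before moving to the next, so the ordered
	 * events queue sees interleaved, roughly time-sorted batches.
	 */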
2602 	i = 0;
2603 	while (readers) {
2604 		if (session_done())
2605 			break;
2606 
2607 		if (rd[i].done) {
2608 			i = (i + 1) % nr_readers;
2609 			continue;
2610 		}
2611 		if (reader__eof(&rd[i])) {
2612 			rd[i].done = true;
2613 			readers--;
2614 			continue;
2615 		}
2616 
2617 		session->active_decomp = &rd[i].decomp_data;
2618 		ret = reader__read_event(&rd[i], session, &prog);
2619 		if (ret < 0) {
2620 			goto out_err;
2621 		} else if (ret == READER_NODATA) {
2622 			ret = reader__mmap(&rd[i], session);
2623 			if (ret)
2624 				goto out_err;
2625 		}
2626 
2627 		if (rd[i].size >= READER_MAX_SIZE) {
2628 			rd[i].size = 0;
2629 			i = (i + 1) % nr_readers;
2630 		}
2631 	}
2632 
2633 	ret = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
2634 	if (ret)
2635 		goto out_err;
2636 
2637 	ret = session__flush_deferred_samples(session, tool);
2638 	if (ret)
2639 		goto out_err;
2640 
2641 	ret = perf_session__flush_thread_stacks(session);
2642 out_err:
2643 	ui_progress__finish();
2644 
2645 	if (!tool->no_warn)
2646 		perf_session__warn_about_errors(session);
2647 
2648 	/*
2649 	 * We may be switching the perf.data output, so make ordered_events
2650 	 * reusable.
2651 	 */
2652 	ordered_events__reinit(&session->ordered_events);
2653 
2654 	session->one_mmap = false;
2655 
2656 	session->active_decomp = &session->decomp_data;
2657 	for (i = 0; i < nr_readers; i++)
2658 		reader__release_decomp(&rd[i]);
2659 	zfree(&rd);
2660 
2661 	return ret;
2662 }
2663 
2664 int perf_session__process_events(struct perf_session *session)
2665 {
2666 	if (perf_session__register_idle_thread(session) < 0)
2667 		return -ENOMEM;
2668 
2669 	if (perf_data__is_pipe(session->data))
2670 		return __perf_session__process_pipe_events(session);
2671 
2672 	if (perf_data__is_dir(session->data) && session->data->dir.nr)
2673 		return __perf_session__process_dir_events(session);
2674 
2675 	return __perf_session__process_events(session);
2676 }
2677 
2678 bool perf_session__has_traces(struct perf_session *session, const char *msg)
2679 {
2680 	struct evsel *evsel;
2681 
2682 	evlist__for_each_entry(session->evlist, evsel) {
2683 		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
2684 			return true;
2685 	}
2686 
2687 	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
2688 	return false;
2689 }
2690 
2691 bool perf_session__has_switch_events(struct perf_session *session)
2692 {
2693 	struct evsel *evsel;
2694 
2695 	evlist__for_each_entry(session->evlist, evsel) {
2696 		if (evsel->core.attr.context_switch)
2697 			return true;
2698 	}
2699 
2700 	return false;
2701 }
2702 
2703 int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
2704 {
2705 	char *bracket, *name;
2706 	struct ref_reloc_sym *ref;
2707 	struct kmap *kmap;
2708 
2709 	ref = zalloc(sizeof(struct ref_reloc_sym));
2710 	if (ref == NULL)
2711 		return -ENOMEM;
2712 
2713 	ref->name = name = strdup(symbol_name);
2714 	if (ref->name == NULL) {
2715 		free(ref);
2716 		return -ENOMEM;
2717 	}
2718 
2719 	bracket = strchr(name, ']');
2720 	if (bracket)
2721 		*bracket = '\0';
2722 
2723 	ref->addr = addr;
2724 
2725 	kmap = map__kmap(map);
2726 	if (kmap)
2727 		kmap->ref_reloc_sym = ref;
2728 
2729 	return 0;
2730 }
2731 
2732 size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
2733 {
2734 	return machines__fprintf_dsos(&session->machines, fp);
2735 }
2736 
2737 size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
2738 					  bool (skip)(struct dso *dso, int parm), int parm)
2739 {
2740 	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
2741 }
2742 
2743 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
2744 {
2745 	size_t ret;
2746 	const char *msg = "";
2747 
2748 	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
2749 		msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
2750 
2751 	ret = fprintf(fp, "\nAggregated stats:%s\n", msg);
2752 
2753 	ret += events_stats__fprintf(&session->evlist->stats, fp);
2754 	return ret;
2755 }
2756 
2757 size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
2758 {
2759 	size_t ret = machine__fprintf(&session->machines.host, fp);
2760 
2761 	for (struct rb_node *nd = rb_first_cached(&session->machines.guests); nd; nd = rb_next(nd)) {
2762 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
2763 
2764 		ret += machine__fprintf(pos, fp);
2765 	}
2766 	return ret;
2767 }
2768 
2769 void perf_session__dump_kmaps(struct perf_session *session)
2770 {
2771 	int save_verbose = verbose;
2772 
2773 	fflush(stdout);
2774 	fprintf(stderr, "Kernel and module maps:\n");
2775 	verbose = 0; /* Suppress verbose to print a summary only */
2776 	maps__fprintf(machine__kernel_maps(&session->machines.host), stderr);
2777 	verbose = save_verbose;
2778 }
2779 
2780 struct evsel *perf_session__find_first_evtype(struct perf_session *session,
2781 					      unsigned int type)
2782 {
2783 	struct evsel *pos;
2784 
2785 	evlist__for_each_entry(session->evlist, pos) {
2786 		if (pos->core.attr.type == type)
2787 			return pos;
2788 	}
2789 	return NULL;
2790 }
2791 
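/*
 * Usage sketch (hypothetical caller, e.g. handling a -C option):
 *
 *	unsigned long cpu_bitmap[BITS_TO_LONGS(MAX_NR_CPUS)] = { 0 };
 *
 *	if (perf_session__cpu_bitmap(session, "0-3,8", cpu_bitmap) < 0)
 *		return -1;
 *
 * afterwards test_bit(cpu.cpu, cpu_bitmap) tells whether samples from
 * that CPU were requested.
 */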
2792 int perf_session__cpu_bitmap(struct perf_session *session,
2793 			     const char *cpu_list, unsigned long *cpu_bitmap)
2794 {
2795 	unsigned int i;
2796 	int err = -1;
2797 	struct perf_cpu_map *map;
2798 	int nr_cpus = min(perf_session__env(session)->nr_cpus_avail, MAX_NR_CPUS);
2799 	struct perf_cpu cpu;
2800 
2801 	for (i = 0; i < PERF_TYPE_MAX; ++i) {
2802 		struct evsel *evsel;
2803 
2804 		evsel = perf_session__find_first_evtype(session, i);
2805 		if (!evsel)
2806 			continue;
2807 
2808 		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
2809 			pr_err("File does not contain CPU events. "
2810 			       "Remove the -C option to proceed.\n");
2811 			return -1;
2812 		}
2813 	}
2814 
2815 	map = perf_cpu_map__new(cpu_list);
2816 	if (map == NULL) {
2817 		pr_err("Invalid cpu_list\n");
2818 		return -1;
2819 	}
2820 
2821 	perf_cpu_map__for_each_cpu(cpu, i, map) {
2822 		if (cpu.cpu >= nr_cpus) {
2823 			pr_err("Requested CPU %d too large. "
2824 			       "Consider raising MAX_NR_CPUS\n", cpu.cpu);
2825 			goto out_delete_map;
2826 		}
2827 
2828 		__set_bit(cpu.cpu, cpu_bitmap);
2829 	}
2830 
2831 	err = 0;
2832 
2833 out_delete_map:
2834 	perf_cpu_map__put(map);
2835 	return err;
2836 }
2837 
2838 void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
2839 				bool full)
2840 {
2841 	if (session == NULL || fp == NULL)
2842 		return;
2843 
2844 	fprintf(fp, "# ========\n");
2845 	perf_header__fprintf_info(session, fp, full);
2846 	fprintf(fp, "# ========\n#\n");
2847 }
2848 
2849 static int perf_session__register_guest(struct perf_session *session, pid_t machine_pid)
2850 {
2851 	struct machine *machine = machines__findnew(&session->machines, machine_pid);
2852 	struct thread *thread;
2853 
2854 	if (!machine)
2855 		return -ENOMEM;
2856 
2857 	machine->single_address_space = session->machines.host.single_address_space;
2858 
2859 	thread = machine__idle_thread(machine);
2860 	if (!thread)
2861 		return -ENOMEM;
2862 	thread__put(thread);
2863 
2864 	machine->kallsyms_filename = perf_data__guest_kallsyms_name(session->data, machine_pid);
2865 
2866 	return 0;
2867 }
2868 
2869 static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid,
2870 				       pid_t tid, int guest_cpu)
2871 {
2872 	struct machine *machine = &session->machines.host;
2873 	struct thread *thread = machine__findnew_thread(machine, pid, tid);
2874 
2875 	if (!thread)
2876 		return -ENOMEM;
2877 	thread__set_guest_cpu(thread, guest_cpu);
2878 	thread__put(thread);
2879 
2880 	return 0;
2881 }
2882 
2883 int perf_event__process_id_index(const struct perf_tool *tool __maybe_unused,
2884 				 struct perf_session *session,
2885 				 union perf_event *event)
2886 {
2887 	struct evlist *evlist = session->evlist;
2888 	struct perf_record_id_index *ie = &event->id_index;
2889 	size_t sz = ie->header.size - sizeof(*ie);
2890 	size_t i, nr, max_nr;
2891 	size_t e1_sz = sizeof(struct id_index_entry);
2892 	size_t e2_sz = sizeof(struct id_index_entry_2);
2893 	size_t etot_sz = e1_sz + e2_sz;
2894 	struct id_index_entry_2 *e2;
2895 	pid_t last_pid = 0;
2896 
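	/*
	 * Record layout: 'nr' id_index_entry structs follow the header;
	 * newer files append a further 'nr' id_index_entry_2 structs,
	 * which is detected below from the record size.
	 */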
2897 	max_nr = sz / e1_sz;
2898 	nr = ie->nr;
2899 	if (nr > max_nr) {
2900 		printf("Too big: nr %zu max_nr %zu\n", nr, max_nr);
2901 		return -EINVAL;
2902 	}
2903 
2904 	if (sz >= nr * etot_sz) {
2905 		max_nr = sz / etot_sz;
2906 		if (nr > max_nr) {
2907 			printf("Too big2: nr %zu max_nr %zu\n", nr, max_nr);
2908 			return -EINVAL;
2909 		}
2910 		e2 = (void *)ie + sizeof(*ie) + nr * e1_sz;
2911 	} else {
2912 		e2 = NULL;
2913 	}
2914 
2915 	if (dump_trace)
2916 		fprintf(stdout, " nr: %zu\n", nr);
2917 
2918 	for (i = 0; i < nr; i++, (e2 ? e2++ : 0)) {
2919 		struct id_index_entry *e = &ie->entries[i];
2920 		struct perf_sample_id *sid;
2921 		int ret;
2922 
2923 		if (dump_trace) {
2924 			fprintf(stdout,	" ... id: %"PRI_lu64, e->id);
2925 			fprintf(stdout,	"  idx: %"PRI_lu64, e->idx);
2926 			fprintf(stdout,	"  cpu: %"PRI_ld64, e->cpu);
2927 			fprintf(stdout, "  tid: %"PRI_ld64, e->tid);
2928 			if (e2) {
2929 				fprintf(stdout, "  machine_pid: %"PRI_ld64, e2->machine_pid);
2930 				fprintf(stdout, "  vcpu: %"PRI_lu64"\n", e2->vcpu);
2931 			} else {
2932 				fprintf(stdout, "\n");
2933 			}
2934 		}
2935 
2936 		sid = evlist__id2sid(evlist, e->id);
2937 		if (!sid)
2938 			return -ENOENT;
2939 
2940 		sid->idx = e->idx;
2941 		sid->cpu.cpu = e->cpu;
2942 		sid->tid = e->tid;
2943 
2944 		if (!e2)
2945 			continue;
2946 
2947 		sid->machine_pid = e2->machine_pid;
2948 		sid->vcpu.cpu = e2->vcpu;
2949 
2950 		if (!sid->machine_pid)
2951 			continue;
2952 
2953 		if (sid->machine_pid != last_pid) {
2954 			ret = perf_session__register_guest(session, sid->machine_pid);
2955 			if (ret)
2956 				return ret;
2957 			last_pid = sid->machine_pid;
2958 			perf_guest = true;
2959 		}
2960 
2961 		ret = perf_session__set_guest_cpu(session, sid->machine_pid, e->tid, e2->vcpu);
2962 		if (ret)
2963 			return ret;
2964 	}
2965 	return 0;
2966 }
2967 
2968 int perf_session__dsos_hit_all(struct perf_session *session)
2969 {
2970 	struct rb_node *nd;
2971 	int err;
2972 
2973 	err = machine__hit_all_dsos(&session->machines.host);
2974 	if (err)
2975 		return err;
2976 
2977 	for (nd = rb_first_cached(&session->machines.guests); nd;
2978 	     nd = rb_next(nd)) {
2979 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
2980 
2981 		err = machine__hit_all_dsos(pos);
2982 		if (err)
2983 			return err;
2984 	}
2985 
2986 	return 0;
2987 }
2988 
2989 struct perf_env *perf_session__env(struct perf_session *session)
2990 {
2991 	return &session->header.env;
2992 }
2993 
2994 struct perf_session__e_machine_cb_args {
2995 	uint32_t e_flags;
2996 	uint16_t e_machine;
2997 };
2998 
2999 static int perf_session__e_machine_cb(struct thread *thread, void *_args)
3000 {
3001 	struct perf_session__e_machine_cb_args *args = _args;
3002 
3003 	args->e_machine = thread__e_machine(thread, /*machine=*/NULL, &args->e_flags);
3004 	return args->e_machine != EM_NONE ? 1 : 0;
3005 }
3006 
3007 /*
3008  * Note, a machine may have mixed 32-bit and 64-bit processes and so mixed
3009  * e_machines. Use thread__e_machine when this matters.
3010  */
3011 uint16_t perf_session__e_machine(struct perf_session *session, uint32_t *e_flags)
3012 {
3013 	struct perf_session__e_machine_cb_args args = {
3014 		.e_machine = EM_NONE,
3015 	};
3016 	struct perf_env *env;
3017 
3018 	if (!session) {
3019 		/* Default to assuming a host machine. */
3020 		if (e_flags)
3021 			*e_flags = EF_HOST;
3022 
3023 		return EM_HOST;
3024 	}
3025 
3026 	env = perf_session__env(session);
3027 	if (env && env->e_machine != EM_NONE) {
3028 		if (e_flags)
3029 			*e_flags = env->e_flags;
3030 
3031 		return env->e_machine;
3032 	}
3033 
3034 	machines__for_each_thread(&session->machines,
3035 				  perf_session__e_machine_cb,
3036 				  &args);
3037 
3038 	if (args.e_machine != EM_NONE) {
3039 		if (env) {
3040 			env->e_machine = args.e_machine;
3041 			env->e_flags = args.e_flags;
3042 		}
3043 		if (e_flags)
3044 			*e_flags = args.e_flags;
3045 
3046 		return args.e_machine;
3047 	}
3048 
3049 	/*
3050 	 * Couldn't determine from the perf_env or current set of
3051 	 * threads. Default to the host.
3052 	 */
3053 	if (e_flags)
3054 		*e_flags = EF_HOST;
3055 
3056 	return EM_HOST;
3057 }
3058