/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct perf_evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	bool mispred_all;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;

	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;

	struct itrace_synth_opts synth_opts;

	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;

	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;

	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;

	bool synth_needs_swap;

	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;

	unsigned long num_events;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
};

static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	printf(".\n");
	intel_pt_dump(pt, buf, len);
}

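/*
 * In snapshot mode successive buffers may repeat data that was already
 * captured, so trim buffer 'b' to begin after the overlap with buffer 'a'.
 */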
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
					struct auxtrace_queue *queue,
					struct auxtrace_buffer *buffer)
{
	if (queue->cpu == -1 && buffer->cpu != -1)
		ptq->cpu = buffer->cpu;

	ptq->pid = buffer->pid;
	ptq->tid = buffer->tid;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid != -1) {
		if (ptq->pid != -1)
			ptq->thread = machine__findnew_thread(ptq->pt->machine,
							      ptq->pid,
							      ptq->tid);
		else
			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
							   ptq->tid);
	}
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;

	if (ptq->stop) {
		b->len = 0;
		return 0;
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	ptq->buffer = buffer;

	if (!buffer->data) {
		int fd = perf_data_file__fd(ptq->pt->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
	} else {
		b->len = buffer->size;
		b->buf = buffer->data;
	}
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
						      !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
	} else {
		b->consecutive = true;
	}

	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
					ptq->tid != buffer->tid))
		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);

	if (ptq->step_through_buffers)
		ptq->stop = true;

	if (!b->len)
		return intel_pt_get_trace(b, data);

	return 0;
}

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry	entry;
	u64				insn_cnt;
	u64				byte_cnt;
	enum intel_pt_insn_op		op;
	enum intel_pt_insn_branch	branch;
	int				length;
	int32_t				rel;
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
	int *d = data;
	long val;

	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)
			*d = val;
	}

	return 0;
}

static int intel_pt_cache_divisor(void)
{
	static int d;

	if (d)
		return d;

	perf_config(intel_pt_config_div, &d);

	if (!d)
		d = 64;

	return d;
}

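/*
 * Size the per-dso instruction cache in proportion to the dso size
 * (dso size / cache divisor), returning the number of hash bits,
 * clamped to the range 10 to 21.
 */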
static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
{
	off_t size;

	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size < 1000)
		return 10;
	if (size > (1 << 21))
		return 21;
	return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
{
	struct auxtrace_cache *c;
	unsigned int bits;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

	return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;
	int err;

	if (!c)
		return -ENOMEM;

	e = auxtrace_cache__alloc_entry(c);
	if (!e)
		return -ENOMEM;

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;

	err = auxtrace_cache__add(c, offset, &e->entry);
	if (err)
		auxtrace_cache__free_entry(c, e);

	return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return NULL;

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}

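/*
 * Decoder callback to walk object code: read instructions from the dso
 * backing *ip and decode them one at a time until a branch is found, or
 * 'to_ip' or 'max_insn_cnt' is reached.  Results for a single map are
 * cached so the same code need not be walked repeatedly.
 */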
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
				   void *data)
{
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[1024];
	size_t bufsz;
	ssize_t len;
	int x86_64;
	u8 cpumode;
	u64 offset, start_offset, start_ip;
	u64 insn_cnt = 0;
	bool one_map = true;

	if (to_ip && *ip == to_ip)
		goto out_no_cache;

	bufsz = intel_pt_insn_max_size();

	if (*ip >= ptq->pt->kernel_start)
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return -EINVAL;
		thread = ptq->pt->unknown_thread;
	}

	while (1) {
		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
		if (!al.map || !al.map->dso)
			return -EINVAL;

		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(al.map->dso,
					  DSO_DATA_STATUS_SEEN_ITRACE))
			return -ENOENT;

		offset = al.map->map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
			if (e &&
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				*ip += e->byte_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
				return 0;
			}
		}

		start_offset = offset;
		start_ip = *ip;

		/* Load maps to ensure dso->is_64_bit has been updated */
		map__load(al.map, machine->symbol_filter);

		x86_64 = al.map->dso->is_64_bit;

		while (1) {
			len = dso__data_read_offset(al.map->dso, machine,
						    offset, buf, bufsz);
			if (len <= 0)
				return -EINVAL;

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
				return -EINVAL;

			intel_pt_log_insn(intel_pt_insn, *ip);

			insn_cnt += 1;

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
				goto out;

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
				goto out_no_cache;

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip)
				goto out_no_cache;

			if (*ip >= al.map->end)
				break;

			offset += intel_pt_insn->length;
		}
		one_map = false;
	}
out:
	*insn_cnt_ptr = insn_cnt;

	if (!one_map)
		goto out_no_cache;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 * entries.
	 */
	if (to_ip) {
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
		if (e)
			return 0;
	}

	/* Ignore cache errors */
	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	return 0;

out_no_cache:
	*insn_cnt_ptr = insn_cnt;
	return 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
{
	if (attr->type == pt->pmu_type) {
		if (config)
			*config = attr->config;
		return true;
	}

	return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return false;
	}
	return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	u64 config;

	if (!pt->noretcomp_bit)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
		    (config & pt->noretcomp_bit))
			return false;
	}
	return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	unsigned int shift;
	u64 config;

	if (!pt->mtc_freq_bits)
		return 0;

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
		config >>= 1;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;
	}
	return 0;
}

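/*
 * "Timeless" decoding means decoding without timestamps: used when TSC
 * packets were not recorded or the perf events do not carry sample time,
 * in which case queues are decoded in their entirety per thread rather
 * than interleaved by timestamp.
 */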
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool timeless_decoding = true;
	u64 config;

	if (!pt->tsc_bit || !pt->cap_user_time_zero)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
			return true;
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;
			else
				return true;
		}
	}
	return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return true;
	}
	return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool have_tsc = false;
	u64 config;

	if (!pt->tsc_bit)
		return false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				have_tsc = true;
			else
				return false;
		}
	}
	return have_tsc;
}

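/*
 * Convert nanoseconds to TSC ticks by inverting the perf time conversion
 * (time = (tsc * time_mult) >> time_shift), splitting the calculation into
 * quotient and remainder to reduce the chance of overflow.
 */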
static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
	u64 quot, rem;

	quot = ns / pt->tc.time_mult;
	rem  = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
		pt->tc.time_mult;
}

static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
{
	struct intel_pt_params params = { .get_trace = 0, };
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));
	if (!ptq)
		return NULL;

	if (pt->synth_opts.callchain) {
		size_t sz = sizeof(struct ip_callchain);

		sz += pt->synth_opts.callchain_sz * sizeof(u64);
		ptq->chain = zalloc(sz);
		if (!ptq->chain)
			goto out_free;
	}

	if (pt->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += pt->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		ptq->last_branch = zalloc(sz);
		if (!ptq->last_branch)
			goto out_free;
		ptq->last_branch_rb = zalloc(sz);
		if (!ptq->last_branch_rb)
			goto out_free;
	}

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)
		goto out_free;

	ptq->pt = pt;
	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
	ptq->pid = -1;
	ptq->tid = -1;
	ptq->cpu = -1;
	ptq->next_tid = -1;

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.data = ptq;
	params.return_compression = intel_pt_return_compression(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);
				break;
			default:
				break;
			}
		}

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
			params.period = 1;
		}
	}

	ptq->decoder = intel_pt_decoder_new(&params);
	if (!ptq->decoder)
		goto out_free;

	return ptq;

out_free:
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->last_branch_rb);
	zfree(&ptq->chain);
	free(ptq);
	return NULL;
}

static void intel_pt_free_queue(void *priv)
{
	struct intel_pt_queue *ptq = priv;

	if (!ptq)
		return;
	thread__zput(ptq->thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->last_branch_rb);
	zfree(&ptq->chain);
	free(ptq);
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);
	}

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

	if (ptq->thread) {
		ptq->pid = ptq->thread->pid_;
		if (queue->cpu == -1)
			ptq->cpu = ptq->thread->cpu;
	}
}

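/*
 * Derive the perf branch flags (call, async, interrupt, tx abort,
 * trace begin/end) and instruction length for the current decoder state.
 */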
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		ptq->insn_len = 0;
	} else {
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
	}
}

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!ptq) {
		ptq = intel_pt_alloc_queue(pt, queue_nr);
		if (!ptq)
			return -ENOMEM;
		queue->priv = ptq;

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		if (pt->sampling_mode) {
			if (pt->timeless_decoding)
				ptq->step_through_buffers = true;
			if (pt->timeless_decoding || !pt->have_sched_switch)
				ptq->use_buffer_pid_tid = true;
		}
	}

	if (!ptq->on_heap &&
	    (!pt->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

		if (pt->timeless_decoding)
			return 0;

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
		while (1) {
			state = intel_pt_decode(ptq->decoder);
			if (state->err) {
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
						     queue_nr);
					return 0;
				}
				continue;
			}
			if (state->timestamp)
				break;
		}

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		if (ret)
			return ret;
		ptq->on_heap = true;
	}

	return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
	unsigned int i;
	int ret;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
		if (ret)
			return ret;
	}
	return 0;
}

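/*
 * Flatten the last-branch ring buffer into the linear branch stack that is
 * attached to samples, most recent branch first.
 */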
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
	struct branch_stack *bs_src = ptq->last_branch_rb;
	struct branch_stack *bs_dst = ptq->last_branch;
	size_t nr = 0;

	bs_dst->nr = bs_src->nr;

	if (!bs_src->nr)
		return;

	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[ptq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * ptq->last_branch_pos);
	}
}

static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
	ptq->last_branch_pos = 0;
	ptq->last_branch_rb->nr = 0;
}

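/*
 * Record the current branch in the last-branch ring buffer.  Entries are
 * written backwards from the end of the buffer, so 'last_branch_pos' always
 * indexes the newest entry.
 */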
static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct branch_stack *bs = ptq->last_branch_rb;
	struct branch_entry *be;

	if (!ptq->last_branch_pos)
		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;

	ptq->last_branch_pos -= 1;

	be              = &bs->entries[ptq->last_branch_pos];
	be->from        = state->from_ip;
	be->to          = state->to_ip;
	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ptq->pt->mispred_all;

	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type,
				 bool swapped)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		u64			nr;
		struct branch_entry	entries;
	} dummy_bs;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
		return 0;

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->branches_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.last_branch) {
		intel_pt_copy_last_branch_rb(ptq);
		sample.branch_stack = ptq->last_branch;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->instructions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
		       ret);

	if (pt->synth_opts.last_branch)
		intel_pt_reset_last_branch_rb(ptq);

	return ret;
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.last_branch) {
		intel_pt_copy_last_branch_rb(ptq);
		sample.branch_stack = ptq->last_branch;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->transactions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
		       ret);

	if (pt->synth_opts.last_branch)
		intel_pt_reset_last_branch_rb(ptq);

	return ret;
}

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip)
{
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];
	int err;

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg);

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
	if (err)
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;
	int err;

	if (tid == -1)
		return 0;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

	ptq->next_tid = -1;

	return err;
}

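/*
 * A branch to the kernel's __switch_to address that is not conditional,
 * asynchronous, an interrupt or a tx abort indicates a context switch.
 */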
static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

static int intel_pt_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!ptq->have_sample)
		return 0;

	ptq->have_sample = false;

	if (pt->sample_instructions &&
	    (state->type & INTEL_PT_INSTRUCTION) &&
	    (!pt->synth_opts.initial_skip ||
	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
		err = intel_pt_synth_instruction_sample(ptq);
		if (err)
			return err;
	}

	if (pt->sample_transactions &&
	    (state->type & INTEL_PT_TRANSACTION) &&
	    (!pt->synth_opts.initial_skip ||
	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
		err = intel_pt_synth_transaction_sample(ptq);
		if (err)
			return err;
	}

	if (!(state->type & INTEL_PT_BRANCH))
		return 0;

	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
				    state->to_ip, ptq->insn_len,
				    state->trace_nr);
	else
		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);

	if (pt->sample_branches) {
		err = intel_pt_synth_branch_sample(ptq);
		if (err)
			return err;
	}

	if (pt->synth_opts.last_branch)
		intel_pt_update_last_branch_rb(ptq);

	if (!pt->sync_switch)
		return 0;

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			if (err)
				return err;
			ptq->switch_state = INTEL_PT_SS_TRACING;
			break;
		default:
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
			return 1;
		}
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;
	}

	return 0;
}

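/*
 * Find the kernel address of __switch_to, and also the address ('ptss_ip')
 * of the function from which the sched_switch event is emitted, so that
 * context switches seen in the trace can be matched up with switch events.
 */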
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
	struct machine *machine = pt->machine;
	struct map *map;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;
	const char *ptss;

	if (ptss_ip)
		*ptss_ip = 0;

	map = machine__kernel_map(machine);
	if (!map)
		return 0;

	if (map__load(map, machine->symbol_filter))
		return 0;

	start = dso__first_symbol(map->dso, MAP__FUNCTION);

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				switch_ip = ip;
				break;
			}
		}
	}

	if (!switch_ip || !ptss_ip)
		return 0;

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";
	else
		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				*ptss_ip = ip;
				break;
			}
		}
	}

	return switch_ip;
}

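/*
 * Per-queue decode loop: synthesize samples for the previous state, decode
 * the next branch, report decoder errors, and keep the queue timestamp
 * moving forward until it passes *timestamp.
 */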
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				pt->sync_switch = true;
			}
		}
	}

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
	while (1) {
		err = intel_pt_sample(ptq);
		if (err)
			return err;

		state = intel_pt_decode(ptq->decoder);
		if (state->err) {
			if (state->err == INTEL_PT_ERR_NODATA)
				return 1;
			if (pt->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
				pt->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			}
			if (pt->synth_opts.errors) {
				err = intel_pt_synth_error(pt, state->err,
							   ptq->cpu, ptq->pid,
							   ptq->tid,
							   state->from_ip);
				if (err)
					return err;
			}
			continue;
		}

		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		if (pt->est_tsc &&
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (pt->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;
		}

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;
			return 0;
		}
	}
	return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);
	}
	return 0;
}

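/*
 * Decode all queues in timestamp order, using a heap keyed by each queue's
 * next timestamp, until every queue has caught up to 'timestamp'.
 */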
static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)
			return 0;

		if (pt->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];
		ptq = queue->priv;

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,
			     timestamp);

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);

		if (ret < 0) {
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			ptq->on_heap = false;
		}
	}

	return 0;
}

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			ptq->time = time_;
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);
		}
	}
	return 0;
}

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0);
}

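/*
 * Find the queue for a given cpu.  Queues are usually indexed by cpu, so
 * try that first and then search outwards from it.
 */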
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
	unsigned i, j;

	if (cpu < 0 || !pt->queues.nr_queues)
		return NULL;

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;
	else
		i = cpu;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;
	}

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;
	}

	return NULL;
}

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
				u64 timestamp)
{
	struct intel_pt_queue *ptq;
	int err;

	if (!pt->sync_switch)
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq)
		return 1;

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
		ptq->next_tid = -1;
		break;
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
		return 0;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
							  &pt->tc);
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
						 ptq->timestamp);
			if (err)
				return err;
			ptq->on_heap = true;
		}
		ptq->switch_state = INTEL_PT_SS_TRACING;
		break;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		ptq->next_tid = tid;
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
		break;
	default:
		break;
	}

	return 1;
}

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct perf_evsel *evsel;
	pid_t tid;
	int cpu, ret;

	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
	if (evsel != pt->switch_evsel)
		return 0;

	tid = perf_evsel__intval(evsel, sample, "next_pid");
	cpu = sample->cpu;

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, -1, tid);
}

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return 0;
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}

static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
		     perf_event__name(event->header.type), event->header.type,
		     sample->cpu, sample->time, timestamp);

	return err;
}

static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret < 0)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}

static void intel_pt_free_events(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_pt_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	intel_pt_log_disable();
	auxtrace_queues__free(queues);
}

static void intel_pt_free(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	auxtrace_heap__free(&pt->heap);
	intel_pt_free_events(session);
	session->auxtrace = NULL;
	thread__put(pt->unknown_thread);
	free(pt);
}

static int intel_pt_process_auxtrace_event(struct perf_session *session,
					   union perf_event *event,
					   struct perf_tool *tool __maybe_unused)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	if (pt->sampling_mode)
		return 0;

	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data_file__fd(session->file);
		int err;

		if (perf_data_file__is_pipe(session->file)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&pt->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_pt_dump_event(pt, buffer->data,
						    buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

struct intel_pt_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_pt_event_synth(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample __maybe_unused,
				struct machine *machine __maybe_unused)
{
	struct intel_pt_synth *intel_pt_synth =
			container_of(tool, struct intel_pt_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
						 NULL);
}

static int intel_pt_synth_event(struct perf_session *session,
				struct perf_event_attr *attr, u64 id)
{
	struct intel_pt_synth intel_pt_synth;

	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
	intel_pt_synth.session = session;

	return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
					   &id, intel_pt_event_synth);
}

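/*
 * Set up the perf_event_attr for the samples ("instructions",
 * "transactions", "branches") synthesized from the trace, modelled on the
 * attributes of the recorded Intel PT event, and register an id for each.
 */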
1844 static int intel_pt_synth_events(struct intel_pt *pt,
1845 				 struct perf_session *session)
1846 {
1847 	struct perf_evlist *evlist = session->evlist;
1848 	struct perf_evsel *evsel;
1849 	struct perf_event_attr attr;
1850 	bool found = false;
1851 	u64 id;
1852 	int err;
1853 
1854 	evlist__for_each_entry(evlist, evsel) {
1855 		if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1856 			found = true;
1857 			break;
1858 		}
1859 	}
1860 
1861 	if (!found) {
1862 		pr_debug("There are no selected events with Intel Processor Trace data\n");
1863 		return 0;
1864 	}
1865 
1866 	memset(&attr, 0, sizeof(struct perf_event_attr));
1867 	attr.size = sizeof(struct perf_event_attr);
1868 	attr.type = PERF_TYPE_HARDWARE;
1869 	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1870 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1871 			    PERF_SAMPLE_PERIOD;
1872 	if (pt->timeless_decoding)
1873 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1874 	else
1875 		attr.sample_type |= PERF_SAMPLE_TIME;
1876 	if (!pt->per_cpu_mmaps)
1877 		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1878 	attr.exclude_user = evsel->attr.exclude_user;
1879 	attr.exclude_kernel = evsel->attr.exclude_kernel;
1880 	attr.exclude_hv = evsel->attr.exclude_hv;
1881 	attr.exclude_host = evsel->attr.exclude_host;
1882 	attr.exclude_guest = evsel->attr.exclude_guest;
1883 	attr.sample_id_all = evsel->attr.sample_id_all;
1884 	attr.read_format = evsel->attr.read_format;
1885 
1886 	id = evsel->id[0] + 1000000000;
1887 	if (!id)
1888 		id = 1;
1889 
1890 	if (pt->synth_opts.instructions) {
1891 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1892 		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1893 			attr.sample_period =
1894 				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1895 		else
1896 			attr.sample_period = pt->synth_opts.period;
1897 		pt->instructions_sample_period = attr.sample_period;
1898 		if (pt->synth_opts.callchain)
1899 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1900 		if (pt->synth_opts.last_branch)
1901 			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1902 		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1903 			 id, (u64)attr.sample_type);
1904 		err = intel_pt_synth_event(session, &attr, id);
1905 		if (err) {
1906 			pr_err("%s: failed to synthesize 'instructions' event type\n",
1907 			       __func__);
1908 			return err;
1909 		}
1910 		pt->sample_instructions = true;
1911 		pt->instructions_sample_type = attr.sample_type;
1912 		pt->instructions_id = id;
1913 		id += 1;
1914 	}
1915 
1916 	if (pt->synth_opts.transactions) {
1917 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1918 		attr.sample_period = 1;
1919 		if (pt->synth_opts.callchain)
1920 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1921 		if (pt->synth_opts.last_branch)
1922 			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1923 		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1924 			 id, (u64)attr.sample_type);
1925 		err = intel_pt_synth_event(session, &attr, id);
1926 		if (err) {
1927 			pr_err("%s: failed to synthesize 'transactions' event type\n",
1928 			       __func__);
1929 			return err;
1930 		}
1931 		pt->sample_transactions = true;
1932 		pt->transactions_id = id;
1933 		id += 1;
1934 		evlist__for_each_entry(evlist, evsel) {
1935 			if (evsel->id && evsel->id[0] == pt->transactions_id) {
1936 				if (evsel->name)
1937 					zfree(&evsel->name);
1938 				evsel->name = strdup("transactions");
1939 				break;
1940 			}
1941 		}
1942 	}
1943 
1944 	if (pt->synth_opts.branches) {
1945 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1946 		attr.sample_period = 1;
1947 		attr.sample_type |= PERF_SAMPLE_ADDR;
1948 		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1949 		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
1950 		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1951 			 id, (u64)attr.sample_type);
1952 		err = intel_pt_synth_event(session, &attr, id);
1953 		if (err) {
1954 			pr_err("%s: failed to synthesize 'branches' event type\n",
1955 			       __func__);
1956 			return err;
1957 		}
1958 		pt->sample_branches = true;
1959 		pt->branches_sample_type = attr.sample_type;
1960 		pt->branches_id = id;
1961 	}
1962 
1963 	pt->synth_needs_swap = evsel->needs_swap;
1964 
1965 	return 0;
1966 }
1967 
1968 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1969 {
1970 	struct perf_evsel *evsel;
1971 
1972 	evlist__for_each_entry_reverse(evlist, evsel) {
1973 		const char *name = perf_evsel__name(evsel);
1974 
1975 		if (!strcmp(name, "sched:sched_switch"))
1976 			return evsel;
1977 	}
1978 
1979 	return NULL;
1980 }
1981 
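     /* Return true if any event was recorded with the context_switch attribute set */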
1982 static bool intel_pt_find_switch(struct perf_evlist *evlist)
1983 {
1984 	struct perf_evsel *evsel;
1985 
1986 	evlist__for_each_entry(evlist, evsel) {
1987 		if (evsel->attr.context_switch)
1988 			return true;
1989 	}
1990 
1991 	return false;
1992 }
1993 
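     /* Handle the "intel-pt.mispred-all" boolean from the perf config file */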
1994 static int intel_pt_perf_config(const char *var, const char *value, void *data)
1995 {
1996 	struct intel_pt *pt = data;
1997 
1998 	if (!strcmp(var, "intel-pt.mispred-all"))
1999 		pt->mispred_all = perf_config_bool(var, value);
2000 
2001 	return 0;
2002 }
2003 
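     /*
      * Format strings used to dump the auxtrace_info private array, indexed by
      * the INTEL_PT_* entries.
      */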
2004 static const char * const intel_pt_info_fmts[] = {
2005 	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
2006 	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
2007 	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
2008 	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
2009 	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
2010 	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
2011 	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
2012 	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
2013 	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
2014 	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
2015 	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
2016 	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
2017 	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
2018 	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
2019 };
2020 
2021 static void intel_pt_print_info(u64 *arr, int start, int finish)
2022 {
2023 	int i;
2024 
2025 	if (!dump_trace)
2026 		return;
2027 
2028 	for (i = start; i <= finish; i++)
2029 		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2030 }
2031 
2032 int intel_pt_process_auxtrace_info(union perf_event *event,
2033 				   struct perf_session *session)
2034 {
2035 	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2036 	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2037 	struct intel_pt *pt;
2038 	int err;
2039 
2040 	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2041 					min_sz)
2042 		return -EINVAL;
2043 
2044 	pt = zalloc(sizeof(struct intel_pt));
2045 	if (!pt)
2046 		return -ENOMEM;
2047 
2048 	perf_config(intel_pt_perf_config, pt);
2049 
2050 	err = auxtrace_queues__init(&pt->queues);
2051 	if (err)
2052 		goto err_free;
2053 
2054 	intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2055 
2056 	pt->session = session;
2057 	pt->machine = &session->machines.host; /* No kvm support */
2058 	pt->auxtrace_type = auxtrace_info->type;
2059 	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2060 	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2061 	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2062 	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2063 	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2064 	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2065 	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2066 	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2067 	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2068 	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2069 	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2070 			    INTEL_PT_PER_CPU_MMAPS);
2071 
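     	/*
     	 * The MTC/CYC and TSC:CTC ratio information was added later, so only
     	 * read it if the auxtrace_info event is big enough to contain it.
     	 */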
2072 	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
2073 					(sizeof(u64) * INTEL_PT_CYC_BIT)) {
2074 		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2075 		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2076 		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2077 		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2078 		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2079 		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2080 				    INTEL_PT_CYC_BIT);
2081 	}
2082 
2083 	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2084 	pt->have_tsc = intel_pt_have_tsc(pt);
2085 	pt->sampling_mode = false;
2086 	pt->est_tsc = !pt->timeless_decoding;
2087 
2088 	pt->unknown_thread = thread__new(999999999, 999999999);
2089 	if (!pt->unknown_thread) {
2090 		err = -ENOMEM;
2091 		goto err_free_queues;
2092 	}
2093 
2094 	/*
2095 	 * Since this thread will not be kept in any rbtree nor in a
2096 	 * list, initialize its list node so that at thread__put() the
2097 	 * current thread lifetime assumption is kept and we don't segfault
2098 	 * at list_del_init().
2099 	 */
2100 	INIT_LIST_HEAD(&pt->unknown_thread->node);
2101 
2102 	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2103 	if (err)
2104 		goto err_delete_thread;
2105 	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2106 		err = -ENOMEM;
2107 		goto err_delete_thread;
2108 	}
2109 
2110 	pt->auxtrace.process_event = intel_pt_process_event;
2111 	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2112 	pt->auxtrace.flush_events = intel_pt_flush;
2113 	pt->auxtrace.free_events = intel_pt_free_events;
2114 	pt->auxtrace.free = intel_pt_free;
2115 	session->auxtrace = &pt->auxtrace;
2116 
2117 	if (dump_trace)
2118 		return 0;
2119 
2120 	if (pt->have_sched_switch == 1) {
2121 		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2122 		if (!pt->switch_evsel) {
2123 			pr_err("%s: missing sched_switch event\n", __func__);
     			err = -EINVAL;
2124 			goto err_delete_thread;
2125 		}
2126 	} else if (pt->have_sched_switch == 2 &&
2127 		   !intel_pt_find_switch(session->evlist)) {
2128 		pr_err("%s: missing context_switch attribute flag\n", __func__);
     		err = -EINVAL;
2129 		goto err_delete_thread;
2130 	}
2131 
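     	/*
     	 * Use the itrace options given on the command line if any, otherwise
     	 * the defaults, preferring callchains over branch samples when a
     	 * browser UI is in use.
     	 */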
2132 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2133 		pt->synth_opts = *session->itrace_synth_opts;
2134 	} else {
2135 		itrace_synth_opts__set_default(&pt->synth_opts);
2136 		if (use_browser != -1) {
2137 			pt->synth_opts.branches = false;
2138 			pt->synth_opts.callchain = true;
2139 		}
2140 		if (session->itrace_synth_opts)
2141 			pt->synth_opts.thread_stack =
2142 				session->itrace_synth_opts->thread_stack;
2143 	}
2144 
2145 	if (pt->synth_opts.log)
2146 		intel_pt_log_enable();
2147 
2148 	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
2149 	if (pt->tc.time_mult) {
2150 		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2151 
2152 		pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
2153 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2154 		intel_pt_log("Maximum non-turbo ratio %u\n",
2155 			     pt->max_non_turbo_ratio);
2156 	}
2157 
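     	/*
     	 * The itrace "calls" and "returns" options restrict synthesized
     	 * branch samples to call-like and return-like branches respectively.
     	 */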
2158 	if (pt->synth_opts.calls)
2159 		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2160 				       PERF_IP_FLAG_TRACE_END;
2161 	if (pt->synth_opts.returns)
2162 		pt->branches_filter |= PERF_IP_FLAG_RETURN |
2163 				       PERF_IP_FLAG_TRACE_BEGIN;
2164 
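     	/*
     	 * Synthesizing callchains requires callchain support to be
     	 * registered; fall back to no callchains if that fails.
     	 */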
2165 	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2166 		symbol_conf.use_callchain = true;
2167 		if (callchain_register_param(&callchain_param) < 0) {
2168 			symbol_conf.use_callchain = false;
2169 			pt->synth_opts.callchain = false;
2170 		}
2171 	}
2172 
2173 	err = intel_pt_synth_events(pt, session);
2174 	if (err)
2175 		goto err_delete_thread;
2176 
2177 	err = auxtrace_queues__process_index(&pt->queues, session);
2178 	if (err)
2179 		goto err_delete_thread;
2180 
2181 	if (pt->queues.populated)
2182 		pt->data_queued = true;
2183 
2184 	if (pt->timeless_decoding)
2185 		pr_debug2("Intel PT decoding without timestamps\n");
2186 
2187 	return 0;
2188 
2189 err_delete_thread:
2190 	thread__zput(pt->unknown_thread);
2191 err_free_queues:
2192 	intel_pt_log_disable();
2193 	auxtrace_queues__free(&pt->queues);
2194 	session->auxtrace = NULL;
2195 err_free:
2196 	free(pt);
2197 	return err;
2198 }
2199