xref: /linux/tools/perf/util/intel-pt.c (revision b8d312aa075f33282565467662c4628dae0a2aff)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_pt.c: Intel Processor Trace support
4  * Copyright (c) 2013-2015, Intel Corporation.
5  */
6 
7 #include <inttypes.h>
8 #include <stdio.h>
9 #include <stdbool.h>
10 #include <errno.h>
11 #include <linux/kernel.h>
12 #include <linux/types.h>
13 #include <linux/zalloc.h>
14 
15 #include "../perf.h"
16 #include "session.h"
17 #include "machine.h"
18 #include "memswap.h"
19 #include "sort.h"
20 #include "tool.h"
21 #include "event.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "map.h"
25 #include "color.h"
26 #include "thread.h"
27 #include "thread-stack.h"
28 #include "symbol.h"
29 #include "callchain.h"
30 #include "dso.h"
31 #include "debug.h"
32 #include "auxtrace.h"
33 #include "tsc.h"
34 #include "intel-pt.h"
35 #include "config.h"
36 #include "time-utils.h"
37 
38 #include "../arch/x86/include/uapi/asm/perf_regs.h"
39 
40 #include "intel-pt-decoder/intel-pt-log.h"
41 #include "intel-pt-decoder/intel-pt-decoder.h"
42 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
43 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
44 
45 #define MAX_TIMESTAMP (~0ULL)
46 
47 struct range {
48 	u64 start;
49 	u64 end;
50 };
51 
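/*
 * Per-session Intel PT decoding state: the auxtrace queues and ordering
 * heap, decoder configuration derived from the perf.data header, and the
 * sample types/IDs used for each kind of synthesized event.
 */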
52 struct intel_pt {
53 	struct auxtrace auxtrace;
54 	struct auxtrace_queues queues;
55 	struct auxtrace_heap heap;
56 	u32 auxtrace_type;
57 	struct perf_session *session;
58 	struct machine *machine;
59 	struct perf_evsel *switch_evsel;
60 	struct thread *unknown_thread;
61 	bool timeless_decoding;
62 	bool sampling_mode;
63 	bool snapshot_mode;
64 	bool per_cpu_mmaps;
65 	bool have_tsc;
66 	bool data_queued;
67 	bool est_tsc;
68 	bool sync_switch;
69 	bool mispred_all;
70 	int have_sched_switch;
71 	u32 pmu_type;
72 	u64 kernel_start;
73 	u64 switch_ip;
74 	u64 ptss_ip;
75 
76 	struct perf_tsc_conversion tc;
77 	bool cap_user_time_zero;
78 
79 	struct itrace_synth_opts synth_opts;
80 
81 	bool sample_instructions;
82 	u64 instructions_sample_type;
83 	u64 instructions_id;
84 
85 	bool sample_branches;
86 	u32 branches_filter;
87 	u64 branches_sample_type;
88 	u64 branches_id;
89 
90 	bool sample_transactions;
91 	u64 transactions_sample_type;
92 	u64 transactions_id;
93 
94 	bool sample_ptwrites;
95 	u64 ptwrites_sample_type;
96 	u64 ptwrites_id;
97 
98 	bool sample_pwr_events;
99 	u64 pwr_events_sample_type;
100 	u64 mwait_id;
101 	u64 pwre_id;
102 	u64 exstop_id;
103 	u64 pwrx_id;
104 	u64 cbr_id;
105 
106 	bool sample_pebs;
107 	struct perf_evsel *pebs_evsel;
108 
109 	u64 tsc_bit;
110 	u64 mtc_bit;
111 	u64 mtc_freq_bits;
112 	u32 tsc_ctc_ratio_n;
113 	u32 tsc_ctc_ratio_d;
114 	u64 cyc_bit;
115 	u64 noretcomp_bit;
116 	unsigned max_non_turbo_ratio;
117 	unsigned cbr2khz;
118 
119 	unsigned long num_events;
120 
121 	char *filter;
122 	struct addr_filters filts;
123 
124 	struct range *time_ranges;
125 	unsigned int range_cnt;
126 };
127 
128 enum switch_state {
129 	INTEL_PT_SS_NOT_TRACING,
130 	INTEL_PT_SS_UNKNOWN,
131 	INTEL_PT_SS_TRACING,
132 	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
133 	INTEL_PT_SS_EXPECTING_SWITCH_IP,
134 };
135 
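/*
 * Per-queue decode state. There is one queue per CPU for per-cpu mmaps,
 * otherwise one per thread: it holds the decoder instance, the current
 * trace buffer, the associated thread/pid/tid, and bookkeeping for
 * synthesized samples (callchains, last-branch ring buffer, IPC counts).
 */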
136 struct intel_pt_queue {
137 	struct intel_pt *pt;
138 	unsigned int queue_nr;
139 	struct auxtrace_buffer *buffer;
140 	struct auxtrace_buffer *old_buffer;
141 	void *decoder;
142 	const struct intel_pt_state *state;
143 	struct ip_callchain *chain;
144 	struct branch_stack *last_branch;
145 	struct branch_stack *last_branch_rb;
146 	size_t last_branch_pos;
147 	union perf_event *event_buf;
148 	bool on_heap;
149 	bool stop;
150 	bool step_through_buffers;
151 	bool use_buffer_pid_tid;
152 	bool sync_switch;
153 	pid_t pid, tid;
154 	int cpu;
155 	int switch_state;
156 	pid_t next_tid;
157 	struct thread *thread;
158 	bool exclude_kernel;
159 	bool have_sample;
160 	u64 time;
161 	u64 timestamp;
162 	u64 sel_timestamp;
163 	bool sel_start;
164 	unsigned int sel_idx;
165 	u32 flags;
166 	u16 insn_len;
167 	u64 last_insn_cnt;
168 	u64 ipc_insn_cnt;
169 	u64 ipc_cyc_cnt;
170 	u64 last_in_insn_cnt;
171 	u64 last_in_cyc_cnt;
172 	u64 last_br_insn_cnt;
173 	u64 last_br_cyc_cnt;
174 	unsigned int cbr_seen;
175 	char insn[INTEL_PT_INSN_BUF_SZ];
176 };
177 
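/*
 * Hex dump the raw trace, one Intel PT packet per line, appending the
 * decoded packet description when the packet is valid.
 */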
178 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
179 			  unsigned char *buf, size_t len)
180 {
181 	struct intel_pt_pkt packet;
182 	size_t pos = 0;
183 	int ret, pkt_len, i;
184 	char desc[INTEL_PT_PKT_DESC_MAX];
185 	const char *color = PERF_COLOR_BLUE;
186 	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
187 
188 	color_fprintf(stdout, color,
189 		      ". ... Intel Processor Trace data: size %zu bytes\n",
190 		      len);
191 
192 	while (len) {
193 		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
194 		if (ret > 0)
195 			pkt_len = ret;
196 		else
197 			pkt_len = 1;
198 		printf(".");
199 		color_fprintf(stdout, color, "  %08zx: ", pos);
200 		for (i = 0; i < pkt_len; i++)
201 			color_fprintf(stdout, color, " %02x", buf[i]);
202 		for (; i < 16; i++)
203 			color_fprintf(stdout, color, "   ");
204 		if (ret > 0) {
205 			ret = intel_pt_pkt_desc(&packet, desc,
206 						INTEL_PT_PKT_DESC_MAX);
207 			if (ret > 0)
208 				color_fprintf(stdout, color, " %s\n", desc);
209 		} else {
210 			color_fprintf(stdout, color, " Bad packet!\n");
211 		}
212 		pos += pkt_len;
213 		buf += pkt_len;
214 		len -= pkt_len;
215 	}
216 }
217 
218 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
219 				size_t len)
220 {
221 	printf(".\n");
222 	intel_pt_dump(pt, buf, len);
223 }
224 
225 static void intel_pt_log_event(union perf_event *event)
226 {
227 	FILE *f = intel_pt_log_fp();
228 
229 	if (!intel_pt_enable_logging || !f)
230 		return;
231 
232 	perf_event__fprintf(event, f);
233 }
234 
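/*
 * In snapshot or sampling mode consecutive buffers may overlap. Find where
 * buffer 'b' stops overlapping buffer 'a' and adjust 'b' to start there.
 */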
235 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
236 				   struct auxtrace_buffer *b)
237 {
238 	bool consecutive = false;
239 	void *start;
240 
241 	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
242 				      pt->have_tsc, &consecutive);
243 	if (!start)
244 		return -EINVAL;
245 	b->use_size = b->data + b->size - start;
246 	b->use_data = start;
247 	if (b->use_size && consecutive)
248 		b->consecutive = true;
249 	return 0;
250 }
251 
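/*
 * Map the auxtrace buffer's data if necessary, fix up any overlap with the
 * previous buffer, and fill in the decoder's view of the buffer.
 */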
252 static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
253 			       struct auxtrace_buffer *buffer,
254 			       struct auxtrace_buffer *old_buffer,
255 			       struct intel_pt_buffer *b)
256 {
257 	bool might_overlap;
258 
259 	if (!buffer->data) {
260 		int fd = perf_data__fd(ptq->pt->session->data);
261 
262 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
263 		if (!buffer->data)
264 			return -ENOMEM;
265 	}
266 
267 	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
268 	if (might_overlap && !buffer->consecutive && old_buffer &&
269 	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
270 		return -ENOMEM;
271 
272 	if (buffer->use_data) {
273 		b->len = buffer->use_size;
274 		b->buf = buffer->use_data;
275 	} else {
276 		b->len = buffer->size;
277 		b->buf = buffer->data;
278 	}
279 	b->ref_timestamp = buffer->reference;
280 
281 	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
282 		b->consecutive = false;
283 		b->trace_nr = buffer->buffer_nr + 1;
284 	} else {
285 		b->consecutive = true;
286 	}
287 
288 	return 0;
289 }
290 
291 /* Do not drop buffers with references - refer intel_pt_get_trace() */
292 static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
293 					   struct auxtrace_buffer *buffer)
294 {
295 	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
296 		return;
297 
298 	auxtrace_buffer__drop_data(buffer);
299 }
300 
301 /* Must be serialized with respect to intel_pt_get_trace() */
302 static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
303 			      void *cb_data)
304 {
305 	struct intel_pt_queue *ptq = data;
306 	struct auxtrace_buffer *buffer = ptq->buffer;
307 	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
308 	struct auxtrace_queue *queue;
309 	int err = 0;
310 
311 	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
312 
313 	while (1) {
314 		struct intel_pt_buffer b = { .len = 0 };
315 
316 		buffer = auxtrace_buffer__next(queue, buffer);
317 		if (!buffer)
318 			break;
319 
320 		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
321 		if (err)
322 			break;
323 
324 		if (b.len) {
325 			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
326 			old_buffer = buffer;
327 		} else {
328 			intel_pt_lookahead_drop_buffer(ptq, buffer);
329 			continue;
330 		}
331 
332 		err = cb(&b, cb_data);
333 		if (err)
334 			break;
335 	}
336 
337 	if (buffer != old_buffer)
338 		intel_pt_lookahead_drop_buffer(ptq, buffer);
339 	intel_pt_lookahead_drop_buffer(ptq, old_buffer);
340 
341 	return err;
342 }
343 
344 /*
345  * This function assumes data is processed sequentially only.
346  * Must be serialized with respect to intel_pt_lookahead()
347  */
348 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
349 {
350 	struct intel_pt_queue *ptq = data;
351 	struct auxtrace_buffer *buffer = ptq->buffer;
352 	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
353 	struct auxtrace_queue *queue;
354 	int err;
355 
356 	if (ptq->stop) {
357 		b->len = 0;
358 		return 0;
359 	}
360 
361 	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
362 
363 	buffer = auxtrace_buffer__next(queue, buffer);
364 	if (!buffer) {
365 		if (old_buffer)
366 			auxtrace_buffer__drop_data(old_buffer);
367 		b->len = 0;
368 		return 0;
369 	}
370 
371 	ptq->buffer = buffer;
372 
373 	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
374 	if (err)
375 		return err;
376 
377 	if (ptq->step_through_buffers)
378 		ptq->stop = true;
379 
380 	if (b->len) {
381 		if (old_buffer)
382 			auxtrace_buffer__drop_data(old_buffer);
383 		ptq->old_buffer = buffer;
384 	} else {
385 		auxtrace_buffer__drop_data(buffer);
386 		return intel_pt_get_trace(b, data);
387 	}
388 
389 	return 0;
390 }
391 
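/*
 * Cache entry for a decoded run of instructions, keyed by DSO file offset,
 * recording the byte and instruction counts up to the next branch and the
 * details of that branch instruction.
 */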
392 struct intel_pt_cache_entry {
393 	struct auxtrace_cache_entry	entry;
394 	u64				insn_cnt;
395 	u64				byte_cnt;
396 	enum intel_pt_insn_op		op;
397 	enum intel_pt_insn_branch	branch;
398 	int				length;
399 	int32_t				rel;
400 	char				insn[INTEL_PT_INSN_BUF_SZ];
401 };
402 
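/*
 * The divisor used to size the instruction cache can be overridden with the
 * perf config variable "intel-pt.cache-divisor", e.g.:
 *
 *	perf config intel-pt.cache-divisor=32
 */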
403 static int intel_pt_config_div(const char *var, const char *value, void *data)
404 {
405 	int *d = data;
406 	long val;
407 
408 	if (!strcmp(var, "intel-pt.cache-divisor")) {
409 		val = strtol(value, NULL, 0);
410 		if (val > 0 && val <= INT_MAX)
411 			*d = val;
412 	}
413 
414 	return 0;
415 }
416 
417 static int intel_pt_cache_divisor(void)
418 {
419 	static int d;
420 
421 	if (d)
422 		return d;
423 
424 	perf_config(intel_pt_config_div, &d);
425 
426 	if (!d)
427 		d = 64;
428 
429 	return d;
430 }
431 
432 static unsigned int intel_pt_cache_size(struct dso *dso,
433 					struct machine *machine)
434 {
435 	off_t size;
436 
437 	size = dso__data_size(dso, machine);
438 	size /= intel_pt_cache_divisor();
439 	if (size < 1000)
440 		return 10;
441 	if (size > (1 << 21))
442 		return 21;
443 	return 32 - __builtin_clz(size);
444 }
445 
446 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
447 					     struct machine *machine)
448 {
449 	struct auxtrace_cache *c;
450 	unsigned int bits;
451 
452 	if (dso->auxtrace_cache)
453 		return dso->auxtrace_cache;
454 
455 	bits = intel_pt_cache_size(dso, machine);
456 
457 	/* Ignoring cache creation failure */
458 	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
459 
460 	dso->auxtrace_cache = c;
461 
462 	return c;
463 }
464 
465 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
466 			      u64 offset, u64 insn_cnt, u64 byte_cnt,
467 			      struct intel_pt_insn *intel_pt_insn)
468 {
469 	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
470 	struct intel_pt_cache_entry *e;
471 	int err;
472 
473 	if (!c)
474 		return -ENOMEM;
475 
476 	e = auxtrace_cache__alloc_entry(c);
477 	if (!e)
478 		return -ENOMEM;
479 
480 	e->insn_cnt = insn_cnt;
481 	e->byte_cnt = byte_cnt;
482 	e->op = intel_pt_insn->op;
483 	e->branch = intel_pt_insn->branch;
484 	e->length = intel_pt_insn->length;
485 	e->rel = intel_pt_insn->rel;
486 	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
487 
488 	err = auxtrace_cache__add(c, offset, &e->entry);
489 	if (err)
490 		auxtrace_cache__free_entry(c, e);
491 
492 	return err;
493 }
494 
495 static struct intel_pt_cache_entry *
496 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
497 {
498 	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
499 
500 	if (!c)
501 		return NULL;
502 
503 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
504 }
505 
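/* Guess kernel vs user space purely from the IP (there is no CS info in the trace) */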
506 static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
507 {
508 	return ip >= pt->kernel_start ?
509 	       PERF_RECORD_MISC_KERNEL :
510 	       PERF_RECORD_MISC_USER;
511 }
512 
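/*
 * Decoder callback: starting at *ip, read object code from the DSO and step
 * through instructions until a branch is found, 'to_ip' is reached, or
 * 'max_insn_cnt' is exceeded. Results for complete walks are cached per DSO
 * to avoid re-decoding the same code.
 */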
513 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
514 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
515 				   uint64_t to_ip, uint64_t max_insn_cnt,
516 				   void *data)
517 {
518 	struct intel_pt_queue *ptq = data;
519 	struct machine *machine = ptq->pt->machine;
520 	struct thread *thread;
521 	struct addr_location al;
522 	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
523 	ssize_t len;
524 	int x86_64;
525 	u8 cpumode;
526 	u64 offset, start_offset, start_ip;
527 	u64 insn_cnt = 0;
528 	bool one_map = true;
529 
530 	intel_pt_insn->length = 0;
531 
532 	if (to_ip && *ip == to_ip)
533 		goto out_no_cache;
534 
535 	cpumode = intel_pt_cpumode(ptq->pt, *ip);
536 
537 	thread = ptq->thread;
538 	if (!thread) {
539 		if (cpumode != PERF_RECORD_MISC_KERNEL)
540 			return -EINVAL;
541 		thread = ptq->pt->unknown_thread;
542 	}
543 
544 	while (1) {
545 		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
546 			return -EINVAL;
547 
548 		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
549 		    dso__data_status_seen(al.map->dso,
550 					  DSO_DATA_STATUS_SEEN_ITRACE))
551 			return -ENOENT;
552 
553 		offset = al.map->map_ip(al.map, *ip);
554 
555 		if (!to_ip && one_map) {
556 			struct intel_pt_cache_entry *e;
557 
558 			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
559 			if (e &&
560 			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
561 				*insn_cnt_ptr = e->insn_cnt;
562 				*ip += e->byte_cnt;
563 				intel_pt_insn->op = e->op;
564 				intel_pt_insn->branch = e->branch;
565 				intel_pt_insn->length = e->length;
566 				intel_pt_insn->rel = e->rel;
567 				memcpy(intel_pt_insn->buf, e->insn,
568 				       INTEL_PT_INSN_BUF_SZ);
569 				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
570 				return 0;
571 			}
572 		}
573 
574 		start_offset = offset;
575 		start_ip = *ip;
576 
577 		/* Load maps to ensure dso->is_64_bit has been updated */
578 		map__load(al.map);
579 
580 		x86_64 = al.map->dso->is_64_bit;
581 
582 		while (1) {
583 			len = dso__data_read_offset(al.map->dso, machine,
584 						    offset, buf,
585 						    INTEL_PT_INSN_BUF_SZ);
586 			if (len <= 0)
587 				return -EINVAL;
588 
589 			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
590 				return -EINVAL;
591 
592 			intel_pt_log_insn(intel_pt_insn, *ip);
593 
594 			insn_cnt += 1;
595 
596 			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
597 				goto out;
598 
599 			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
600 				goto out_no_cache;
601 
602 			*ip += intel_pt_insn->length;
603 
604 			if (to_ip && *ip == to_ip)
605 				goto out_no_cache;
606 
607 			if (*ip >= al.map->end)
608 				break;
609 
610 			offset += intel_pt_insn->length;
611 		}
612 		one_map = false;
613 	}
614 out:
615 	*insn_cnt_ptr = insn_cnt;
616 
617 	if (!one_map)
618 		goto out_no_cache;
619 
620 	/*
621 	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
622 	 * entries.
623 	 */
624 	if (to_ip) {
625 		struct intel_pt_cache_entry *e;
626 
627 		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
628 		if (e)
629 			return 0;
630 	}
631 
632 	/* Ignore cache errors */
633 	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
634 			   *ip - start_ip, intel_pt_insn);
635 
636 	return 0;
637 
638 out_no_cache:
639 	*insn_cnt_ptr = insn_cnt;
640 	return 0;
641 }
642 
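/*
 * Check a TIP.PGD target against the address filters: return true if tracing
 * is expected to stop there, i.e. the IP hit a trace-stop region, or filters
 * are in use and it hit none of them.
 */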
643 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
644 				  uint64_t offset, const char *filename)
645 {
646 	struct addr_filter *filt;
647 	bool have_filter   = false;
648 	bool hit_tracestop = false;
649 	bool hit_filter    = false;
650 
651 	list_for_each_entry(filt, &pt->filts.head, list) {
652 		if (filt->start)
653 			have_filter = true;
654 
655 		if ((filename && !filt->filename) ||
656 		    (!filename && filt->filename) ||
657 		    (filename && strcmp(filename, filt->filename)))
658 			continue;
659 
660 		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
661 			continue;
662 
663 		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
664 			     ip, offset, filename ? filename : "[kernel]",
665 			     filt->start ? "filter" : "stop",
666 			     filt->addr, filt->size);
667 
668 		if (filt->start)
669 			hit_filter = true;
670 		else
671 			hit_tracestop = true;
672 	}
673 
674 	if (!hit_tracestop && !hit_filter)
675 		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
676 			     ip, offset, filename ? filename : "[kernel]");
677 
678 	return hit_tracestop || (have_filter && !hit_filter);
679 }
680 
681 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
682 {
683 	struct intel_pt_queue *ptq = data;
684 	struct thread *thread;
685 	struct addr_location al;
686 	u8 cpumode;
687 	u64 offset;
688 
689 	if (ip >= ptq->pt->kernel_start)
690 		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
691 
692 	cpumode = PERF_RECORD_MISC_USER;
693 
694 	thread = ptq->thread;
695 	if (!thread)
696 		return -EINVAL;
697 
698 	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
699 		return -EINVAL;
700 
701 	offset = al.map->map_ip(al.map, ip);
702 
703 	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
704 				     al.map->dso->long_name);
705 }
706 
707 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
708 {
709 	return __intel_pt_pgd_ip(ip, data) > 0;
710 }
711 
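/*
 * If the evsel is the Intel PT event, optionally return its config value.
 * Used by the helpers below to inspect how the trace was configured.
 */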
712 static bool intel_pt_get_config(struct intel_pt *pt,
713 				struct perf_event_attr *attr, u64 *config)
714 {
715 	if (attr->type == pt->pmu_type) {
716 		if (config)
717 			*config = attr->config;
718 		return true;
719 	}
720 
721 	return false;
722 }
723 
724 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
725 {
726 	struct perf_evsel *evsel;
727 
728 	evlist__for_each_entry(pt->session->evlist, evsel) {
729 		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
730 		    !evsel->attr.exclude_kernel)
731 			return false;
732 	}
733 	return true;
734 }
735 
736 static bool intel_pt_return_compression(struct intel_pt *pt)
737 {
738 	struct perf_evsel *evsel;
739 	u64 config;
740 
741 	if (!pt->noretcomp_bit)
742 		return true;
743 
744 	evlist__for_each_entry(pt->session->evlist, evsel) {
745 		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
746 		    (config & pt->noretcomp_bit))
747 			return false;
748 	}
749 	return true;
750 }
751 
752 static bool intel_pt_branch_enable(struct intel_pt *pt)
753 {
754 	struct perf_evsel *evsel;
755 	u64 config;
756 
757 	evlist__for_each_entry(pt->session->evlist, evsel) {
758 		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
759 		    (config & 1) && !(config & 0x2000))
760 			return false;
761 	}
762 	return true;
763 }
764 
765 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
766 {
767 	struct perf_evsel *evsel;
768 	unsigned int shift;
769 	u64 config;
770 
771 	if (!pt->mtc_freq_bits)
772 		return 0;
773 
774 	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
775 		config >>= 1;
776 
777 	evlist__for_each_entry(pt->session->evlist, evsel) {
778 		if (intel_pt_get_config(pt, &evsel->attr, &config))
779 			return (config & pt->mtc_freq_bits) >> shift;
780 	}
781 	return 0;
782 }
783 
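/*
 * "Timeless" decoding means timestamps cannot be used, so data is processed
 * one queue at a time instead of being interleaved in timestamp order.
 */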
784 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
785 {
786 	struct perf_evsel *evsel;
787 	bool timeless_decoding = true;
788 	u64 config;
789 
790 	if (!pt->tsc_bit || !pt->cap_user_time_zero)
791 		return true;
792 
793 	evlist__for_each_entry(pt->session->evlist, evsel) {
794 		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
795 			return true;
796 		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
797 			if (config & pt->tsc_bit)
798 				timeless_decoding = false;
799 			else
800 				return true;
801 		}
802 	}
803 	return timeless_decoding;
804 }
805 
806 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
807 {
808 	struct perf_evsel *evsel;
809 
810 	evlist__for_each_entry(pt->session->evlist, evsel) {
811 		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
812 		    !evsel->attr.exclude_kernel)
813 			return true;
814 	}
815 	return false;
816 }
817 
818 static bool intel_pt_have_tsc(struct intel_pt *pt)
819 {
820 	struct perf_evsel *evsel;
821 	bool have_tsc = false;
822 	u64 config;
823 
824 	if (!pt->tsc_bit)
825 		return false;
826 
827 	evlist__for_each_entry(pt->session->evlist, evsel) {
828 		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
829 			if (config & pt->tsc_bit)
830 				have_tsc = true;
831 			else
832 				return false;
833 		}
834 	}
835 	return have_tsc;
836 }
837 
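/*
 * Invert the TSC-to-nanosecond conversion: ticks = (ns << time_shift) /
 * time_mult, computed as quotient and remainder to avoid overflow.
 */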
838 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
839 {
840 	u64 quot, rem;
841 
842 	quot = ns / pt->tc.time_mult;
843 	rem  = ns % pt->tc.time_mult;
844 	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
845 		pt->tc.time_mult;
846 }
847 
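/*
 * Allocate a decode queue and create its decoder, configured from the itrace
 * synthesis options and the PT capabilities recorded in the perf.data header.
 */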
848 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
849 						   unsigned int queue_nr)
850 {
851 	struct intel_pt_params params = { .get_trace = 0, };
852 	struct perf_env *env = pt->machine->env;
853 	struct intel_pt_queue *ptq;
854 
855 	ptq = zalloc(sizeof(struct intel_pt_queue));
856 	if (!ptq)
857 		return NULL;
858 
859 	if (pt->synth_opts.callchain) {
860 		size_t sz = sizeof(struct ip_callchain);
861 
862 		/* Add 1 to callchain_sz for callchain context */
863 		sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
864 		ptq->chain = zalloc(sz);
865 		if (!ptq->chain)
866 			goto out_free;
867 	}
868 
869 	if (pt->synth_opts.last_branch) {
870 		size_t sz = sizeof(struct branch_stack);
871 
872 		sz += pt->synth_opts.last_branch_sz *
873 		      sizeof(struct branch_entry);
874 		ptq->last_branch = zalloc(sz);
875 		if (!ptq->last_branch)
876 			goto out_free;
877 		ptq->last_branch_rb = zalloc(sz);
878 		if (!ptq->last_branch_rb)
879 			goto out_free;
880 	}
881 
882 	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
883 	if (!ptq->event_buf)
884 		goto out_free;
885 
886 	ptq->pt = pt;
887 	ptq->queue_nr = queue_nr;
888 	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
889 	ptq->pid = -1;
890 	ptq->tid = -1;
891 	ptq->cpu = -1;
892 	ptq->next_tid = -1;
893 
894 	params.get_trace = intel_pt_get_trace;
895 	params.walk_insn = intel_pt_walk_next_insn;
896 	params.lookahead = intel_pt_lookahead;
897 	params.data = ptq;
898 	params.return_compression = intel_pt_return_compression(pt);
899 	params.branch_enable = intel_pt_branch_enable(pt);
900 	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
901 	params.mtc_period = intel_pt_mtc_period(pt);
902 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
903 	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
904 
905 	if (pt->filts.cnt > 0)
906 		params.pgd_ip = intel_pt_pgd_ip;
907 
908 	if (pt->synth_opts.instructions) {
909 		if (pt->synth_opts.period) {
910 			switch (pt->synth_opts.period_type) {
911 			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
912 				params.period_type =
913 						INTEL_PT_PERIOD_INSTRUCTIONS;
914 				params.period = pt->synth_opts.period;
915 				break;
916 			case PERF_ITRACE_PERIOD_TICKS:
917 				params.period_type = INTEL_PT_PERIOD_TICKS;
918 				params.period = pt->synth_opts.period;
919 				break;
920 			case PERF_ITRACE_PERIOD_NANOSECS:
921 				params.period_type = INTEL_PT_PERIOD_TICKS;
922 				params.period = intel_pt_ns_to_ticks(pt,
923 							pt->synth_opts.period);
924 				break;
925 			default:
926 				break;
927 			}
928 		}
929 
930 		if (!params.period) {
931 			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
932 			params.period = 1;
933 		}
934 	}
935 
936 	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
937 		params.flags |= INTEL_PT_FUP_WITH_NLIP;
938 
939 	ptq->decoder = intel_pt_decoder_new(&params);
940 	if (!ptq->decoder)
941 		goto out_free;
942 
943 	return ptq;
944 
945 out_free:
946 	zfree(&ptq->event_buf);
947 	zfree(&ptq->last_branch);
948 	zfree(&ptq->last_branch_rb);
949 	zfree(&ptq->chain);
950 	free(ptq);
951 	return NULL;
952 }
953 
954 static void intel_pt_free_queue(void *priv)
955 {
956 	struct intel_pt_queue *ptq = priv;
957 
958 	if (!ptq)
959 		return;
960 	thread__zput(ptq->thread);
961 	intel_pt_decoder_free(ptq->decoder);
962 	zfree(&ptq->event_buf);
963 	zfree(&ptq->last_branch);
964 	zfree(&ptq->last_branch_rb);
965 	zfree(&ptq->chain);
966 	free(ptq);
967 }
968 
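/*
 * Refresh the queue's tid/pid/thread, using the machine's notion of the
 * current tid on this CPU when the queue is per-cpu or sched_switch events
 * are being used.
 */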
969 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
970 				     struct auxtrace_queue *queue)
971 {
972 	struct intel_pt_queue *ptq = queue->priv;
973 
974 	if (queue->tid == -1 || pt->have_sched_switch) {
975 		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
976 		thread__zput(ptq->thread);
977 	}
978 
979 	if (!ptq->thread && ptq->tid != -1)
980 		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
981 
982 	if (ptq->thread) {
983 		ptq->pid = ptq->thread->pid_;
984 		if (queue->cpu == -1)
985 			ptq->cpu = ptq->thread->cpu;
986 	}
987 }
988 
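/*
 * Translate the decoder state into perf branch sample flags (branch type,
 * async/interrupt, TX abort, trace begin/end) and capture the instruction
 * bytes for the sample.
 */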
989 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
990 {
991 	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
992 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
993 	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
994 		if (ptq->state->to_ip)
995 			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
996 				     PERF_IP_FLAG_ASYNC |
997 				     PERF_IP_FLAG_INTERRUPT;
998 		else
999 			ptq->flags = PERF_IP_FLAG_BRANCH |
1000 				     PERF_IP_FLAG_TRACE_END;
1001 		ptq->insn_len = 0;
1002 	} else {
1003 		if (ptq->state->from_ip)
1004 			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
1005 		else
1006 			ptq->flags = PERF_IP_FLAG_BRANCH |
1007 				     PERF_IP_FLAG_TRACE_BEGIN;
1008 		if (ptq->state->flags & INTEL_PT_IN_TX)
1009 			ptq->flags |= PERF_IP_FLAG_IN_TX;
1010 		ptq->insn_len = ptq->state->insn_len;
1011 		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
1012 	}
1013 
1014 	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
1015 		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
1016 	if (ptq->state->type & INTEL_PT_TRACE_END)
1017 		ptq->flags |= PERF_IP_FLAG_TRACE_END;
1018 }
1019 
1020 static void intel_pt_setup_time_range(struct intel_pt *pt,
1021 				      struct intel_pt_queue *ptq)
1022 {
1023 	if (!pt->range_cnt)
1024 		return;
1025 
1026 	ptq->sel_timestamp = pt->time_ranges[0].start;
1027 	ptq->sel_idx = 0;
1028 
1029 	if (ptq->sel_timestamp) {
1030 		ptq->sel_start = true;
1031 	} else {
1032 		ptq->sel_timestamp = pt->time_ranges[0].end;
1033 		ptq->sel_start = false;
1034 	}
1035 }
1036 
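/*
 * Create the queue on first use. For timed decoding, decode far enough to
 * get the first timestamp so the queue can be added to the ordering heap.
 */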
1037 static int intel_pt_setup_queue(struct intel_pt *pt,
1038 				struct auxtrace_queue *queue,
1039 				unsigned int queue_nr)
1040 {
1041 	struct intel_pt_queue *ptq = queue->priv;
1042 
1043 	if (list_empty(&queue->head))
1044 		return 0;
1045 
1046 	if (!ptq) {
1047 		ptq = intel_pt_alloc_queue(pt, queue_nr);
1048 		if (!ptq)
1049 			return -ENOMEM;
1050 		queue->priv = ptq;
1051 
1052 		if (queue->cpu != -1)
1053 			ptq->cpu = queue->cpu;
1054 		ptq->tid = queue->tid;
1055 
1056 		ptq->cbr_seen = UINT_MAX;
1057 
1058 		if (pt->sampling_mode && !pt->snapshot_mode &&
1059 		    pt->timeless_decoding)
1060 			ptq->step_through_buffers = true;
1061 
1062 		ptq->sync_switch = pt->sync_switch;
1063 
1064 		intel_pt_setup_time_range(pt, ptq);
1065 	}
1066 
1067 	if (!ptq->on_heap &&
1068 	    (!ptq->sync_switch ||
1069 	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
1070 		const struct intel_pt_state *state;
1071 		int ret;
1072 
1073 		if (pt->timeless_decoding)
1074 			return 0;
1075 
1076 		intel_pt_log("queue %u getting timestamp\n", queue_nr);
1077 		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1078 			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1079 
1080 		if (ptq->sel_start && ptq->sel_timestamp) {
1081 			ret = intel_pt_fast_forward(ptq->decoder,
1082 						    ptq->sel_timestamp);
1083 			if (ret)
1084 				return ret;
1085 		}
1086 
1087 		while (1) {
1088 			state = intel_pt_decode(ptq->decoder);
1089 			if (state->err) {
1090 				if (state->err == INTEL_PT_ERR_NODATA) {
1091 					intel_pt_log("queue %u has no timestamp\n",
1092 						     queue_nr);
1093 					return 0;
1094 				}
1095 				continue;
1096 			}
1097 			if (state->timestamp)
1098 				break;
1099 		}
1100 
1101 		ptq->timestamp = state->timestamp;
1102 		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
1103 			     queue_nr, ptq->timestamp);
1104 		ptq->state = state;
1105 		ptq->have_sample = true;
1106 		if (ptq->sel_start && ptq->sel_timestamp &&
1107 		    ptq->timestamp < ptq->sel_timestamp)
1108 			ptq->have_sample = false;
1109 		intel_pt_sample_flags(ptq);
1110 		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
1111 		if (ret)
1112 			return ret;
1113 		ptq->on_heap = true;
1114 	}
1115 
1116 	return 0;
1117 }
1118 
1119 static int intel_pt_setup_queues(struct intel_pt *pt)
1120 {
1121 	unsigned int i;
1122 	int ret;
1123 
1124 	for (i = 0; i < pt->queues.nr_queues; i++) {
1125 		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
1126 		if (ret)
1127 			return ret;
1128 	}
1129 	return 0;
1130 }
1131 
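/*
 * Flatten the last-branch ring buffer into the linear branch stack that is
 * attached to synthesized samples, most recent branch first.
 */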
1132 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
1133 {
1134 	struct branch_stack *bs_src = ptq->last_branch_rb;
1135 	struct branch_stack *bs_dst = ptq->last_branch;
1136 	size_t nr = 0;
1137 
1138 	bs_dst->nr = bs_src->nr;
1139 
1140 	if (!bs_src->nr)
1141 		return;
1142 
1143 	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1144 	memcpy(&bs_dst->entries[0],
1145 	       &bs_src->entries[ptq->last_branch_pos],
1146 	       sizeof(struct branch_entry) * nr);
1147 
1148 	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1149 		memcpy(&bs_dst->entries[nr],
1150 		       &bs_src->entries[0],
1151 		       sizeof(struct branch_entry) * ptq->last_branch_pos);
1152 	}
1153 }
1154 
1155 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1156 {
1157 	ptq->last_branch_pos = 0;
1158 	ptq->last_branch_rb->nr = 0;
1159 }
1160 
1161 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1162 {
1163 	const struct intel_pt_state *state = ptq->state;
1164 	struct branch_stack *bs = ptq->last_branch_rb;
1165 	struct branch_entry *be;
1166 
1167 	if (!ptq->last_branch_pos)
1168 		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1169 
1170 	ptq->last_branch_pos -= 1;
1171 
1172 	be              = &bs->entries[ptq->last_branch_pos];
1173 	be->from        = state->from_ip;
1174 	be->to          = state->to_ip;
1175 	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1176 	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1177 	/* No support for mispredict */
1178 	be->flags.mispred = ptq->pt->mispred_all;
1179 
1180 	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1181 		bs->nr += 1;
1182 }
1183 
1184 static inline bool intel_pt_skip_event(struct intel_pt *pt)
1185 {
1186 	return pt->synth_opts.initial_skip &&
1187 	       pt->num_events++ < pt->synth_opts.initial_skip;
1188 }
1189 
1190 /*
1191  * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
1192  * Also ensure CBR is first non-skipped event by allowing for 4 more samples
1193  * from this decoder state.
1194  */
1195 static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
1196 {
1197 	return pt->synth_opts.initial_skip &&
1198 	       pt->num_events + 4 < pt->synth_opts.initial_skip;
1199 }
1200 
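/* Fill in the sample fields common to all synthesized events */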
1201 static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
1202 				   union perf_event *event,
1203 				   struct perf_sample *sample)
1204 {
1205 	event->sample.header.type = PERF_RECORD_SAMPLE;
1206 	event->sample.header.size = sizeof(struct perf_event_header);
1207 
1208 	sample->pid = ptq->pid;
1209 	sample->tid = ptq->tid;
1210 	sample->cpu = ptq->cpu;
1211 	sample->insn_len = ptq->insn_len;
1212 	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1213 }
1214 
1215 static void intel_pt_prep_b_sample(struct intel_pt *pt,
1216 				   struct intel_pt_queue *ptq,
1217 				   union perf_event *event,
1218 				   struct perf_sample *sample)
1219 {
1220 	intel_pt_prep_a_sample(ptq, event, sample);
1221 
1222 	if (!pt->timeless_decoding)
1223 		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1224 
1225 	sample->ip = ptq->state->from_ip;
1226 	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
1227 	sample->addr = ptq->state->to_ip;
1228 	sample->period = 1;
1229 	sample->flags = ptq->flags;
1230 
1231 	event->sample.header.misc = sample->cpumode;
1232 }
1233 
1234 static int intel_pt_inject_event(union perf_event *event,
1235 				 struct perf_sample *sample, u64 type)
1236 {
1237 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1238 	return perf_event__synthesize_sample(event, type, 0, sample);
1239 }
1240 
1241 static inline int intel_pt_opt_inject(struct intel_pt *pt,
1242 				      union perf_event *event,
1243 				      struct perf_sample *sample, u64 type)
1244 {
1245 	if (!pt->synth_opts.inject)
1246 		return 0;
1247 
1248 	return intel_pt_inject_event(event, sample, type);
1249 }
1250 
1251 static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
1252 					  union perf_event *event,
1253 					  struct perf_sample *sample, u64 type)
1254 {
1255 	int ret;
1256 
1257 	ret = intel_pt_opt_inject(pt, event, sample, type);
1258 	if (ret)
1259 		return ret;
1260 
1261 	ret = perf_session__deliver_synth_event(pt->session, event, sample);
1262 	if (ret)
1263 		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
1264 
1265 	return ret;
1266 }
1267 
1268 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1269 {
1270 	struct intel_pt *pt = ptq->pt;
1271 	union perf_event *event = ptq->event_buf;
1272 	struct perf_sample sample = { .ip = 0, };
1273 	struct dummy_branch_stack {
1274 		u64			nr;
1275 		struct branch_entry	entries;
1276 	} dummy_bs;
1277 
1278 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1279 		return 0;
1280 
1281 	if (intel_pt_skip_event(pt))
1282 		return 0;
1283 
1284 	intel_pt_prep_b_sample(pt, ptq, event, &sample);
1285 
1286 	sample.id = ptq->pt->branches_id;
1287 	sample.stream_id = ptq->pt->branches_id;
1288 
1289 	/*
1290 	 * perf report cannot handle events without a branch stack when using
1291 	 * SORT_MODE__BRANCH so make a dummy one.
1292 	 */
1293 	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1294 		dummy_bs = (struct dummy_branch_stack){
1295 			.nr = 1,
1296 			.entries = {
1297 				.from = sample.ip,
1298 				.to = sample.addr,
1299 			},
1300 		};
1301 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1302 	}
1303 
1304 	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
1305 	if (sample.cyc_cnt) {
1306 		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
1307 		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
1308 		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
1309 	}
1310 
1311 	return intel_pt_deliver_synth_b_event(pt, event, &sample,
1312 					      pt->branches_sample_type);
1313 }
1314 
1315 static void intel_pt_prep_sample(struct intel_pt *pt,
1316 				 struct intel_pt_queue *ptq,
1317 				 union perf_event *event,
1318 				 struct perf_sample *sample)
1319 {
1320 	intel_pt_prep_b_sample(pt, ptq, event, sample);
1321 
1322 	if (pt->synth_opts.callchain) {
1323 		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1324 				     pt->synth_opts.callchain_sz + 1,
1325 				     sample->ip, pt->kernel_start);
1326 		sample->callchain = ptq->chain;
1327 	}
1328 
1329 	if (pt->synth_opts.last_branch) {
1330 		intel_pt_copy_last_branch_rb(ptq);
1331 		sample->branch_stack = ptq->last_branch;
1332 	}
1333 }
1334 
1335 static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
1336 					       struct intel_pt_queue *ptq,
1337 					       union perf_event *event,
1338 					       struct perf_sample *sample,
1339 					       u64 type)
1340 {
1341 	int ret;
1342 
1343 	ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
1344 
1345 	if (pt->synth_opts.last_branch)
1346 		intel_pt_reset_last_branch_rb(ptq);
1347 
1348 	return ret;
1349 }
1350 
1351 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1352 {
1353 	struct intel_pt *pt = ptq->pt;
1354 	union perf_event *event = ptq->event_buf;
1355 	struct perf_sample sample = { .ip = 0, };
1356 
1357 	if (intel_pt_skip_event(pt))
1358 		return 0;
1359 
1360 	intel_pt_prep_sample(pt, ptq, event, &sample);
1361 
1362 	sample.id = ptq->pt->instructions_id;
1363 	sample.stream_id = ptq->pt->instructions_id;
1364 	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1365 
1366 	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
1367 	if (sample.cyc_cnt) {
1368 		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
1369 		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
1370 		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
1371 	}
1372 
1373 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1374 
1375 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1376 					    pt->instructions_sample_type);
1377 }
1378 
1379 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1380 {
1381 	struct intel_pt *pt = ptq->pt;
1382 	union perf_event *event = ptq->event_buf;
1383 	struct perf_sample sample = { .ip = 0, };
1384 
1385 	if (intel_pt_skip_event(pt))
1386 		return 0;
1387 
1388 	intel_pt_prep_sample(pt, ptq, event, &sample);
1389 
1390 	sample.id = ptq->pt->transactions_id;
1391 	sample.stream_id = ptq->pt->transactions_id;
1392 
1393 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1394 					    pt->transactions_sample_type);
1395 }
1396 
1397 static void intel_pt_prep_p_sample(struct intel_pt *pt,
1398 				   struct intel_pt_queue *ptq,
1399 				   union perf_event *event,
1400 				   struct perf_sample *sample)
1401 {
1402 	intel_pt_prep_sample(pt, ptq, event, sample);
1403 
1404 	/*
1405 	 * Zero IP is used to mean "trace start" but that is not the case for
1406 	 * power or PTWRITE events with no IP, so clear the flags.
1407 	 */
1408 	if (!sample->ip)
1409 		sample->flags = 0;
1410 }
1411 
1412 static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
1413 {
1414 	struct intel_pt *pt = ptq->pt;
1415 	union perf_event *event = ptq->event_buf;
1416 	struct perf_sample sample = { .ip = 0, };
1417 	struct perf_synth_intel_ptwrite raw;
1418 
1419 	if (intel_pt_skip_event(pt))
1420 		return 0;
1421 
1422 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1423 
1424 	sample.id = ptq->pt->ptwrites_id;
1425 	sample.stream_id = ptq->pt->ptwrites_id;
1426 
1427 	raw.flags = 0;
1428 	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1429 	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
1430 
1431 	sample.raw_size = perf_synth__raw_size(raw);
1432 	sample.raw_data = perf_synth__raw_data(&raw);
1433 
1434 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1435 					    pt->ptwrites_sample_type);
1436 }
1437 
1438 static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1439 {
1440 	struct intel_pt *pt = ptq->pt;
1441 	union perf_event *event = ptq->event_buf;
1442 	struct perf_sample sample = { .ip = 0, };
1443 	struct perf_synth_intel_cbr raw;
1444 	u32 flags;
1445 
1446 	if (intel_pt_skip_cbr_event(pt))
1447 		return 0;
1448 
1449 	ptq->cbr_seen = ptq->state->cbr;
1450 
1451 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1452 
1453 	sample.id = ptq->pt->cbr_id;
1454 	sample.stream_id = ptq->pt->cbr_id;
1455 
1456 	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
1457 	raw.flags = cpu_to_le32(flags);
1458 	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
1459 	raw.reserved3 = 0;
1460 
1461 	sample.raw_size = perf_synth__raw_size(raw);
1462 	sample.raw_data = perf_synth__raw_data(&raw);
1463 
1464 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1465 					    pt->pwr_events_sample_type);
1466 }
1467 
1468 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
1469 {
1470 	struct intel_pt *pt = ptq->pt;
1471 	union perf_event *event = ptq->event_buf;
1472 	struct perf_sample sample = { .ip = 0, };
1473 	struct perf_synth_intel_mwait raw;
1474 
1475 	if (intel_pt_skip_event(pt))
1476 		return 0;
1477 
1478 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1479 
1480 	sample.id = ptq->pt->mwait_id;
1481 	sample.stream_id = ptq->pt->mwait_id;
1482 
1483 	raw.reserved = 0;
1484 	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
1485 
1486 	sample.raw_size = perf_synth__raw_size(raw);
1487 	sample.raw_data = perf_synth__raw_data(&raw);
1488 
1489 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1490 					    pt->pwr_events_sample_type);
1491 }
1492 
1493 static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
1494 {
1495 	struct intel_pt *pt = ptq->pt;
1496 	union perf_event *event = ptq->event_buf;
1497 	struct perf_sample sample = { .ip = 0, };
1498 	struct perf_synth_intel_pwre raw;
1499 
1500 	if (intel_pt_skip_event(pt))
1501 		return 0;
1502 
1503 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1504 
1505 	sample.id = ptq->pt->pwre_id;
1506 	sample.stream_id = ptq->pt->pwre_id;
1507 
1508 	raw.reserved = 0;
1509 	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
1510 
1511 	sample.raw_size = perf_synth__raw_size(raw);
1512 	sample.raw_data = perf_synth__raw_data(&raw);
1513 
1514 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1515 					    pt->pwr_events_sample_type);
1516 }
1517 
1518 static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
1519 {
1520 	struct intel_pt *pt = ptq->pt;
1521 	union perf_event *event = ptq->event_buf;
1522 	struct perf_sample sample = { .ip = 0, };
1523 	struct perf_synth_intel_exstop raw;
1524 
1525 	if (intel_pt_skip_event(pt))
1526 		return 0;
1527 
1528 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1529 
1530 	sample.id = ptq->pt->exstop_id;
1531 	sample.stream_id = ptq->pt->exstop_id;
1532 
1533 	raw.flags = 0;
1534 	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1535 
1536 	sample.raw_size = perf_synth__raw_size(raw);
1537 	sample.raw_data = perf_synth__raw_data(&raw);
1538 
1539 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1540 					    pt->pwr_events_sample_type);
1541 }
1542 
1543 static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1544 {
1545 	struct intel_pt *pt = ptq->pt;
1546 	union perf_event *event = ptq->event_buf;
1547 	struct perf_sample sample = { .ip = 0, };
1548 	struct perf_synth_intel_pwrx raw;
1549 
1550 	if (intel_pt_skip_event(pt))
1551 		return 0;
1552 
1553 	intel_pt_prep_p_sample(pt, ptq, event, &sample);
1554 
1555 	sample.id = ptq->pt->pwrx_id;
1556 	sample.stream_id = ptq->pt->pwrx_id;
1557 
1558 	raw.reserved = 0;
1559 	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
1560 
1561 	sample.raw_size = perf_synth__raw_size(raw);
1562 	sample.raw_data = perf_synth__raw_data(&raw);
1563 
1564 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1565 					    pt->pwr_events_sample_type);
1566 }
1567 
1568 /*
1569  * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
1570  * intel_pt_add_gp_regs().
1571  */
1572 static const int pebs_gp_regs[] = {
1573 	[PERF_REG_X86_FLAGS]	= 1,
1574 	[PERF_REG_X86_IP]	= 2,
1575 	[PERF_REG_X86_AX]	= 3,
1576 	[PERF_REG_X86_CX]	= 4,
1577 	[PERF_REG_X86_DX]	= 5,
1578 	[PERF_REG_X86_BX]	= 6,
1579 	[PERF_REG_X86_SP]	= 7,
1580 	[PERF_REG_X86_BP]	= 8,
1581 	[PERF_REG_X86_SI]	= 9,
1582 	[PERF_REG_X86_DI]	= 10,
1583 	[PERF_REG_X86_R8]	= 11,
1584 	[PERF_REG_X86_R9]	= 12,
1585 	[PERF_REG_X86_R10]	= 13,
1586 	[PERF_REG_X86_R11]	= 14,
1587 	[PERF_REG_X86_R12]	= 15,
1588 	[PERF_REG_X86_R13]	= 16,
1589 	[PERF_REG_X86_R14]	= 17,
1590 	[PERF_REG_X86_R15]	= 18,
1591 };
1592 
1593 static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
1594 				 const struct intel_pt_blk_items *items,
1595 				 u64 regs_mask)
1596 {
1597 	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
1598 	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
1599 	u32 bit;
1600 	int i;
1601 
1602 	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
1603 		/* Get the PEBS gp_regs array index */
1604 		int n = pebs_gp_regs[i] - 1;
1605 
1606 		if (n < 0)
1607 			continue;
1608 		/*
1609 		 * Add only registers that were requested (i.e. 'regs_mask') and
1610 		 * that were provided (i.e. 'mask'), and update the resulting
1611 		 * mask (i.e. 'intr_regs->mask') accordingly.
1612 		 */
1613 		if (mask & 1 << n && regs_mask & bit) {
1614 			intr_regs->mask |= bit;
1615 			*pos++ = gp_regs[n];
1616 		}
1617 	}
1618 
1619 	return pos;
1620 }
1621 
1622 #ifndef PERF_REG_X86_XMM0
1623 #define PERF_REG_X86_XMM0 32
1624 #endif
1625 
1626 static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
1627 			     const struct intel_pt_blk_items *items,
1628 			     u64 regs_mask)
1629 {
1630 	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
1631 	const u64 *xmm = items->xmm;
1632 
1633 	/*
1634 	 * If there are any XMM registers, then there should be all of them.
1635 	 * Nevertheless, follow the logic to add only registers that were
1636 	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
1637 	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
1638 	 */
1639 	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
1640 
1641 	for (; mask; mask >>= 1, xmm++) {
1642 		if (mask & 1)
1643 			*pos++ = *xmm;
1644 	}
1645 }
1646 
1647 #define LBR_INFO_MISPRED	(1ULL << 63)
1648 #define LBR_INFO_IN_TX		(1ULL << 62)
1649 #define LBR_INFO_ABORT		(1ULL << 61)
1650 #define LBR_INFO_CYCLES		0xffff
1651 
1652 /* Refer kernel's intel_pmu_store_pebs_lbrs() */
1653 static u64 intel_pt_lbr_flags(u64 info)
1654 {
1655 	union {
1656 		struct branch_flags flags;
1657 		u64 result;
1658 	} u = {
1659 		.flags = {
1660 			.mispred	= !!(info & LBR_INFO_MISPRED),
1661 			.predicted	= !(info & LBR_INFO_MISPRED),
1662 			.in_tx		= !!(info & LBR_INFO_IN_TX),
1663 			.abort		= !!(info & LBR_INFO_ABORT),
1664 			.cycles		= info & LBR_INFO_CYCLES,
1665 		}
1666 	};
1667 
1668 	return u.result;
1669 }
1670 
1671 static void intel_pt_add_lbrs(struct branch_stack *br_stack,
1672 			      const struct intel_pt_blk_items *items)
1673 {
1674 	u64 *to;
1675 	int i;
1676 
1677 	br_stack->nr = 0;
1678 
1679 	to = &br_stack->entries[0].from;
1680 
1681 	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
1682 		u32 mask = items->mask[i];
1683 		const u64 *from = items->val[i];
1684 
1685 		for (; mask; mask >>= 3, from += 3) {
1686 			if ((mask & 7) == 7) {
1687 				*to++ = from[0];
1688 				*to++ = from[1];
1689 				*to++ = intel_pt_lbr_flags(from[2]);
1690 				br_stack->nr += 1;
1691 			}
1692 		}
1693 	}
1694 }
1695 
1696 /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
1697 #define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
1698 
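/*
 * Synthesize a sample for a PEBS record conveyed by PT Block Item packets,
 * filling in only the fields that are both present in the trace and
 * requested by the PEBS event's sample_type.
 */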
1699 static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
1700 {
1701 	const struct intel_pt_blk_items *items = &ptq->state->items;
1702 	struct perf_sample sample = { .ip = 0, };
1703 	union perf_event *event = ptq->event_buf;
1704 	struct intel_pt *pt = ptq->pt;
1705 	struct perf_evsel *evsel = pt->pebs_evsel;
1706 	u64 sample_type = evsel->attr.sample_type;
1707 	u64 id = evsel->id[0];
1708 	u8 cpumode;
1709 
1710 	if (intel_pt_skip_event(pt))
1711 		return 0;
1712 
1713 	intel_pt_prep_a_sample(ptq, event, &sample);
1714 
1715 	sample.id = id;
1716 	sample.stream_id = id;
1717 
1718 	if (!evsel->attr.freq)
1719 		sample.period = evsel->attr.sample_period;
1720 
1721 	/* No support for non-zero CS base */
1722 	if (items->has_ip)
1723 		sample.ip = items->ip;
1724 	else if (items->has_rip)
1725 		sample.ip = items->rip;
1726 	else
1727 		sample.ip = ptq->state->from_ip;
1728 
1729 	/* No support for guest mode at this time */
1730 	cpumode = sample.ip < ptq->pt->kernel_start ?
1731 		  PERF_RECORD_MISC_USER :
1732 		  PERF_RECORD_MISC_KERNEL;
1733 
1734 	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
1735 
1736 	sample.cpumode = cpumode;
1737 
1738 	if (sample_type & PERF_SAMPLE_TIME) {
1739 		u64 timestamp = 0;
1740 
1741 		if (items->has_timestamp)
1742 			timestamp = items->timestamp;
1743 		else if (!pt->timeless_decoding)
1744 			timestamp = ptq->timestamp;
1745 		if (timestamp)
1746 			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
1747 	}
1748 
1749 	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
1750 	    pt->synth_opts.callchain) {
1751 		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1752 				     pt->synth_opts.callchain_sz, sample.ip,
1753 				     pt->kernel_start);
1754 		sample.callchain = ptq->chain;
1755 	}
1756 
1757 	if (sample_type & PERF_SAMPLE_REGS_INTR &&
1758 	    items->mask[INTEL_PT_GP_REGS_POS]) {
1759 		u64 regs[sizeof(sample.intr_regs.mask)];
1760 		u64 regs_mask = evsel->attr.sample_regs_intr;
1761 		u64 *pos;
1762 
1763 		sample.intr_regs.abi = items->is_32_bit ?
1764 				       PERF_SAMPLE_REGS_ABI_32 :
1765 				       PERF_SAMPLE_REGS_ABI_64;
1766 		sample.intr_regs.regs = regs;
1767 
1768 		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
1769 
1770 		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
1771 	}
1772 
1773 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1774 		struct {
1775 			struct branch_stack br_stack;
1776 			struct branch_entry entries[LBRS_MAX];
1777 		} br;
1778 
1779 		if (items->mask[INTEL_PT_LBR_0_POS] ||
1780 		    items->mask[INTEL_PT_LBR_1_POS] ||
1781 		    items->mask[INTEL_PT_LBR_2_POS]) {
1782 			intel_pt_add_lbrs(&br.br_stack, items);
1783 			sample.branch_stack = &br.br_stack;
1784 		} else if (pt->synth_opts.last_branch) {
1785 			intel_pt_copy_last_branch_rb(ptq);
1786 			sample.branch_stack = ptq->last_branch;
1787 		} else {
1788 			br.br_stack.nr = 0;
1789 			sample.branch_stack = &br.br_stack;
1790 		}
1791 	}
1792 
1793 	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
1794 		sample.addr = items->mem_access_address;
1795 
1796 	if (sample_type & PERF_SAMPLE_WEIGHT) {
1797 		/*
1798 		 * Refer kernel's setup_pebs_adaptive_sample_data() and
1799 		 * intel_hsw_weight().
1800 		 */
1801 		if (items->has_mem_access_latency)
1802 			sample.weight = items->mem_access_latency;
1803 		if (!sample.weight && items->has_tsx_aux_info) {
1804 			/* Cycles last block */
1805 			sample.weight = (u32)items->tsx_aux_info;
1806 		}
1807 	}
1808 
1809 	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
1810 		u64 ax = items->has_rax ? items->rax : 0;
1811 		/* Refer kernel's intel_hsw_transaction() */
1812 		u64 txn = (u8)(items->tsx_aux_info >> 32);
1813 
1814 		/* For RTM XABORTs also log the abort code from AX */
1815 		if (txn & PERF_TXN_TRANSACTION && ax & 1)
1816 			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1817 		sample.transaction = txn;
1818 	}
1819 
1820 	return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
1821 }
1822 
1823 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1824 				pid_t pid, pid_t tid, u64 ip, u64 timestamp)
1825 {
1826 	union perf_event event;
1827 	char msg[MAX_AUXTRACE_ERROR_MSG];
1828 	int err;
1829 
1830 	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1831 
1832 	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1833 			     code, cpu, pid, tid, ip, msg, timestamp);
1834 
1835 	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1836 	if (err)
1837 		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1838 		       err);
1839 
1840 	return err;
1841 }
1842 
1843 static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
1844 				 const struct intel_pt_state *state)
1845 {
1846 	struct intel_pt *pt = ptq->pt;
1847 	u64 tm = ptq->timestamp;
1848 
1849 	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
1850 
1851 	return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
1852 				    ptq->tid, state->from_ip, tm);
1853 }
1854 
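/*
 * A context switch has been detected: record the new tid as current on this
 * CPU and re-resolve the queue's pid/tid/thread.
 */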
1855 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1856 {
1857 	struct auxtrace_queue *queue;
1858 	pid_t tid = ptq->next_tid;
1859 	int err;
1860 
1861 	if (tid == -1)
1862 		return 0;
1863 
1864 	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1865 
1866 	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1867 
1868 	queue = &pt->queues.queue_array[ptq->queue_nr];
1869 	intel_pt_set_pid_tid_cpu(pt, queue);
1870 
1871 	ptq->next_tid = -1;
1872 
1873 	return err;
1874 }
1875 
1876 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1877 {
1878 	struct intel_pt *pt = ptq->pt;
1879 
1880 	return ip == pt->switch_ip &&
1881 	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1882 	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1883 			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1884 }
1885 
1886 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1887 			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
1888 
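/*
 * Synthesize all requested events for the current decoder state, then, if
 * sync_switch is in use, track progress towards the next context switch.
 */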
1889 static int intel_pt_sample(struct intel_pt_queue *ptq)
1890 {
1891 	const struct intel_pt_state *state = ptq->state;
1892 	struct intel_pt *pt = ptq->pt;
1893 	int err;
1894 
1895 	if (!ptq->have_sample)
1896 		return 0;
1897 
1898 	ptq->have_sample = false;
1899 
1900 	if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
1901 		/*
1902 		 * Cycle count and instruction count only go together to create
1903 		 * a valid IPC ratio when the cycle count changes.
1904 		 */
1905 		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
1906 		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
1907 	}
1908 
1909 	/*
1910 	 * Do PEBS first to allow for the possibility that the PEBS timestamp
1911 	 * precedes the current timestamp.
1912 	 */
1913 	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
1914 		err = intel_pt_synth_pebs_sample(ptq);
1915 		if (err)
1916 			return err;
1917 	}
1918 
1919 	if (pt->sample_pwr_events) {
1920 		if (ptq->state->cbr != ptq->cbr_seen) {
1921 			err = intel_pt_synth_cbr_sample(ptq);
1922 			if (err)
1923 				return err;
1924 		}
1925 		if (state->type & INTEL_PT_PWR_EVT) {
1926 			if (state->type & INTEL_PT_MWAIT_OP) {
1927 				err = intel_pt_synth_mwait_sample(ptq);
1928 				if (err)
1929 					return err;
1930 			}
1931 			if (state->type & INTEL_PT_PWR_ENTRY) {
1932 				err = intel_pt_synth_pwre_sample(ptq);
1933 				if (err)
1934 					return err;
1935 			}
1936 			if (state->type & INTEL_PT_EX_STOP) {
1937 				err = intel_pt_synth_exstop_sample(ptq);
1938 				if (err)
1939 					return err;
1940 			}
1941 			if (state->type & INTEL_PT_PWR_EXIT) {
1942 				err = intel_pt_synth_pwrx_sample(ptq);
1943 				if (err)
1944 					return err;
1945 			}
1946 		}
1947 	}
1948 
1949 	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
1950 		err = intel_pt_synth_instruction_sample(ptq);
1951 		if (err)
1952 			return err;
1953 	}
1954 
1955 	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
1956 		err = intel_pt_synth_transaction_sample(ptq);
1957 		if (err)
1958 			return err;
1959 	}
1960 
1961 	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
1962 		err = intel_pt_synth_ptwrite_sample(ptq);
1963 		if (err)
1964 			return err;
1965 	}
1966 
1967 	if (!(state->type & INTEL_PT_BRANCH))
1968 		return 0;
1969 
1970 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1971 		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
1972 				    state->to_ip, ptq->insn_len,
1973 				    state->trace_nr);
1974 	else
1975 		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
1976 
1977 	if (pt->sample_branches) {
1978 		err = intel_pt_synth_branch_sample(ptq);
1979 		if (err)
1980 			return err;
1981 	}
1982 
1983 	if (pt->synth_opts.last_branch)
1984 		intel_pt_update_last_branch_rb(ptq);
1985 
1986 	if (!ptq->sync_switch)
1987 		return 0;
1988 
1989 	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1990 		switch (ptq->switch_state) {
1991 		case INTEL_PT_SS_NOT_TRACING:
1992 		case INTEL_PT_SS_UNKNOWN:
1993 		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1994 			err = intel_pt_next_tid(pt, ptq);
1995 			if (err)
1996 				return err;
1997 			ptq->switch_state = INTEL_PT_SS_TRACING;
1998 			break;
1999 		default:
2000 			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
2001 			return 1;
2002 		}
2003 	} else if (!state->to_ip) {
2004 		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
2005 	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
2006 		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
2007 	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
2008 		   state->to_ip == pt->ptss_ip &&
2009 		   (ptq->flags & PERF_IP_FLAG_CALL)) {
2010 		ptq->switch_state = INTEL_PT_SS_TRACING;
2011 	}
2012 
2013 	return 0;
2014 }
2015 
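/*
 * Find the address of the kernel's __switch_to() (the "switch IP") and, via
 * *ptss_ip, the address from which a scheduler switch is traced:
 * perf_trace_sched_switch() when the sched_switch tracepoint is used,
 * otherwise __perf_event_task_sched_out().
 */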
2016 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
2017 {
2018 	struct machine *machine = pt->machine;
2019 	struct map *map;
2020 	struct symbol *sym, *start;
2021 	u64 ip, switch_ip = 0;
2022 	const char *ptss;
2023 
2024 	if (ptss_ip)
2025 		*ptss_ip = 0;
2026 
2027 	map = machine__kernel_map(machine);
2028 	if (!map)
2029 		return 0;
2030 
2031 	if (map__load(map))
2032 		return 0;
2033 
2034 	start = dso__first_symbol(map->dso);
2035 
2036 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
2037 		if (sym->binding == STB_GLOBAL &&
2038 		    !strcmp(sym->name, "__switch_to")) {
2039 			ip = map->unmap_ip(map, sym->start);
2040 			if (ip >= map->start && ip < map->end) {
2041 				switch_ip = ip;
2042 				break;
2043 			}
2044 		}
2045 	}
2046 
2047 	if (!switch_ip || !ptss_ip)
2048 		return 0;
2049 
2050 	if (pt->have_sched_switch == 1)
2051 		ptss = "perf_trace_sched_switch";
2052 	else
2053 		ptss = "__perf_event_task_sched_out";
2054 
2055 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
2056 		if (!strcmp(sym->name, ptss)) {
2057 			ip = map->unmap_ip(map, sym->start);
2058 			if (ip >= map->start && ip < map->end) {
2059 				*ptss_ip = ip;
2060 				break;
2061 			}
2062 		}
2063 	}
2064 
2065 	return switch_ip;
2066 }
2067 
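/*
 * "sync_switch" synchronizes decoding with context switch events: instead of
 * switching immediately, the decoder records the next tid and applies it when
 * the switch IP is reached.  Enable it globally and for every existing queue.
 */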
2068 static void intel_pt_enable_sync_switch(struct intel_pt *pt)
2069 {
2070 	unsigned int i;
2071 
2072 	pt->sync_switch = true;
2073 
2074 	for (i = 0; i < pt->queues.nr_queues; i++) {
2075 		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
2076 		struct intel_pt_queue *ptq = queue->priv;
2077 
2078 		if (ptq)
2079 			ptq->sync_switch = true;
2080 	}
2081 }
2082 
2083 /*
2084  * To filter against time ranges, it is only necessary to look at the next start
2085  * or end time.
2086  */
2087 static bool intel_pt_next_time(struct intel_pt_queue *ptq)
2088 {
2089 	struct intel_pt *pt = ptq->pt;
2090 
2091 	if (ptq->sel_start) {
2092 		/* Next time is an end time */
2093 		ptq->sel_start = false;
2094 		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
2095 		return true;
2096 	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
2097 		/* Next time is a start time */
2098 		ptq->sel_start = true;
2099 		ptq->sel_idx += 1;
2100 		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
2101 		return true;
2102 	}
2103 
2104 	/* No next time */
2105 	return false;
2106 }
2107 
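/*
 * Apply the selected time ranges to this queue.  Returns 0 to keep decoding
 * (fast-forwarding the decoder to the next start time if the current
 * timestamp is before it), 1 to stop decoding once the last range has been
 * passed, or a negative error code.
 */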
2108 static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
2109 {
2110 	int err;
2111 
2112 	while (1) {
2113 		if (ptq->sel_start) {
2114 			if (ptq->timestamp >= ptq->sel_timestamp) {
2115 				/* After start time, so consider next time */
2116 				intel_pt_next_time(ptq);
2117 				if (!ptq->sel_timestamp) {
2118 					/* No end time */
2119 					return 0;
2120 				}
2121 				/* Check against end time */
2122 				continue;
2123 			}
2124 			/* Before start time, so fast forward */
2125 			ptq->have_sample = false;
2126 			if (ptq->sel_timestamp > *ff_timestamp) {
2127 				if (ptq->sync_switch) {
2128 					intel_pt_next_tid(ptq->pt, ptq);
2129 					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
2130 				}
2131 				*ff_timestamp = ptq->sel_timestamp;
2132 				err = intel_pt_fast_forward(ptq->decoder,
2133 							    ptq->sel_timestamp);
2134 				if (err)
2135 					return err;
2136 			}
2137 			return 0;
2138 		} else if (ptq->timestamp > ptq->sel_timestamp) {
2139 			/* After end time, so consider next time */
2140 			if (!intel_pt_next_time(ptq)) {
2141 				/* No next time range, so stop decoding */
2142 				ptq->have_sample = false;
2143 				ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
2144 				return 1;
2145 			}
2146 			/* Check against next start time */
2147 			continue;
2148 		} else {
2149 			/* Before end time */
2150 			return 0;
2151 		}
2152 	}
2153 }
2154 
2155 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
2156 {
2157 	const struct intel_pt_state *state = ptq->state;
2158 	struct intel_pt *pt = ptq->pt;
2159 	u64 ff_timestamp = 0;
2160 	int err;
2161 
2162 	if (!pt->kernel_start) {
2163 		pt->kernel_start = machine__kernel_start(pt->machine);
2164 		if (pt->per_cpu_mmaps &&
2165 		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
2166 		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
2167 		    !pt->sampling_mode) {
2168 			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
2169 			if (pt->switch_ip) {
2170 				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
2171 					     pt->switch_ip, pt->ptss_ip);
2172 				intel_pt_enable_sync_switch(pt);
2173 			}
2174 		}
2175 	}
2176 
2177 	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
2178 		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
2179 	while (1) {
2180 		err = intel_pt_sample(ptq);
2181 		if (err)
2182 			return err;
2183 
2184 		state = intel_pt_decode(ptq->decoder);
2185 		if (state->err) {
2186 			if (state->err == INTEL_PT_ERR_NODATA)
2187 				return 1;
2188 			if (ptq->sync_switch &&
2189 			    state->from_ip >= pt->kernel_start) {
2190 				ptq->sync_switch = false;
2191 				intel_pt_next_tid(pt, ptq);
2192 			}
2193 			if (pt->synth_opts.errors) {
2194 				err = intel_ptq_synth_error(ptq, state);
2195 				if (err)
2196 					return err;
2197 			}
2198 			continue;
2199 		}
2200 
2201 		ptq->state = state;
2202 		ptq->have_sample = true;
2203 		intel_pt_sample_flags(ptq);
2204 
2205 		/* Use estimated TSC upon return to user space */
2206 		if (pt->est_tsc &&
2207 		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
2208 		    state->to_ip && state->to_ip < pt->kernel_start) {
2209 			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
2210 				     state->timestamp, state->est_timestamp);
2211 			ptq->timestamp = state->est_timestamp;
2212 		/* Use estimated TSC in unknown switch state */
2213 		} else if (ptq->sync_switch &&
2214 			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
2215 			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
2216 			   ptq->next_tid == -1) {
2217 			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
2218 				     state->timestamp, state->est_timestamp);
2219 			ptq->timestamp = state->est_timestamp;
2220 		} else if (state->timestamp > ptq->timestamp) {
2221 			ptq->timestamp = state->timestamp;
2222 		}
2223 
2224 		if (ptq->sel_timestamp) {
2225 			err = intel_pt_time_filter(ptq, &ff_timestamp);
2226 			if (err)
2227 				return err;
2228 		}
2229 
2230 		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
2231 			*timestamp = ptq->timestamp;
2232 			return 0;
2233 		}
2234 	}
2235 	return 0;
2236 }
2237 
2238 static inline int intel_pt_update_queues(struct intel_pt *pt)
2239 {
2240 	if (pt->queues.new_data) {
2241 		pt->queues.new_data = false;
2242 		return intel_pt_setup_queues(pt);
2243 	}
2244 	return 0;
2245 }
2246 
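/*
 * Decode queues in timestamp order up to, but not including, 'timestamp'.
 * Each queue is decoded until it reaches the next queue's timestamp (plus
 * one) or 'timestamp', then re-added to the heap unless it has no more data.
 */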
2247 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
2248 {
2249 	unsigned int queue_nr;
2250 	u64 ts;
2251 	int ret;
2252 
2253 	while (1) {
2254 		struct auxtrace_queue *queue;
2255 		struct intel_pt_queue *ptq;
2256 
2257 		if (!pt->heap.heap_cnt)
2258 			return 0;
2259 
2260 		if (pt->heap.heap_array[0].ordinal >= timestamp)
2261 			return 0;
2262 
2263 		queue_nr = pt->heap.heap_array[0].queue_nr;
2264 		queue = &pt->queues.queue_array[queue_nr];
2265 		ptq = queue->priv;
2266 
2267 		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
2268 			     queue_nr, pt->heap.heap_array[0].ordinal,
2269 			     timestamp);
2270 
2271 		auxtrace_heap__pop(&pt->heap);
2272 
2273 		if (pt->heap.heap_cnt) {
2274 			ts = pt->heap.heap_array[0].ordinal + 1;
2275 			if (ts > timestamp)
2276 				ts = timestamp;
2277 		} else {
2278 			ts = timestamp;
2279 		}
2280 
2281 		intel_pt_set_pid_tid_cpu(pt, queue);
2282 
2283 		ret = intel_pt_run_decoder(ptq, &ts);
2284 
2285 		if (ret < 0) {
2286 			auxtrace_heap__add(&pt->heap, queue_nr, ts);
2287 			return ret;
2288 		}
2289 
2290 		if (!ret) {
2291 			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
2292 			if (ret < 0)
2293 				return ret;
2294 		} else {
2295 			ptq->on_heap = false;
2296 		}
2297 	}
2298 
2299 	return 0;
2300 }
2301 
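/*
 * For timeless decoding (no usable timestamps), decode every queue that
 * belongs to the given tid, or all queues when tid is -1, in a single pass.
 */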
2302 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
2303 					    u64 time_)
2304 {
2305 	struct auxtrace_queues *queues = &pt->queues;
2306 	unsigned int i;
2307 	u64 ts = 0;
2308 
2309 	for (i = 0; i < queues->nr_queues; i++) {
2310 		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
2311 		struct intel_pt_queue *ptq = queue->priv;
2312 
2313 		if (ptq && (tid == -1 || ptq->tid == tid)) {
2314 			ptq->time = time_;
2315 			intel_pt_set_pid_tid_cpu(pt, queue);
2316 			intel_pt_run_decoder(ptq, &ts);
2317 		}
2318 	}
2319 	return 0;
2320 }
2321 
2322 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
2323 {
2324 	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
2325 				    sample->pid, sample->tid, 0, sample->time);
2326 }
2327 
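/*
 * Map a CPU number to its decode queue.  With per-cpu mmaps the queue array
 * is usually indexed by CPU, so start at that index (clamped to the number of
 * queues) and then search the remaining entries.
 */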
2328 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
2329 {
2330 	unsigned i, j;
2331 
2332 	if (cpu < 0 || !pt->queues.nr_queues)
2333 		return NULL;
2334 
2335 	if ((unsigned)cpu >= pt->queues.nr_queues)
2336 		i = pt->queues.nr_queues - 1;
2337 	else
2338 		i = cpu;
2339 
2340 	if (pt->queues.queue_array[i].cpu == cpu)
2341 		return pt->queues.queue_array[i].priv;
2342 
2343 	for (j = 0; i > 0; j++) {
2344 		if (pt->queues.queue_array[--i].cpu == cpu)
2345 			return pt->queues.queue_array[i].priv;
2346 	}
2347 
2348 	for (; j < pt->queues.nr_queues; j++) {
2349 		if (pt->queues.queue_array[j].cpu == cpu)
2350 			return pt->queues.queue_array[j].priv;
2351 	}
2352 
2353 	return NULL;
2354 }
2355 
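/*
 * Handle a context switch on a queue that is using sync_switch.  Returns 0 if
 * the tid change is deferred until the switch IP is seen, 1 if the caller
 * should update the current tid immediately, or a negative error code.
 */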
2356 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
2357 				u64 timestamp)
2358 {
2359 	struct intel_pt_queue *ptq;
2360 	int err;
2361 
2362 	if (!pt->sync_switch)
2363 		return 1;
2364 
2365 	ptq = intel_pt_cpu_to_ptq(pt, cpu);
2366 	if (!ptq || !ptq->sync_switch)
2367 		return 1;
2368 
2369 	switch (ptq->switch_state) {
2370 	case INTEL_PT_SS_NOT_TRACING:
2371 		break;
2372 	case INTEL_PT_SS_UNKNOWN:
2373 	case INTEL_PT_SS_TRACING:
2374 		ptq->next_tid = tid;
2375 		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
2376 		return 0;
2377 	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2378 		if (!ptq->on_heap) {
2379 			ptq->timestamp = perf_time_to_tsc(timestamp,
2380 							  &pt->tc);
2381 			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
2382 						 ptq->timestamp);
2383 			if (err)
2384 				return err;
2385 			ptq->on_heap = true;
2386 		}
2387 		ptq->switch_state = INTEL_PT_SS_TRACING;
2388 		break;
2389 	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2390 		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
2391 		break;
2392 	default:
2393 		break;
2394 	}
2395 
2396 	ptq->next_tid = -1;
2397 
2398 	return 1;
2399 }
2400 
2401 static int intel_pt_process_switch(struct intel_pt *pt,
2402 				   struct perf_sample *sample)
2403 {
2404 	struct perf_evsel *evsel;
2405 	pid_t tid;
2406 	int cpu, ret;
2407 
2408 	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
2409 	if (evsel != pt->switch_evsel)
2410 		return 0;
2411 
2412 	tid = perf_evsel__intval(evsel, sample, "next_pid");
2413 	cpu = sample->cpu;
2414 
2415 	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2416 		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
2417 		     &pt->tc));
2418 
2419 	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2420 	if (ret <= 0)
2421 		return ret;
2422 
2423 	return machine__set_current_tid(pt->machine, cpu, -1, tid);
2424 }
2425 
2426 static int intel_pt_context_switch_in(struct intel_pt *pt,
2427 				      struct perf_sample *sample)
2428 {
2429 	pid_t pid = sample->pid;
2430 	pid_t tid = sample->tid;
2431 	int cpu = sample->cpu;
2432 
2433 	if (pt->sync_switch) {
2434 		struct intel_pt_queue *ptq;
2435 
2436 		ptq = intel_pt_cpu_to_ptq(pt, cpu);
2437 		if (ptq && ptq->sync_switch) {
2438 			ptq->next_tid = -1;
2439 			switch (ptq->switch_state) {
2440 			case INTEL_PT_SS_NOT_TRACING:
2441 			case INTEL_PT_SS_UNKNOWN:
2442 			case INTEL_PT_SS_TRACING:
2443 				break;
2444 			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2445 			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2446 				ptq->switch_state = INTEL_PT_SS_TRACING;
2447 				break;
2448 			default:
2449 				break;
2450 			}
2451 		}
2452 	}
2453 
2454 	/*
2455 	 * If the current tid has not been updated yet, ensure it is updated
2456 	 * now that a "switch in" event has occurred.
2457 	 */
2458 	if (machine__get_current_tid(pt->machine, cpu) == tid)
2459 		return 0;
2460 
2461 	return machine__set_current_tid(pt->machine, cpu, pid, tid);
2462 }
2463 
2464 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
2465 				   struct perf_sample *sample)
2466 {
2467 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2468 	pid_t pid, tid;
2469 	int cpu, ret;
2470 
2471 	cpu = sample->cpu;
2472 
2473 	if (pt->have_sched_switch == 3) {
2474 		if (!out)
2475 			return intel_pt_context_switch_in(pt, sample);
2476 		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
2477 			pr_err("Expecting CPU-wide context switch event\n");
2478 			return -EINVAL;
2479 		}
2480 		pid = event->context_switch.next_prev_pid;
2481 		tid = event->context_switch.next_prev_tid;
2482 	} else {
2483 		if (out)
2484 			return 0;
2485 		pid = sample->pid;
2486 		tid = sample->tid;
2487 	}
2488 
2489 	if (tid == -1) {
2490 		pr_err("context_switch event has no tid\n");
2491 		return -EINVAL;
2492 	}
2493 
2494 	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2495 		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
2496 		     &pt->tc));
2497 
2498 	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2499 	if (ret <= 0)
2500 		return ret;
2501 
2502 	return machine__set_current_tid(pt->machine, cpu, pid, tid);
2503 }
2504 
2505 static int intel_pt_process_itrace_start(struct intel_pt *pt,
2506 					 union perf_event *event,
2507 					 struct perf_sample *sample)
2508 {
2509 	if (!pt->per_cpu_mmaps)
2510 		return 0;
2511 
2512 	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2513 		     sample->cpu, event->itrace_start.pid,
2514 		     event->itrace_start.tid, sample->time,
2515 		     perf_time_to_tsc(sample->time, &pt->tc));
2516 
2517 	return machine__set_current_tid(pt->machine, sample->cpu,
2518 					event->itrace_start.pid,
2519 					event->itrace_start.tid);
2520 }
2521 
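/*
 * Main per-event hook: convert the sample time to TSC, keep the queues up to
 * date, decode up to the event's timestamp (or, for timeless decoding, decode
 * a task's queues when it exits), report truncated AUX data, and feed
 * sched_switch, itrace_start and context switch events to the code above.
 */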
2522 static int intel_pt_process_event(struct perf_session *session,
2523 				  union perf_event *event,
2524 				  struct perf_sample *sample,
2525 				  struct perf_tool *tool)
2526 {
2527 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2528 					   auxtrace);
2529 	u64 timestamp;
2530 	int err = 0;
2531 
2532 	if (dump_trace)
2533 		return 0;
2534 
2535 	if (!tool->ordered_events) {
2536 		pr_err("Intel Processor Trace requires ordered events\n");
2537 		return -EINVAL;
2538 	}
2539 
2540 	if (sample->time && sample->time != (u64)-1)
2541 		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
2542 	else
2543 		timestamp = 0;
2544 
2545 	if (timestamp || pt->timeless_decoding) {
2546 		err = intel_pt_update_queues(pt);
2547 		if (err)
2548 			return err;
2549 	}
2550 
2551 	if (pt->timeless_decoding) {
2552 		if (event->header.type == PERF_RECORD_EXIT) {
2553 			err = intel_pt_process_timeless_queues(pt,
2554 							       event->fork.tid,
2555 							       sample->time);
2556 		}
2557 	} else if (timestamp) {
2558 		err = intel_pt_process_queues(pt, timestamp);
2559 	}
2560 	if (err)
2561 		return err;
2562 
2563 	if (event->header.type == PERF_RECORD_AUX &&
2564 	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
2565 	    pt->synth_opts.errors) {
2566 		err = intel_pt_lost(pt, sample);
2567 		if (err)
2568 			return err;
2569 	}
2570 
2571 	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
2572 		err = intel_pt_process_switch(pt, sample);
2573 	else if (event->header.type == PERF_RECORD_ITRACE_START)
2574 		err = intel_pt_process_itrace_start(pt, event, sample);
2575 	else if (event->header.type == PERF_RECORD_SWITCH ||
2576 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2577 		err = intel_pt_context_switch(pt, event, sample);
2578 
2579 	intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
2580 		     event->header.type, sample->cpu, sample->time, timestamp);
2581 	intel_pt_log_event(event);
2582 
2583 	return err;
2584 }
2585 
2586 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
2587 {
2588 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2589 					   auxtrace);
2590 	int ret;
2591 
2592 	if (dump_trace)
2593 		return 0;
2594 
2595 	if (!tool->ordered_events)
2596 		return -EINVAL;
2597 
2598 	ret = intel_pt_update_queues(pt);
2599 	if (ret < 0)
2600 		return ret;
2601 
2602 	if (pt->timeless_decoding)
2603 		return intel_pt_process_timeless_queues(pt, -1,
2604 							MAX_TIMESTAMP - 1);
2605 
2606 	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
2607 }
2608 
2609 static void intel_pt_free_events(struct perf_session *session)
2610 {
2611 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2612 					   auxtrace);
2613 	struct auxtrace_queues *queues = &pt->queues;
2614 	unsigned int i;
2615 
2616 	for (i = 0; i < queues->nr_queues; i++) {
2617 		intel_pt_free_queue(queues->queue_array[i].priv);
2618 		queues->queue_array[i].priv = NULL;
2619 	}
2620 	intel_pt_log_disable();
2621 	auxtrace_queues__free(queues);
2622 }
2623 
2624 static void intel_pt_free(struct perf_session *session)
2625 {
2626 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2627 					   auxtrace);
2628 
2629 	auxtrace_heap__free(&pt->heap);
2630 	intel_pt_free_events(session);
2631 	session->auxtrace = NULL;
2632 	thread__put(pt->unknown_thread);
2633 	addr_filters__exit(&pt->filts);
2634 	zfree(&pt->filter);
2635 	zfree(&pt->time_ranges);
2636 	free(pt);
2637 }
2638 
2639 static int intel_pt_process_auxtrace_event(struct perf_session *session,
2640 					   union perf_event *event,
2641 					   struct perf_tool *tool __maybe_unused)
2642 {
2643 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2644 					   auxtrace);
2645 
2646 	if (!pt->data_queued) {
2647 		struct auxtrace_buffer *buffer;
2648 		off_t data_offset;
2649 		int fd = perf_data__fd(session->data);
2650 		int err;
2651 
2652 		if (perf_data__is_pipe(session->data)) {
2653 			data_offset = 0;
2654 		} else {
2655 			data_offset = lseek(fd, 0, SEEK_CUR);
2656 			if (data_offset == -1)
2657 				return -errno;
2658 		}
2659 
2660 		err = auxtrace_queues__add_event(&pt->queues, session, event,
2661 						 data_offset, &buffer);
2662 		if (err)
2663 			return err;
2664 
2665 		/* Dump here now we have copied a piped trace out of the pipe */
2666 		/* Dump here now that we have copied a piped trace out of the pipe */
2667 			if (auxtrace_buffer__get_data(buffer, fd)) {
2668 				intel_pt_dump_event(pt, buffer->data,
2669 						    buffer->size);
2670 				auxtrace_buffer__put_data(buffer);
2671 			}
2672 		}
2673 	}
2674 
2675 	return 0;
2676 }
2677 
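/*
 * Wrapper for a dummy perf_tool so that perf_event__synthesize_attr() can
 * deliver the synthesized attribute event straight back into the session
 * (see intel_pt_event_synth() below).
 */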
2678 struct intel_pt_synth {
2679 	struct perf_tool dummy_tool;
2680 	struct perf_session *session;
2681 };
2682 
2683 static int intel_pt_event_synth(struct perf_tool *tool,
2684 				union perf_event *event,
2685 				struct perf_sample *sample __maybe_unused,
2686 				struct machine *machine __maybe_unused)
2687 {
2688 	struct intel_pt_synth *intel_pt_synth =
2689 			container_of(tool, struct intel_pt_synth, dummy_tool);
2690 
2691 	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
2692 						 NULL);
2693 }
2694 
2695 static int intel_pt_synth_event(struct perf_session *session, const char *name,
2696 				struct perf_event_attr *attr, u64 id)
2697 {
2698 	struct intel_pt_synth intel_pt_synth;
2699 	int err;
2700 
2701 	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2702 		 name, id, (u64)attr->sample_type);
2703 
2704 	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
2705 	intel_pt_synth.session = session;
2706 
2707 	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
2708 					  &id, intel_pt_event_synth);
2709 	if (err)
2710 		pr_err("%s: failed to synthesize '%s' event type\n",
2711 		       __func__, name);
2712 
2713 	return err;
2714 }
2715 
2716 static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
2717 				    const char *name)
2718 {
2719 	struct perf_evsel *evsel;
2720 
2721 	evlist__for_each_entry(evlist, evsel) {
2722 		if (evsel->id && evsel->id[0] == id) {
2723 			if (evsel->name)
2724 				zfree(&evsel->name);
2725 			evsel->name = strdup(name);
2726 			break;
2727 		}
2728 	}
2729 }
2730 
2731 static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
2732 					 struct perf_evlist *evlist)
2733 {
2734 	struct perf_evsel *evsel;
2735 
2736 	evlist__for_each_entry(evlist, evsel) {
2737 		if (evsel->attr.type == pt->pmu_type && evsel->ids)
2738 			return evsel;
2739 	}
2740 
2741 	return NULL;
2742 }
2743 
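/*
 * Create the attributes and ids for the events that will be synthesized
 * (branches, instructions, transactions, ptwrite and power events), derived
 * from the Intel PT evsel and the itrace options, and announce them to the
 * session.
 */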
2744 static int intel_pt_synth_events(struct intel_pt *pt,
2745 				 struct perf_session *session)
2746 {
2747 	struct perf_evlist *evlist = session->evlist;
2748 	struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
2749 	struct perf_event_attr attr;
2750 	u64 id;
2751 	int err;
2752 
2753 	if (!evsel) {
2754 		pr_debug("There are no selected events with Intel Processor Trace data\n");
2755 		return 0;
2756 	}
2757 
2758 	memset(&attr, 0, sizeof(struct perf_event_attr));
2759 	attr.size = sizeof(struct perf_event_attr);
2760 	attr.type = PERF_TYPE_HARDWARE;
2761 	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
2762 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
2763 			    PERF_SAMPLE_PERIOD;
2764 	if (pt->timeless_decoding)
2765 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
2766 	else
2767 		attr.sample_type |= PERF_SAMPLE_TIME;
2768 	if (!pt->per_cpu_mmaps)
2769 		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
2770 	attr.exclude_user = evsel->attr.exclude_user;
2771 	attr.exclude_kernel = evsel->attr.exclude_kernel;
2772 	attr.exclude_hv = evsel->attr.exclude_hv;
2773 	attr.exclude_host = evsel->attr.exclude_host;
2774 	attr.exclude_guest = evsel->attr.exclude_guest;
2775 	attr.sample_id_all = evsel->attr.sample_id_all;
2776 	attr.read_format = evsel->attr.read_format;
2777 
2778 	id = evsel->id[0] + 1000000000;
2779 	if (!id)
2780 		id = 1;
2781 
2782 	if (pt->synth_opts.branches) {
2783 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2784 		attr.sample_period = 1;
2785 		attr.sample_type |= PERF_SAMPLE_ADDR;
2786 		err = intel_pt_synth_event(session, "branches", &attr, id);
2787 		if (err)
2788 			return err;
2789 		pt->sample_branches = true;
2790 		pt->branches_sample_type = attr.sample_type;
2791 		pt->branches_id = id;
2792 		id += 1;
2793 		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
2794 	}
2795 
2796 	if (pt->synth_opts.callchain)
2797 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2798 	if (pt->synth_opts.last_branch)
2799 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2800 
2801 	if (pt->synth_opts.instructions) {
2802 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2803 		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
2804 			attr.sample_period =
2805 				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
2806 		else
2807 			attr.sample_period = pt->synth_opts.period;
2808 		err = intel_pt_synth_event(session, "instructions", &attr, id);
2809 		if (err)
2810 			return err;
2811 		pt->sample_instructions = true;
2812 		pt->instructions_sample_type = attr.sample_type;
2813 		pt->instructions_id = id;
2814 		id += 1;
2815 	}
2816 
2817 	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
2818 	attr.sample_period = 1;
2819 
2820 	if (pt->synth_opts.transactions) {
2821 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2822 		err = intel_pt_synth_event(session, "transactions", &attr, id);
2823 		if (err)
2824 			return err;
2825 		pt->sample_transactions = true;
2826 		pt->transactions_sample_type = attr.sample_type;
2827 		pt->transactions_id = id;
2828 		intel_pt_set_event_name(evlist, id, "transactions");
2829 		id += 1;
2830 	}
2831 
2832 	attr.type = PERF_TYPE_SYNTH;
2833 	attr.sample_type |= PERF_SAMPLE_RAW;
2834 
2835 	if (pt->synth_opts.ptwrites) {
2836 		attr.config = PERF_SYNTH_INTEL_PTWRITE;
2837 		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
2838 		if (err)
2839 			return err;
2840 		pt->sample_ptwrites = true;
2841 		pt->ptwrites_sample_type = attr.sample_type;
2842 		pt->ptwrites_id = id;
2843 		intel_pt_set_event_name(evlist, id, "ptwrite");
2844 		id += 1;
2845 	}
2846 
2847 	if (pt->synth_opts.pwr_events) {
2848 		pt->sample_pwr_events = true;
2849 		pt->pwr_events_sample_type = attr.sample_type;
2850 
2851 		attr.config = PERF_SYNTH_INTEL_CBR;
2852 		err = intel_pt_synth_event(session, "cbr", &attr, id);
2853 		if (err)
2854 			return err;
2855 		pt->cbr_id = id;
2856 		intel_pt_set_event_name(evlist, id, "cbr");
2857 		id += 1;
2858 	}
2859 
2860 	if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
2861 		attr.config = PERF_SYNTH_INTEL_MWAIT;
2862 		err = intel_pt_synth_event(session, "mwait", &attr, id);
2863 		if (err)
2864 			return err;
2865 		pt->mwait_id = id;
2866 		intel_pt_set_event_name(evlist, id, "mwait");
2867 		id += 1;
2868 
2869 		attr.config = PERF_SYNTH_INTEL_PWRE;
2870 		err = intel_pt_synth_event(session, "pwre", &attr, id);
2871 		if (err)
2872 			return err;
2873 		pt->pwre_id = id;
2874 		intel_pt_set_event_name(evlist, id, "pwre");
2875 		id += 1;
2876 
2877 		attr.config = PERF_SYNTH_INTEL_EXSTOP;
2878 		err = intel_pt_synth_event(session, "exstop", &attr, id);
2879 		if (err)
2880 			return err;
2881 		pt->exstop_id = id;
2882 		intel_pt_set_event_name(evlist, id, "exstop");
2883 		id += 1;
2884 
2885 		attr.config = PERF_SYNTH_INTEL_PWRX;
2886 		err = intel_pt_synth_event(session, "pwrx", &attr, id);
2887 		if (err)
2888 			return err;
2889 		pt->pwrx_id = id;
2890 		intel_pt_set_event_name(evlist, id, "pwrx");
2891 		id += 1;
2892 	}
2893 
2894 	return 0;
2895 }
2896 
2897 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2898 {
2899 	struct perf_evsel *evsel;
2900 
2901 	evlist__for_each_entry_reverse(evlist, evsel) {
2902 		const char *name = perf_evsel__name(evsel);
2903 
2904 		if (!strcmp(name, "sched:sched_switch"))
2905 			return evsel;
2906 	}
2907 
2908 	return NULL;
2909 }
2910 
2911 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2912 {
2913 	struct perf_evsel *evsel;
2914 
2915 	evlist__for_each_entry(evlist, evsel) {
2916 		if (evsel->attr.context_switch)
2917 			return true;
2918 	}
2919 
2920 	return false;
2921 }
2922 
2923 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2924 {
2925 	struct intel_pt *pt = data;
2926 
2927 	if (!strcmp(var, "intel-pt.mispred-all"))
2928 		pt->mispred_all = perf_config_bool(var, value);
2929 
2930 	return 0;
2931 }
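/*
 * Illustrative example (assumed, not taken from this file): the option read
 * above corresponds to a perf config entry such as:
 *
 *	[intel-pt]
 *		mispred-all = on
 *
 * which is used when synthesizing branch information to mark branches as
 * mispredicted.
 */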
2932 
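/*
 * perf time <-> TSC conversion uses integer multiply and shift, so the two
 * directions are not exact inverses.  The helpers below therefore nudge the
 * TSC value until converting it back to perf time lands on the wanted side
 * of 'ns'.
 */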
2933 /* Find least TSC which converts to ns or later */
2934 static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
2935 {
2936 	u64 tsc, tm;
2937 
2938 	tsc = perf_time_to_tsc(ns, &pt->tc);
2939 
2940 	while (1) {
2941 		tm = tsc_to_perf_time(tsc, &pt->tc);
2942 		if (tm < ns)
2943 			break;
2944 		tsc -= 1;
2945 	}
2946 
2947 	while (tm < ns)
2948 		tm = tsc_to_perf_time(++tsc, &pt->tc);
2949 
2950 	return tsc;
2951 }
2952 
2953 /* Find greatest TSC which converts to ns or earlier */
2954 static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
2955 {
2956 	u64 tsc, tm;
2957 
2958 	tsc = perf_time_to_tsc(ns, &pt->tc);
2959 
2960 	while (1) {
2961 		tm = tsc_to_perf_time(tsc, &pt->tc);
2962 		if (tm > ns)
2963 			break;
2964 		tsc += 1;
2965 	}
2966 
2967 	while (tm > ns)
2968 		tm = tsc_to_perf_time(--tsc, &pt->tc);
2969 
2970 	return tsc;
2971 }
2972 
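/*
 * Convert the requested perf-time ranges into TSC ranges for the decoder's
 * time filter (see intel_pt_time_filter()).  A zero start or end is left as
 * zero, meaning "from the beginning" or "to the end" respectively.
 */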
2973 static int intel_pt_setup_time_ranges(struct intel_pt *pt,
2974 				      struct itrace_synth_opts *opts)
2975 {
2976 	struct perf_time_interval *p = opts->ptime_range;
2977 	int n = opts->range_num;
2978 	int i;
2979 
2980 	if (!n || !p || pt->timeless_decoding)
2981 		return 0;
2982 
2983 	pt->time_ranges = calloc(n, sizeof(struct range));
2984 	if (!pt->time_ranges)
2985 		return -ENOMEM;
2986 
2987 	pt->range_cnt = n;
2988 
2989 	intel_pt_log("%s: %u range(s)\n", __func__, n);
2990 
2991 	for (i = 0; i < n; i++) {
2992 		struct range *r = &pt->time_ranges[i];
2993 		u64 ts = p[i].start;
2994 		u64 te = p[i].end;
2995 
2996 		/*
2997 		 * Take care to ensure the TSC range matches the perf-time range
2998 		 * when converted back to perf-time.
2999 		 */
3000 		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
3001 		r->end   = te ? intel_pt_tsc_end(te, pt) : 0;
3002 
3003 		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
3004 			     i, ts, te);
3005 		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
3006 			     i, r->start, r->end);
3007 	}
3008 
3009 	return 0;
3010 }
3011 
3012 static const char * const intel_pt_info_fmts[] = {
3013 	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
3014 	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
3015 	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
3016 	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
3017 	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
3018 	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
3019 	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
3020 	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
3021 	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
3022 	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
3023 	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
3024 	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
3025 	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
3026 	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
3027 	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
3028 	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
3029 };
3030 
3031 static void intel_pt_print_info(u64 *arr, int start, int finish)
3032 {
3033 	int i;
3034 
3035 	if (!dump_trace)
3036 		return;
3037 
3038 	for (i = start; i <= finish; i++)
3039 		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
3040 }
3041 
3042 static void intel_pt_print_info_str(const char *name, const char *str)
3043 {
3044 	if (!dump_trace)
3045 		return;
3046 
3047 	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
3048 }
3049 
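/*
 * Check that the AUXTRACE_INFO event is big enough to contain priv[pos], so
 * that newer fields are only read when present (older perf.data files have a
 * shorter layout).
 */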
3050 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
3051 {
3052 	return auxtrace_info->header.size >=
3053 		sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
3054 }
3055 
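/*
 * Set up Intel PT decoding for the session from the AUXTRACE_INFO event: read
 * the priv[] array (tolerating older, shorter layouts), parse any address
 * filter string, create the "unknown" thread, pick the context switch source,
 * apply the itrace options and time ranges, synthesize the sample event
 * attributes and queue up the indexed auxtrace data.
 */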
3056 int intel_pt_process_auxtrace_info(union perf_event *event,
3057 				   struct perf_session *session)
3058 {
3059 	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
3060 	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
3061 	struct intel_pt *pt;
3062 	void *info_end;
3063 	u64 *info;
3064 	int err;
3065 
3066 	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
3067 					min_sz)
3068 		return -EINVAL;
3069 
3070 	pt = zalloc(sizeof(struct intel_pt));
3071 	if (!pt)
3072 		return -ENOMEM;
3073 
3074 	addr_filters__init(&pt->filts);
3075 
3076 	err = perf_config(intel_pt_perf_config, pt);
3077 	if (err)
3078 		goto err_free;
3079 
3080 	err = auxtrace_queues__init(&pt->queues);
3081 	if (err)
3082 		goto err_free;
3083 
3084 	intel_pt_log_set_name(INTEL_PT_PMU_NAME);
3085 
3086 	pt->session = session;
3087 	pt->machine = &session->machines.host; /* No kvm support */
3088 	pt->auxtrace_type = auxtrace_info->type;
3089 	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
3090 	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
3091 	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
3092 	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
3093 	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
3094 	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
3095 	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
3096 	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
3097 	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
3098 	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
3099 	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
3100 			    INTEL_PT_PER_CPU_MMAPS);
3101 
3102 	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
3103 		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
3104 		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
3105 		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
3106 		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
3107 		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
3108 		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
3109 				    INTEL_PT_CYC_BIT);
3110 	}
3111 
3112 	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
3113 		pt->max_non_turbo_ratio =
3114 			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
3115 		intel_pt_print_info(&auxtrace_info->priv[0],
3116 				    INTEL_PT_MAX_NONTURBO_RATIO,
3117 				    INTEL_PT_MAX_NONTURBO_RATIO);
3118 	}
3119 
3120 	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
3121 	info_end = (void *)info + auxtrace_info->header.size;
3122 
3123 	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
3124 		size_t len;
3125 
3126 		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
3127 		intel_pt_print_info(&auxtrace_info->priv[0],
3128 				    INTEL_PT_FILTER_STR_LEN,
3129 				    INTEL_PT_FILTER_STR_LEN);
3130 		if (len) {
3131 			const char *filter = (const char *)info;
3132 
3133 			len = roundup(len + 1, 8);
3134 			info += len >> 3;
3135 			if ((void *)info > info_end) {
3136 				pr_err("%s: bad filter string length\n", __func__);
3137 				err = -EINVAL;
3138 				goto err_free_queues;
3139 			}
3140 			pt->filter = memdup(filter, len);
3141 			if (!pt->filter) {
3142 				err = -ENOMEM;
3143 				goto err_free_queues;
3144 			}
3145 			if (session->header.needs_swap)
3146 				mem_bswap_64(pt->filter, len);
3147 			if (pt->filter[len - 1]) {
3148 				pr_err("%s: filter string not null terminated\n", __func__);
3149 				err = -EINVAL;
3150 				goto err_free_queues;
3151 			}
3152 			err = addr_filters__parse_bare_filter(&pt->filts,
3153 							      filter);
3154 			if (err)
3155 				goto err_free_queues;
3156 		}
3157 		intel_pt_print_info_str("Filter string", pt->filter);
3158 	}
3159 
3160 	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
3161 	if (pt->timeless_decoding && !pt->tc.time_mult)
3162 		pt->tc.time_mult = 1;
3163 	pt->have_tsc = intel_pt_have_tsc(pt);
3164 	pt->sampling_mode = false;
3165 	pt->est_tsc = !pt->timeless_decoding;
3166 
3167 	pt->unknown_thread = thread__new(999999999, 999999999);
3168 	if (!pt->unknown_thread) {
3169 		err = -ENOMEM;
3170 		goto err_free_queues;
3171 	}
3172 
3173 	/*
3174 	 * Since this thread will not be kept in any rbtree nor in a
3175 	 * list, initialize its list node so that at thread__put() the
3176 	 * current thread lifetime assumption is kept and we don't segfault
3177 	 * at list_del_init().
3178 	 */
3179 	INIT_LIST_HEAD(&pt->unknown_thread->node);
3180 
3181 	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
3182 	if (err)
3183 		goto err_delete_thread;
3184 	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
3185 		err = -ENOMEM;
3186 		goto err_delete_thread;
3187 	}
3188 
3189 	pt->auxtrace.process_event = intel_pt_process_event;
3190 	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
3191 	pt->auxtrace.flush_events = intel_pt_flush;
3192 	pt->auxtrace.free_events = intel_pt_free_events;
3193 	pt->auxtrace.free = intel_pt_free;
3194 	session->auxtrace = &pt->auxtrace;
3195 
3196 	if (dump_trace)
3197 		return 0;
3198 
3199 	if (pt->have_sched_switch == 1) {
3200 		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
3201 		if (!pt->switch_evsel) {
3202 			pr_err("%s: missing sched_switch event\n", __func__);
3203 			err = -EINVAL;
3204 			goto err_delete_thread;
3205 		}
3206 	} else if (pt->have_sched_switch == 2 &&
3207 		   !intel_pt_find_switch(session->evlist)) {
3208 		pr_err("%s: missing context_switch attribute flag\n", __func__);
3209 		err = -EINVAL;
3210 		goto err_delete_thread;
3211 	}
3212 
3213 	if (session->itrace_synth_opts->set) {
3214 		pt->synth_opts = *session->itrace_synth_opts;
3215 	} else {
3216 		itrace_synth_opts__set_default(&pt->synth_opts,
3217 				session->itrace_synth_opts->default_no_sample);
3218 		if (!session->itrace_synth_opts->default_no_sample &&
3219 		    !session->itrace_synth_opts->inject) {
3220 			pt->synth_opts.branches = false;
3221 			pt->synth_opts.callchain = true;
3222 		}
3223 		pt->synth_opts.thread_stack =
3224 				session->itrace_synth_opts->thread_stack;
3225 	}
3226 
3227 	if (pt->synth_opts.log)
3228 		intel_pt_log_enable();
3229 
3230 	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
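	/*
	 * For example (illustrative figures): a 2.6 GHz TSC gives
	 * tsc_freq = 2600000000, so max_non_turbo_ratio defaults to
	 * (2600000000 + 50000000) / 100000000 = 26 (rounded to the nearest
	 * 100 MHz), and cbr2khz = 2600000000 / 26 / 1000 = 100000, i.e. one
	 * CBR unit corresponds to 100 MHz.
	 */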
3231 	if (pt->tc.time_mult) {
3232 		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
3233 
3234 		if (!pt->max_non_turbo_ratio)
3235 			pt->max_non_turbo_ratio =
3236 					(tsc_freq + 50000000) / 100000000;
3237 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
3238 		intel_pt_log("Maximum non-turbo ratio %u\n",
3239 			     pt->max_non_turbo_ratio);
3240 		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
3241 	}
3242 
3243 	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
3244 	if (err)
3245 		goto err_delete_thread;
3246 
3247 	if (pt->synth_opts.calls)
3248 		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
3249 				       PERF_IP_FLAG_TRACE_END;
3250 	if (pt->synth_opts.returns)
3251 		pt->branches_filter |= PERF_IP_FLAG_RETURN |
3252 				       PERF_IP_FLAG_TRACE_BEGIN;
3253 
3254 	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
3255 		symbol_conf.use_callchain = true;
3256 		if (callchain_register_param(&callchain_param) < 0) {
3257 			symbol_conf.use_callchain = false;
3258 			pt->synth_opts.callchain = false;
3259 		}
3260 	}
3261 
3262 	err = intel_pt_synth_events(pt, session);
3263 	if (err)
3264 		goto err_delete_thread;
3265 
3266 	err = auxtrace_queues__process_index(&pt->queues, session);
3267 	if (err)
3268 		goto err_delete_thread;
3269 
3270 	if (pt->queues.populated)
3271 		pt->data_queued = true;
3272 
3273 	if (pt->timeless_decoding)
3274 		pr_debug2("Intel PT decoding without timestamps\n");
3275 
3276 	return 0;
3277 
3278 err_delete_thread:
3279 	thread__zput(pt->unknown_thread);
3280 err_free_queues:
3281 	intel_pt_log_disable();
3282 	auxtrace_queues__free(&pt->queues);
3283 	session->auxtrace = NULL;
3284 err_free:
3285 	addr_filters__exit(&pt->filts);
3286 	zfree(&pt->filter);
3287 	zfree(&pt->time_ranges);
3288 	free(pt);
3289 	return err;
3290 }
3291