xref: /linux/arch/x86/events/intel/pt.c (revision 27f6d22b037b2be6685e0e27cce929779d634119)
/*
 * Intel(R) Processor Trace PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Intel PT is specified in the Intel Architecture Instruction Set Extensions
 * Programming Reference:
 * http://software.intel.com/en-us/intel-isa-extensions
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>

#include "../perf_event.h"
#include "pt.h"

static DEFINE_PER_CPU(struct pt, pt_ctx);

static struct pt_pmu pt_pmu;

enum cpuid_regs {
	CR_EAX = 0,
	CR_ECX,
	CR_EDX,
	CR_EBX
};

/*
 * Capabilities of Intel PT hardware, such as the number of address bits or
 * supported output schemes, are cached and exported to userspace as the "caps"
 * attribute group of the intel_pt PMU device
 * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
 * the relevant bits together with the intel_pt traces.
 *
 * They are needed both for trace decoding (e.g. payloads_lip, which tells the
 * decoder the address width encoded in IP-related packets) and for event
 * configuration (bitmasks with permitted values for certain bit fields).
 */
#define PT_CAP(_n, _l, _r, _m)						\
	[PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,	\
			    .reg = _r, .mask = _m }

static struct pt_cap_desc {
	const char	*name;
	u32		leaf;
	u8		reg;
	u32		mask;
} pt_caps[] = {
	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
};

static u32 pt_cap_get(enum pt_capabilities cap)
{
	struct pt_cap_desc *cd = &pt_caps[cap];
	u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
	unsigned int shift = __ffs(cd->mask);

	return (c & cd->mask) >> shift;
}
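
/*
 * Worked example (illustrative only, not used by the driver): for
 * PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000), __ffs(0xffff0000) == 16,
 * so with a made-up cached CPUID.(EAX=14H, ECX=1):EAX value of
 * 0x00030000, pt_cap_get(PT_CAP_mtc_periods) returns
 * (0x00030000 & 0xffff0000) >> 16 == 0x3, the raw bitmask of supported
 * MTC periods.
 */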

static ssize_t pt_cap_show(struct device *cdev,
			   struct device_attribute *attr,
			   char *buf)
{
	struct dev_ext_attribute *ea =
		container_of(attr, struct dev_ext_attribute, attr);
	enum pt_capabilities cap = (long)ea->var;

	return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
}

static struct attribute_group pt_cap_group = {
	.name	= "caps",
};

PMU_FORMAT_ATTR(cyc,		"config:1"	);
PMU_FORMAT_ATTR(mtc,		"config:9"	);
PMU_FORMAT_ATTR(tsc,		"config:10"	);
PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
PMU_FORMAT_ATTR(mtc_period,	"config:14-17"	);
PMU_FORMAT_ATTR(cyc_thresh,	"config:19-22"	);
PMU_FORMAT_ATTR(psb_period,	"config:24-27"	);
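
/*
 * Example usage (an assumption about tooling, not something the driver
 * depends on): the format attributes above let the perf tool spell
 * config bits by name, e.g.
 *
 *	perf record -e intel_pt/tsc=1,mtc=1,mtc_period=3/ -- workload
 *
 * which sets RTIT_CTL.TSCEn (config:10), RTIT_CTL.MTCEn (config:9) and
 * an MTC period of 3 (config:14-17).
 */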

static struct attribute *pt_formats_attr[] = {
	&format_attr_cyc.attr,
	&format_attr_mtc.attr,
	&format_attr_tsc.attr,
	&format_attr_noretcomp.attr,
	&format_attr_mtc_period.attr,
	&format_attr_cyc_thresh.attr,
	&format_attr_psb_period.attr,
	NULL,
};

static struct attribute_group pt_format_group = {
	.name	= "format",
	.attrs	= pt_formats_attr,
};

static const struct attribute_group *pt_attr_groups[] = {
	&pt_cap_group,
	&pt_format_group,
	NULL,
};

static int __init pt_pmu_hw_init(void)
{
	struct dev_ext_attribute *de_attrs;
	struct attribute **attrs;
	size_t size;
	int ret;
	long i;

	attrs = NULL;

	/* cache all subleaves of CPUID leaf 0x14 (Intel PT enumeration) */
	for (i = 0; i < PT_CPUID_LEAVES; i++) {
		cpuid_count(20, i,
			    &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
	}

	ret = -ENOMEM;
	size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
	attrs = kzalloc(size, GFP_KERNEL);
	if (!attrs)
		goto fail;

	size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
	de_attrs = kzalloc(size, GFP_KERNEL);
	if (!de_attrs)
		goto fail;

	for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
		struct dev_ext_attribute *de_attr = de_attrs + i;

		de_attr->attr.attr.name = pt_caps[i].name;

		sysfs_attr_init(&de_attr->attr.attr);

		de_attr->attr.attr.mode		= S_IRUGO;
		de_attr->attr.show		= pt_cap_show;
		de_attr->var			= (void *)i;

		attrs[i] = &de_attr->attr.attr;
	}

	pt_cap_group.attrs = attrs;

	return 0;

fail:
	kfree(attrs);

	return ret;
}

#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC	| \
			  RTIT_CTL_CYC_THRESH	| \
			  RTIT_CTL_PSB_FREQ)

#define RTIT_CTL_MTC	(RTIT_CTL_MTC_EN	| \
			 RTIT_CTL_MTC_RANGE)

#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
			RTIT_CTL_DISRETC	| \
			RTIT_CTL_CYC_PSB	| \
			RTIT_CTL_MTC)

static bool pt_event_valid(struct perf_event *event)
{
	u64 config = event->attr.config;
	u64 allowed, requested;

	if ((config & PT_CONFIG_MASK) != config)
		return false;

	if (config & RTIT_CTL_CYC_PSB) {
		if (!pt_cap_get(PT_CAP_psb_cyc))
			return false;

		allowed = pt_cap_get(PT_CAP_psb_periods);
		requested = (config & RTIT_CTL_PSB_FREQ) >>
			RTIT_CTL_PSB_FREQ_OFFSET;
		if (requested && (!(allowed & BIT(requested))))
			return false;

		allowed = pt_cap_get(PT_CAP_cycle_thresholds);
		requested = (config & RTIT_CTL_CYC_THRESH) >>
			RTIT_CTL_CYC_THRESH_OFFSET;
		if (requested && (!(allowed & BIT(requested))))
			return false;
	}

	if (config & RTIT_CTL_MTC) {
		/*
		 * In the unlikely case that CPUID lists valid mtc periods,
		 * but not the mtc capability, drop out here.
		 *
		 * Spec says that setting mtc period bits while mtc bit in
		 * CPUID is 0 will #GP, so better safe than sorry.
		 */
		if (!pt_cap_get(PT_CAP_mtc))
			return false;

		allowed = pt_cap_get(PT_CAP_mtc_periods);
		if (!allowed)
			return false;

		requested = (config & RTIT_CTL_MTC_RANGE) >>
			RTIT_CTL_MTC_RANGE_OFFSET;

		if (!(allowed & BIT(requested)))
			return false;
	}

	return true;
}
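
/*
 * Worked example (numbers invented for illustration): if CPUID reports
 * psb_periods == 0x003f, only PSB period codes 0..5 are implemented.
 * A request of psb_period == 3 yields BIT(3) == 0x8 and
 * 0x003f & 0x8 != 0, so the event is accepted; psb_period == 9 yields
 * BIT(9) == 0x200 and 0x003f & 0x200 == 0, so pt_event_valid() rejects
 * the event.
 */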

/*
 * PT configuration helpers
 * These are all CPU-affine and operate on the local PT instance
 */

static void pt_config(struct perf_event *event)
{
	u64 reg;

	if (!event->hw.itrace_started) {
		event->hw.itrace_started = 1;
		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
	}

	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;

	if (!event->attr.exclude_kernel)
		reg |= RTIT_CTL_OS;
	if (!event->attr.exclude_user)
		reg |= RTIT_CTL_USR;

	reg |= (event->attr.config & PT_CONFIG_MASK);

	wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
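
/*
 * Worked example (assuming the architectural RTIT_CTL bit layout per the
 * SDM: TraceEn == bit 0, OS == bit 2, User == bit 3, ToPA == bit 8,
 * TSCEn == bit 10, BranchEn == bit 13): an event tracing both user and
 * kernel with tsc=1 in attr.config ends up writing
 * BIT(0) | BIT(2) | BIT(3) | BIT(8) | BIT(10) | BIT(13) == 0x250d
 * to MSR_IA32_RTIT_CTL.
 */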

static void pt_config_start(bool start)
{
	u64 ctl;

	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
	if (start)
		ctl |= RTIT_CTL_TRACEEN;
	else
		ctl &= ~RTIT_CTL_TRACEEN;
	wrmsrl(MSR_IA32_RTIT_CTL, ctl);

	/*
	 * A wrmsr that disables trace generation serializes other PT
	 * registers and causes all data packets to be written to memory,
	 * but a fence is required for the data to become globally visible.
	 *
	 * The below WMB, separating data store and aux_head store matches
	 * the consumer's RMB that separates aux_head load and data load.
	 */
	if (!start)
		wmb();
}

static void pt_config_buffer(void *buf, unsigned int topa_idx,
			     unsigned int output_off)
{
	u64 reg;

	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));

	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);

	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
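
/*
 * Layout of MSR_IA32_RTIT_OUTPUT_MASK in ToPA mode, as assembled above
 * (see the SDM's Intel PT chapter for the authoritative definition):
 *
 *	bits  6:0	0x7f (lower mask bits, all set in ToPA mode)
 *	bits 31:7	index of the current ToPA table entry
 *	bits 63:32	offset into the current output region
 *
 * So topa_idx == 2 and output_off == 0x1000 would program the register
 * to 0x10000000017f (example values chosen for illustration).
 */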

/*
 * Keep ToPA table-related metadata on the same page as the actual table,
 * taking up a few words from the top
 */

#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
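
/*
 * Worked out (assuming 4KiB pages and 8-byte ToPA entries):
 * (4096 - 40) / 8 - 1 == 506 entries, i.e. 4048 bytes of table, which
 * leaves 48 bytes of the page for the metadata fields of struct topa
 * below. The BUILD_BUG_ON() in pt_init() verifies the struct still fits
 * in one page.
 */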

/**
 * struct topa - page-sized ToPA table with metadata at the top
 * @table:	actual ToPA table entries, as understood by PT hardware
 * @list:	linkage to struct pt_buffer's list of tables
 * @phys:	physical address of this page
 * @offset:	offset of the first entry in this table in the buffer
 * @size:	total size of all entries in this table
 * @last:	index of the last initialized entry in this table
 */
struct topa {
	struct topa_entry	table[TENTS_PER_PAGE];
	struct list_head	list;
	u64			phys;
	u64			offset;
	size_t			size;
	int			last;
};

/* make -1 stand for the last table entry */
#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])

/**
 * topa_alloc() - allocate page-sized ToPA table
 * @cpu:	CPU on which to allocate.
 * @gfp:	Allocation flags.
 *
 * Return:	On success, return the pointer to ToPA table page.
 */
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
	int node = cpu_to_node(cpu);
	struct topa *topa;
	struct page *p;

	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
	if (!p)
		return NULL;

	topa = page_address(p);
	topa->last = 0;
	topa->phys = page_to_phys(p);

	/*
	 * In case of single-entry ToPA, always put the self-referencing END
	 * link as the 2nd entry in the table
	 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
		TOPA_ENTRY(topa, 1)->end = 1;
	}

	return topa;
}

/**
 * topa_free() - free a page-sized ToPA table
 * @topa:	Table to deallocate.
 */
static void topa_free(struct topa *topa)
{
	free_page((unsigned long)topa);
}

/**
 * topa_insert_table() - insert a ToPA table into a buffer
 * @buf:	 PT buffer that's being extended.
 * @topa:	 New topa table to be inserted.
 *
 * If it's the first table in this buffer, set up buffer's pointers
 * accordingly; otherwise, add an END=1 link entry pointing to @topa into
 * the current "last" table and adjust the last table pointer to @topa.
 */
static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
{
	struct topa *last = buf->last;

	list_add_tail(&topa->list, &buf->tables);

	if (!buf->first) {
		buf->first = buf->last = buf->cur = topa;
		return;
	}

	topa->offset = last->offset + last->size;
	buf->last = topa;

	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return;

	BUG_ON(last->last != TENTS_PER_PAGE - 1);

	TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
	TOPA_ENTRY(last, -1)->end = 1;
}

/**
 * topa_table_full() - check if a ToPA table is filled up
 * @topa:	ToPA table.
 */
static bool topa_table_full(struct topa *topa)
{
	/* single-entry ToPA is a special case */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return !!topa->last;

	return topa->last == TENTS_PER_PAGE - 1;
}

/**
 * topa_insert_pages() - create a list of ToPA tables
 * @buf:	PT buffer being initialized.
 * @gfp:	Allocation flags.
 *
 * This initializes a list of ToPA tables with entries from
 * the data_pages provided by rb_alloc_aux().
 *
 * Return:	0 on success or error code.
 */
static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
{
	struct topa *topa = buf->last;
	int order = 0;
	struct page *p;

	p = virt_to_page(buf->data_pages[buf->nr_pages]);
	if (PagePrivate(p))
		order = page_private(p);

	if (topa_table_full(topa)) {
		topa = topa_alloc(buf->cpu, gfp);
		if (!topa)
			return -ENOMEM;

		topa_insert_table(buf, topa);
	}

	TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
	TOPA_ENTRY(topa, -1)->size = order;
	if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(topa, -1)->intr = 1;
		TOPA_ENTRY(topa, -1)->stop = 1;
	}

	topa->last++;
	topa->size += sizes(order);

	buf->nr_pages += 1ul << order;

	return 0;
}

/**
 * pt_topa_dump() - print ToPA tables and their entries
 * @buf:	PT buffer.
 */
static void pt_topa_dump(struct pt_buffer *buf)
{
	struct topa *topa;

	list_for_each_entry(topa, &buf->tables, list) {
		int i;

		pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
			 topa->phys, topa->offset, topa->size);
		for (i = 0; i < TENTS_PER_PAGE; i++) {
			pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
				 &topa->table[i],
				 (unsigned long)topa->table[i].base << TOPA_SHIFT,
				 sizes(topa->table[i].size),
				 topa->table[i].end ?  'E' : ' ',
				 topa->table[i].intr ? 'I' : ' ',
				 topa->table[i].stop ? 'S' : ' ',
				 *(u64 *)&topa->table[i]);
			if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
			     topa->table[i].stop) ||
			    topa->table[i].end)
				break;
		}
	}
}

/**
 * pt_buffer_advance() - advance to the next output region
 * @buf:	PT buffer.
 *
 * Advance the current pointers in the buffer to the next ToPA entry.
 */
static void pt_buffer_advance(struct pt_buffer *buf)
{
	buf->output_off = 0;
	buf->cur_idx++;

	if (buf->cur_idx == buf->cur->last) {
		if (buf->cur == buf->last)
			buf->cur = buf->first;
		else
			buf->cur = list_entry(buf->cur->list.next, struct topa,
					      list);
		buf->cur_idx = 0;
	}
}

/**
 * pt_update_head() - calculate current offsets and sizes
 * @pt:		Per-cpu pt context.
 *
 * Update buffer's current write pointer position and data size.
 */
static void pt_update_head(struct pt *pt)
{
	struct pt_buffer *buf = perf_get_aux(&pt->handle);
	u64 topa_idx, base, old;

	/* offset of the first region in this table from the beginning of buf */
	base = buf->cur->offset + buf->output_off;

	/* offset of the current output region within this table */
	for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
		base += sizes(buf->cur->table[topa_idx].size);

	if (buf->snapshot) {
		local_set(&buf->data_size, base);
	} else {
		old = (local64_xchg(&buf->head, base) &
		       ((buf->nr_pages << PAGE_SHIFT) - 1));
		if (base < old)
			base += buf->nr_pages << PAGE_SHIFT;

		local_add(base - old, &buf->data_size);
	}
}
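
/*
 * Example of the wrap-around arithmetic above (invented numbers): with a
 * 64-page (256KiB) buffer, if the previous head was 0x3f000 and the new
 * base computes to 0x800, the hardware has wrapped; base < old, so
 * 0x40000 is added and data_size grows by 0x40800 - 0x3f000 == 0x1800.
 */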

/**
 * pt_buffer_region() - obtain current output region's address
 * @buf:	PT buffer.
 */
static void *pt_buffer_region(struct pt_buffer *buf)
{
	return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
}

/**
 * pt_buffer_region_size() - obtain current output region's size
 * @buf:	PT buffer.
 */
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
	return sizes(buf->cur->table[buf->cur_idx].size);
}

/**
 * pt_handle_status() - take care of possible status conditions
 * @pt:		Per-cpu pt context.
 */
static void pt_handle_status(struct pt *pt)
{
	struct pt_buffer *buf = perf_get_aux(&pt->handle);
	int advance = 0;
	u64 status;

	rdmsrl(MSR_IA32_RTIT_STATUS, status);

	if (status & RTIT_STATUS_ERROR) {
		pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
		pt_topa_dump(buf);
		status &= ~RTIT_STATUS_ERROR;
	}

	if (status & RTIT_STATUS_STOPPED) {
		status &= ~RTIT_STATUS_STOPPED;

		/*
		 * On systems that only do single-entry ToPA, hitting STOP
		 * means we are already losing data; need to let the decoder
		 * know.
		 */
		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
			local_inc(&buf->lost);
			advance++;
		}
	}

	/*
	 * Also, on single-entry ToPA implementations, the interrupt will come
	 * before the output reaches its output region's boundary.
	 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
	    pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
		void *head = pt_buffer_region(buf);

		/* everything within this margin needs to be zeroed out */
		memset(head + buf->output_off, 0,
		       pt_buffer_region_size(buf) -
		       buf->output_off);
		advance++;
	}

	if (advance)
		pt_buffer_advance(buf);

	wrmsrl(MSR_IA32_RTIT_STATUS, status);
}

/**
 * pt_read_offset() - translate registers into buffer pointers
 * @buf:	PT buffer.
 *
 * Set buffer's output pointers from MSR values.
 */
static void pt_read_offset(struct pt_buffer *buf)
{
	u64 offset, base_topa;

	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
	buf->cur = phys_to_virt(base_topa);

	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
	/* offset within current output region */
	buf->output_off = offset >> 32;
	/* index of current output region within this table */
	buf->cur_idx = (offset & 0xffffff80) >> 7;
}

/**
 * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
 * @buf:	PT buffer.
 * @pg:		Page offset in the buffer.
 *
 * When advancing to the next output region (ToPA entry), given a page offset
 * into the buffer, we need to find the offset of the first page in the next
 * region.
 */
static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
{
	struct topa_entry *te = buf->topa_index[pg];

	/* one region */
	if (buf->first == buf->last && buf->first->last == 1)
		return pg;

	do {
		pg++;
		pg &= buf->nr_pages - 1;
	} while (buf->topa_index[pg] == te);

	return pg;
}
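
/*
 * Illustration (hypothetical layout): all pages backed by one output
 * region share one topa_index[] value, so with a page-to-entry mapping
 * of {A, A, A, A, B, B, B, B} a call with pg == 1 walks forward until
 * the value changes and returns 4, wrapping modulo nr_pages at the end
 * of the buffer.
 */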

/**
 * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
 * @buf:	PT buffer.
 * @handle:	Current output handle.
 *
 * Place INT and STOP marks to prevent overwriting old data that the consumer
 * hasn't yet collected and waking up the consumer after a certain fraction of
 * the buffer has filled up. Only needed and sensible for non-snapshot counters.
 *
 * This obviously relies on buf::head to figure out buffer markers, so it has
 * to be called after pt_buffer_reset_offsets() and before the hardware tracing
 * is enabled.
 */
static int pt_buffer_reset_markers(struct pt_buffer *buf,
				   struct perf_output_handle *handle)
{
	unsigned long head = local64_read(&buf->head);
	unsigned long idx, npages, wakeup;

	/* can't stop in the middle of an output region */
	if (buf->output_off + handle->size + 1 <
	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
		return -EINVAL;

	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return 0;

	/* clear STOP and INT from current entry */
	buf->topa_index[buf->stop_pos]->stop = 0;
	buf->topa_index[buf->intr_pos]->intr = 0;

	/* how many pages till the STOP marker */
	npages = handle->size >> PAGE_SHIFT;

	/* if it's on a page boundary, fill up one more page */
	if (!offset_in_page(head + handle->size + 1))
		npages++;

	idx = (head >> PAGE_SHIFT) + npages;
	idx &= buf->nr_pages - 1;
	buf->stop_pos = idx;

	wakeup = handle->wakeup >> PAGE_SHIFT;

	/* in the worst case, wake up the consumer one page before hard stop */
	idx = (head >> PAGE_SHIFT) + npages - 1;
	if (idx > wakeup)
		idx = wakeup;

	idx &= buf->nr_pages - 1;
	buf->intr_pos = idx;

	buf->topa_index[buf->stop_pos]->stop = 1;
	buf->topa_index[buf->intr_pos]->intr = 1;

	return 0;
}

/**
 * pt_buffer_setup_topa_index() - build topa_index[] table of regions
 * @buf:	PT buffer.
 *
 * topa_index[] references output regions indexed by offset into the
 * buffer for purposes of quick reverse lookup.
 */
static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
{
	struct topa *cur = buf->first, *prev = buf->last;
	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
	int pg = 0, idx = 0;

	while (pg < buf->nr_pages) {
		int tidx;

		/* pages within one topa entry */
		for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
			buf->topa_index[pg] = te_prev;

		te_prev = te_cur;

		if (idx == cur->last - 1) {
			/* advance to next topa table */
			idx = 0;
			cur = list_entry(cur->list.next, struct topa, list);
		} else {
			idx++;
		}
		te_cur = TOPA_ENTRY(cur, idx);
	}
}

/**
 * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
 * @buf:	PT buffer.
 * @head:	Write pointer (aux_head) from AUX buffer.
 *
 * Find the ToPA table and entry corresponding to given @head and set buffer's
 * "current" pointers accordingly. This is done after we have obtained the
 * current aux_head position from a successful call to perf_aux_output_begin()
 * to make sure the hardware is writing to the right place.
 *
 * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
 * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
 * which are used to determine INT and STOP markers' locations by a subsequent
 * call to pt_buffer_reset_markers().
 */
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
	int pg;

	if (buf->snapshot)
		head &= (buf->nr_pages << PAGE_SHIFT) - 1;

	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
	pg = pt_topa_next_entry(buf, pg);

	buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
	buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
			(unsigned long)buf->cur) / sizeof(struct topa_entry);
	buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);

	local64_set(&buf->head, head);
	local_set(&buf->data_size, 0);
}
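
/*
 * Example (invented values, 4KiB pages, 8-page buffer): head == 0x5830
 * lands on page 5; pt_topa_next_entry() skips to the first page of the
 * following region, whose topa_index[] entry (which, by construction in
 * pt_buffer_setup_topa_index(), refers back to the region containing
 * head) yields buf->cur and buf->cur_idx, and output_off becomes
 * 0x5830 & (region size - 1).
 */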

/**
 * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
 * @buf:	PT buffer.
 */
static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
	struct topa *topa, *iter;

	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
		/*
		 * right now, this is in free_aux() path only, so
		 * no need to unlink this table from the list
		 */
		topa_free(topa);
	}
}

/**
 * pt_buffer_init_topa() - initialize ToPA table for pt buffer
 * @buf:	PT buffer.
 * @nr_pages:	Number of pages to map into the ToPA tables.
 * @gfp:	Allocation flags.
 */
static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
			       gfp_t gfp)
{
	struct topa *topa;
	int err;

	topa = topa_alloc(buf->cpu, gfp);
	if (!topa)
		return -ENOMEM;

	topa_insert_table(buf, topa);

	while (buf->nr_pages < nr_pages) {
		err = topa_insert_pages(buf, gfp);
		if (err) {
			pt_buffer_fini_topa(buf);
			return -ENOMEM;
		}
	}

	pt_buffer_setup_topa_index(buf);

	/* link last table to the first one, unless we're double buffering */
	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
		TOPA_ENTRY(buf->last, -1)->end = 1;
	}

	pt_topa_dump(buf);
	return 0;
}

/**
 * pt_buffer_setup_aux() - set up topa tables for a PT buffer
 * @cpu:	Cpu on which to allocate, -1 means current.
 * @pages:	Array of pointers to buffer pages passed from perf core.
 * @nr_pages:	Number of pages in the buffer.
 * @snapshot:	If this is a snapshot/overwrite counter.
 *
 * This is a pmu::setup_aux callback that sets up ToPA tables and all the
 * bookkeeping for an AUX buffer.
 *
 * Return:	Our private PT buffer structure.
 */
static void *
pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
{
	struct pt_buffer *buf;
	int node, ret;

	if (!nr_pages)
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();
	node = cpu_to_node(cpu);

	buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
			   GFP_KERNEL, node);
	if (!buf)
		return NULL;

	buf->cpu = cpu;
	buf->snapshot = snapshot;
	buf->data_pages = pages;

	INIT_LIST_HEAD(&buf->tables);

	ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
	if (ret) {
		kfree(buf);
		return NULL;
	}

	return buf;
}

/**
 * pt_buffer_free_aux() - perf AUX deallocation path callback
 * @data:	PT buffer.
 */
static void pt_buffer_free_aux(void *data)
{
	struct pt_buffer *buf = data;

	pt_buffer_fini_topa(buf);
	kfree(buf);
}

/**
 * pt_buffer_is_full() - check if the buffer is full
 * @buf:	PT buffer.
 * @pt:		Per-cpu pt handle.
 *
 * If the user hasn't read data from the output region that aux_head
 * points to, the buffer is considered full: the user needs to read at
 * least this region and update aux_tail to point past it.
 */
static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
{
	if (buf->snapshot)
		return false;

	if (local_read(&buf->data_size) >= pt->handle.size)
		return true;

	return false;
}

/**
 * intel_pt_interrupt() - PT PMI handler
 */
void intel_pt_interrupt(void)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf;
	struct perf_event *event = pt->handle.event;

	/*
	 * There may be a dangling PT bit in the interrupt status register
	 * after PT has been disabled by pt_event_stop(). Make sure we don't
	 * do anything (particularly, re-enable) for this event here.
	 */
	if (!ACCESS_ONCE(pt->handle_nmi))
		return;

	pt_config_start(false);

	if (!event)
		return;

	buf = perf_get_aux(&pt->handle);
	if (!buf)
		return;

	pt_read_offset(buf);

	pt_handle_status(pt);

	pt_update_head(pt);

	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
			    local_xchg(&buf->lost, 0));

	if (!event->hw.state) {
		int ret;

		buf = perf_aux_output_begin(&pt->handle, event);
		if (!buf) {
			event->hw.state = PERF_HES_STOPPED;
			return;
		}

		pt_buffer_reset_offsets(buf, pt->handle.head);
		/* snapshot counters don't use PMI, so it's safe */
		ret = pt_buffer_reset_markers(buf, &pt->handle);
		if (ret) {
			perf_aux_output_end(&pt->handle, 0, true);
			return;
		}

		pt_config_buffer(buf->cur->table, buf->cur_idx,
				 buf->output_off);
		pt_config(event);
	}
}

/*
 * PMU callbacks
 */

static void pt_event_start(struct perf_event *event, int mode)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf = perf_get_aux(&pt->handle);

	if (!buf || pt_buffer_is_full(buf, pt)) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}

	ACCESS_ONCE(pt->handle_nmi) = 1;
	event->hw.state = 0;

	pt_config_buffer(buf->cur->table, buf->cur_idx,
			 buf->output_off);
	pt_config(event);
}

static void pt_event_stop(struct perf_event *event, int mode)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);

	/*
	 * Protect against the PMI racing with disabling wrmsr,
	 * see comment in intel_pt_interrupt().
	 */
	ACCESS_ONCE(pt->handle_nmi) = 0;
	pt_config_start(false);

	if (event->hw.state == PERF_HES_STOPPED)
		return;

	event->hw.state = PERF_HES_STOPPED;

	if (mode & PERF_EF_UPDATE) {
		struct pt_buffer *buf = perf_get_aux(&pt->handle);

		if (!buf)
			return;

		if (WARN_ON_ONCE(pt->handle.event != event))
			return;

		pt_read_offset(buf);

		pt_handle_status(pt);

		pt_update_head(pt);
	}
}

static void pt_event_del(struct perf_event *event, int mode)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf;

	pt_event_stop(event, PERF_EF_UPDATE);

	buf = perf_get_aux(&pt->handle);

	if (buf) {
		if (buf->snapshot)
			pt->handle.head =
				local_xchg(&buf->data_size,
					   buf->nr_pages << PAGE_SHIFT);
		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
				    local_xchg(&buf->lost, 0));
	}
}

static int pt_event_add(struct perf_event *event, int mode)
{
	struct pt_buffer *buf;
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct hw_perf_event *hwc = &event->hw;
	int ret = -EBUSY;

	if (pt->handle.event)
		goto fail;

	buf = perf_aux_output_begin(&pt->handle, event);
	ret = -EINVAL;
	if (!buf)
		goto fail_stop;

	pt_buffer_reset_offsets(buf, pt->handle.head);
	if (!buf->snapshot) {
		ret = pt_buffer_reset_markers(buf, &pt->handle);
		if (ret)
			goto fail_end_stop;
	}

	if (mode & PERF_EF_START) {
		pt_event_start(event, 0);
		ret = -EBUSY;
		if (hwc->state == PERF_HES_STOPPED)
			goto fail_end_stop;
	} else {
		hwc->state = PERF_HES_STOPPED;
	}

	return 0;

fail_end_stop:
	perf_aux_output_end(&pt->handle, 0, true);
fail_stop:
	hwc->state = PERF_HES_STOPPED;
fail:
	return ret;
}

static void pt_event_read(struct perf_event *event)
{
}

static void pt_event_destroy(struct perf_event *event)
{
	x86_del_exclusive(x86_lbr_exclusive_pt);
}

static int pt_event_init(struct perf_event *event)
{
	if (event->attr.type != pt_pmu.pmu.type)
		return -ENOENT;

	if (!pt_event_valid(event))
		return -EINVAL;

	if (x86_add_exclusive(x86_lbr_exclusive_pt))
		return -EBUSY;

	event->destroy = pt_event_destroy;

	return 0;
}

void cpu_emergency_stop_pt(void)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);

	if (pt->handle.event)
		pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
}

static __init int pt_init(void)
{
	int ret, cpu, prior_warn = 0;

	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);

	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
		return -ENODEV;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		u64 ctl;

		ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
		if (!ret && (ctl & RTIT_CTL_TRACEEN))
			prior_warn++;
	}
	put_online_cpus();

	if (prior_warn) {
		x86_add_exclusive(x86_lbr_exclusive_pt);
		pr_warn("PT is enabled at boot time, doing nothing\n");

		return -EBUSY;
	}

	ret = pt_pmu_hw_init();
	if (ret)
		return ret;

	if (!pt_cap_get(PT_CAP_topa_output)) {
		pr_warn("ToPA output is not supported on this CPU\n");
		return -ENODEV;
	}

	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		pt_pmu.pmu.capabilities =
			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;

	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
	pt_pmu.pmu.attr_groups	= pt_attr_groups;
	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
	pt_pmu.pmu.event_init	= pt_event_init;
	pt_pmu.pmu.add		= pt_event_add;
	pt_pmu.pmu.del		= pt_event_del;
	pt_pmu.pmu.start	= pt_event_start;
	pt_pmu.pmu.stop		= pt_event_stop;
	pt_pmu.pmu.read		= pt_event_read;
	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);

	return ret;
}
arch_initcall(pt_init);
1188fd1c601cSBorislav Petkov arch_initcall(pt_init);
1189