xref: /linux/arch/powerpc/perf/generic-compat-pmu.c (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1 // SPDX-License-Identifier: GPL-2.0+
2 //
3 // Copyright 2019 Madhavan Srinivasan, IBM Corporation.
4 
5 #define pr_fmt(fmt)	"generic-compat-pmu: " fmt
6 
7 #include "isa207-common.h"
8 
9 /*
10  * Raw event encoding:
11  *
12  *        60        56        52        48        44        40        36        32
13  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
14  *
15  *        28        24        20        16        12         8         4         0
16  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
17  *                                 [ pmc ]                       [    pmcxsel    ]
18  */
19 
20 /*
21  * Event codes defined in ISA v3.0B
22  */
/*
 * The enumerators are grouped by the value of the pmc field
 * (bits 16-19 of the code) and kept in ascending order of code.
 */
enum {
	/* ---- pmc field = 1 ---- */

	/* Cycles, alternate code */
	PM_CYC_ALT		= 0x100f0,
	/* One or more instructions completed in a cycle */
	PM_CYC_INST_CMPL	= 0x100f2,
	/* Floating-point instruction completed */
	PM_FLOP_CMPL		= 0x100f4,
	/* Instruction ERAT/L1-TLB miss */
	PM_L1_ITLB_MISS		= 0x100f6,
	/* All instructions completed and none available */
	PM_NO_INST_AVAIL	= 0x100f8,
	/* A load-type instruction completed (ISA v3.0+) */
	PM_LD_CMPL		= 0x100fc,
	/* Instruction completed, alternate code (ISA v3.0+) */
	PM_INST_CMPL_ALT	= 0x100fe,

	/* ---- pmc field = 2 ---- */

	/* A store-type instruction completed */
	PM_ST_CMPL		= 0x200f0,
	/* Instruction Dispatched */
	PM_INST_DISP		= 0x200f2,
	/* Run_cycles */
	PM_RUN_CYC		= 0x200f4,
	/* Data ERAT/L1-TLB miss/reload */
	PM_L1_DTLB_RELOAD	= 0x200f6,
	/* Taken branch completed */
	PM_BR_TAKEN_CMPL	= 0x200fa,
	/* Demand iCache Miss */
	PM_L1_ICACHE_MISS	= 0x200fc,
	/* L1 Dcache reload from memory */
	PM_L1_RELOAD_FROM_MEM	= 0x200fe,

	/* ---- pmc field = 3 ---- */

	/* L1 Dcache store miss */
	PM_ST_MISS_L1		= 0x300f0,
	/* Alternate code for PM_INST_DISP */
	PM_INST_DISP_ALT	= 0x300f2,
	/* Branch direction or target mispredicted */
	PM_BR_MISPREDICT	= 0x300f6,
	/* Data TLB miss/reload */
	PM_DTLB_MISS		= 0x300fc,
	/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
	PM_DATA_FROM_L3MISS	= 0x300fe,

	/* ---- pmc field = 4 ---- */

	/* L1 Dcache load miss */
	PM_LD_MISS_L1		= 0x400f0,
	/* Cycle when instruction(s) dispatched */
	PM_CYC_INST_DISP	= 0x400f2,
	/* Branch or branch target mispredicted */
	PM_BR_MPRED_CMPL	= 0x400f6,
	/* Instructions completed with run latch set */
	PM_RUN_INST_CMPL	= 0x400fa,
	/* Instruction TLB miss/reload */
	PM_ITLB_MISS		= 0x400fc,
	/* Load data not cached */
	PM_LD_NOT_CACHED	= 0x400fe,

	/* ---- pmc field = 5 ---- */

	/* Instructions */
	PM_INST_CMPL		= 0x500fa,

	/* ---- pmc field = 6 ---- */

	/* Cycles */
	PM_CYC			= 0x600f4,
};
83 
84 /* Table of alternatives, sorted in increasing order of column 0 */
85 /* Note that in each row, column 0 must be the smallest */
86 static const unsigned int generic_event_alternatives[][MAX_ALT] = {
87 	{ PM_CYC_ALT,			PM_CYC },
88 	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
89 	{ PM_INST_DISP,			PM_INST_DISP_ALT },
90 };
91 
92 static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
93 {
94 	int num_alt = 0;
95 
96 	num_alt = isa207_get_alternatives(event, alt,
97 					  ARRAY_SIZE(generic_event_alternatives), flags,
98 					  generic_event_alternatives);
99 
100 	return num_alt;
101 }
102 
103 GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
104 GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
105 GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_NO_INST_AVAIL);
106 GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
107 GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
108 
109 CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
110 CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
111 CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
112 CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
113 CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
114 CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
115 CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
116 
117 static struct attribute *generic_compat_events_attr[] = {
118 	GENERIC_EVENT_PTR(PM_CYC),
119 	GENERIC_EVENT_PTR(PM_INST_CMPL),
120 	GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
121 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
122 	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
123 	CACHE_EVENT_PTR(PM_LD_MISS_L1),
124 	CACHE_EVENT_PTR(PM_ST_MISS_L1),
125 	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
126 	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
127 	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
128 	CACHE_EVENT_PTR(PM_DTLB_MISS),
129 	CACHE_EVENT_PTR(PM_ITLB_MISS),
130 	NULL
131 };
132 
133 static const struct attribute_group generic_compat_pmu_events_group = {
134 	.name = "events",
135 	.attrs = generic_compat_events_attr,
136 };
137 
138 PMU_FORMAT_ATTR(event,		"config:0-19");
139 PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
140 PMU_FORMAT_ATTR(pmc,		"config:16-19");
141 
142 static struct attribute *generic_compat_pmu_format_attr[] = {
143 	&format_attr_event.attr,
144 	&format_attr_pmcxsel.attr,
145 	&format_attr_pmc.attr,
146 	NULL,
147 };
148 
149 static const struct attribute_group generic_compat_pmu_format_group = {
150 	.name = "format",
151 	.attrs = generic_compat_pmu_format_attr,
152 };
153 
154 static struct attribute *generic_compat_pmu_caps_attrs[] = {
155 	NULL
156 };
157 
158 static struct attribute_group generic_compat_pmu_caps_group = {
159 	.name  = "caps",
160 	.attrs = generic_compat_pmu_caps_attrs,
161 };
162 
163 static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
164 	&generic_compat_pmu_format_group,
165 	&generic_compat_pmu_events_group,
166 	&generic_compat_pmu_caps_group,
167 	NULL,
168 };
169 
170 static int compat_generic_events[] = {
171 	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
172 	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
173 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_NO_INST_AVAIL,
174 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
175 	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
176 };
177 
178 #define C(x)	PERF_COUNT_HW_CACHE_##x
179 
180 /*
181  * Table of generalized cache-related events.
182  * 0 means not supported, -1 means nonsensical, other values
183  * are event codes.
184  */
185 static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
186 	[ C(L1D) ] = {
187 		[ C(OP_READ) ] = {
188 			[ C(RESULT_ACCESS) ] = 0,
189 			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
190 		},
191 		[ C(OP_WRITE) ] = {
192 			[ C(RESULT_ACCESS) ] = 0,
193 			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
194 		},
195 		[ C(OP_PREFETCH) ] = {
196 			[ C(RESULT_ACCESS) ] = 0,
197 			[ C(RESULT_MISS)   ] = 0,
198 		},
199 	},
200 	[ C(L1I) ] = {
201 		[ C(OP_READ) ] = {
202 			[ C(RESULT_ACCESS) ] = 0,
203 			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
204 		},
205 		[ C(OP_WRITE) ] = {
206 			[ C(RESULT_ACCESS) ] = 0,
207 			[ C(RESULT_MISS)   ] = -1,
208 		},
209 		[ C(OP_PREFETCH) ] = {
210 			[ C(RESULT_ACCESS) ] = 0,
211 			[ C(RESULT_MISS)   ] = 0,
212 		},
213 	},
214 	[ C(LL) ] = {
215 		[ C(OP_READ) ] = {
216 			[ C(RESULT_ACCESS) ] = 0,
217 			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
218 		},
219 		[ C(OP_WRITE) ] = {
220 			[ C(RESULT_ACCESS) ] = 0,
221 			[ C(RESULT_MISS)   ] = 0,
222 		},
223 		[ C(OP_PREFETCH) ] = {
224 			[ C(RESULT_ACCESS) ] = 0,
225 			[ C(RESULT_MISS)   ] = 0,
226 		},
227 	},
228 	[ C(DTLB) ] = {
229 		[ C(OP_READ) ] = {
230 			[ C(RESULT_ACCESS) ] = 0,
231 			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
232 		},
233 		[ C(OP_WRITE) ] = {
234 			[ C(RESULT_ACCESS) ] = -1,
235 			[ C(RESULT_MISS)   ] = -1,
236 		},
237 		[ C(OP_PREFETCH) ] = {
238 			[ C(RESULT_ACCESS) ] = -1,
239 			[ C(RESULT_MISS)   ] = -1,
240 		},
241 	},
242 	[ C(ITLB) ] = {
243 		[ C(OP_READ) ] = {
244 			[ C(RESULT_ACCESS) ] = 0,
245 			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
246 		},
247 		[ C(OP_WRITE) ] = {
248 			[ C(RESULT_ACCESS) ] = -1,
249 			[ C(RESULT_MISS)   ] = -1,
250 		},
251 		[ C(OP_PREFETCH) ] = {
252 			[ C(RESULT_ACCESS) ] = -1,
253 			[ C(RESULT_MISS)   ] = -1,
254 		},
255 	},
256 	[ C(BPU) ] = {
257 		[ C(OP_READ) ] = {
258 			[ C(RESULT_ACCESS) ] = 0,
259 			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
260 		},
261 		[ C(OP_WRITE) ] = {
262 			[ C(RESULT_ACCESS) ] = -1,
263 			[ C(RESULT_MISS)   ] = -1,
264 		},
265 		[ C(OP_PREFETCH) ] = {
266 			[ C(RESULT_ACCESS) ] = -1,
267 			[ C(RESULT_MISS)   ] = -1,
268 		},
269 	},
270 	[ C(NODE) ] = {
271 		[ C(OP_READ) ] = {
272 			[ C(RESULT_ACCESS) ] = -1,
273 			[ C(RESULT_MISS)   ] = -1,
274 		},
275 		[ C(OP_WRITE) ] = {
276 			[ C(RESULT_ACCESS) ] = -1,
277 			[ C(RESULT_MISS)   ] = -1,
278 		},
279 		[ C(OP_PREFETCH) ] = {
280 			[ C(RESULT_ACCESS) ] = -1,
281 			[ C(RESULT_MISS)   ] = -1,
282 		},
283 	},
284 };
285 
286 #undef C
287 
288 /*
289  * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for
290  * PM_INST_CMPL and PM_CYC.
291  */
292 static int generic_compute_mmcr(u64 event[], int n_ev,
293 				unsigned int hwc[], struct mmcr_regs *mmcr,
294 				struct perf_event *pevents[], u32 flags)
295 {
296 	int ret;
297 
298 	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
299 	if (!ret)
300 		mmcr->mmcr0 |= MMCR0_C56RUN;
301 	return ret;
302 }
303 
304 static struct power_pmu generic_compat_pmu = {
305 	.name			= "ISAv3",
306 	.n_counter		= MAX_PMU_COUNTERS,
307 	.add_fields		= ISA207_ADD_FIELDS,
308 	.test_adder		= ISA207_TEST_ADDER,
309 	.compute_mmcr		= generic_compute_mmcr,
310 	.get_constraint		= isa207_get_constraint,
311 	.get_alternatives	= generic_get_alternatives,
312 	.disable_pmc		= isa207_disable_pmc,
313 	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
314 	.n_generic		= ARRAY_SIZE(compat_generic_events),
315 	.generic_events		= compat_generic_events,
316 	.cache_events		= &generic_compat_cache_events,
317 	.attr_groups		= generic_compat_pmu_attr_groups,
318 };
319 
320 int __init init_generic_compat_pmu(void)
321 {
322 	int rc = 0;
323 
324 	/*
325 	 * From ISA v2.07 on, PMU features are architected;
326 	 * we require >= v3.0 because (a) that has PM_LD_CMPL and
327 	 * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
328 	 * (b) we don't expect any non-IBM Power ISA
329 	 * implementations that conform to v2.07 but not v3.0.
330 	 */
331 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
332 		return -ENODEV;
333 
334 	rc = register_power_pmu(&generic_compat_pmu);
335 	if (rc)
336 		return rc;
337 
338 	/* Tell userspace that EBB is supported */
339 	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
340 
341 	return 0;
342 }
343