xref: /linux/arch/x86/events/intel/cstate.c (revision a126eca844353360ebafa9088d22865cb8e022e3)
1 /*
2  * Support cstate residency counters
3  *
4  * Copyright (C) 2015, Intel Corp.
5  * Author: Kan Liang (kan.liang@intel.com)
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  */
18 
19 /*
 * This file exports cstate related free running (read-only) counters
 * for perf. These counters may be used simultaneously by other tools,
 * such as turbostat. However, it still makes sense to implement them
 * in perf, because we can conveniently collect them together with
 * other events, and use them from tools without special MSR
 * access code.
26  *
27  * The events only support system-wide mode counting. There is no
28  * sampling support because it is not supported by the hardware.
29  *
 * According to counters' scope and category, three PMUs are registered
 * with the perf_event core subsystem.
 *  - 'cstate_core': The counter is available for each physical core.
 *    The counters include CORE_C*_RESIDENCY.
 *  - 'cstate_pkg': The counter is available for each physical package.
 *    The counters include PKG_C*_RESIDENCY.
 *  - 'cstate_module': The counter is available for each cluster of
 *    cores sharing an L2 cache. The counters include MODULE_C6_RES_MS.
36  *
 * All of these counters are specified in the Intel® 64 and IA-32
 * Architectures Software Developer's Manual Vol3b.
39  *
40  * Model specific counters:
41  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
42  *			 perf code: 0x00
43  *			 Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
44  *					  MTL,SRF,GRR
45  *			 Scope: Core (each processor core has a MSR)
46  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
47  *			       perf code: 0x01
48  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
49  *						CNL,KBL,CML,TNT
50  *			       Scope: Core
51  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
52  *			       perf code: 0x02
53  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
54  *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
55  *						TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
56  *						GRR
57  *			       Scope: Core
58  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
59  *			       perf code: 0x03
60  *			       Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
61  *						ICL,TGL,RKL,ADL,RPL,MTL
62  *			       Scope: Core
63  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
64  *			       perf code: 0x00
65  *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
66  *						KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
67  *						RPL,SPR,MTL
68  *			       Scope: Package (physical package)
69  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
70  *			       perf code: 0x01
71  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
72  *						GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
73  *						ADL,RPL,MTL
74  *			       Scope: Package (physical package)
75  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
76  *			       perf code: 0x02
77  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
78  *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
79  *						TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF
80  *			       Scope: Package (physical package)
81  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
82  *			       perf code: 0x03
83  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
84  *						KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL
85  *			       Scope: Package (physical package)
86  *	MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
87  *			       perf code: 0x04
88  *			       Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
89  *						ADL,RPL,MTL
90  *			       Scope: Package (physical package)
91  *	MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
92  *			       perf code: 0x05
93  *			       Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
94  *						ADL,RPL,MTL
95  *			       Scope: Package (physical package)
96  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
97  *			       perf code: 0x06
98  *			       Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
99  *						TNT,RKL,ADL,RPL,MTL
100  *			       Scope: Package (physical package)
101  *	MSR_MODULE_C6_RES_MS:  Module C6 Residency Counter.
102  *			       perf code: 0x00
103  *			       Available model: SRF,GRR
 *			       Scope: A cluster of cores sharing an L2 cache
105  *
106  */
107 
108 #include <linux/module.h>
109 #include <linux/slab.h>
110 #include <linux/perf_event.h>
111 #include <linux/nospec.h>
112 #include <asm/cpu_device_id.h>
113 #include <asm/intel-family.h>
114 #include "../perf_event.h"
115 #include "../probe.h"
116 
117 MODULE_DESCRIPTION("Support for Intel cstate performance events");
118 MODULE_LICENSE("GPL");
119 
/*
 * Define a read-only (0444) device attribute called @_name.
 *
 * The macro expands to a show() callback that prints the constant string
 * @_format followed by a newline, and to the device_attribute
 * format_attr_<_var> that points at that callback. The BUILD_BUG_ON()
 * rejects, at compile time, a format string that would not fit in one page.
 */
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __cstate_##_var##_show(struct device *dev,	\
				struct device_attribute *attr,	\
				char *page)			\
{								\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
	return sprintf(page, _format "\n");			\
}								\
static struct device_attribute format_attr_##_var =		\
	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
130 
/*
 * Forward declaration: the definition lives further down, after the
 * per-PMU cpumasks it prints, but DEVICE_ATTR(cpumask, ...) refers to it
 * before that point.
 */
static ssize_t cstate_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr,
				       char *buf);
134 
/*
 * Model -> events mapping.
 *
 * Each *_events member is a bitmask built from BIT(PERF_CSTATE_*_RES)
 * values of the matching perf_cstate_*_events enum; a set bit marks the
 * event as listed for that model. cstate_probe() hands these masks to
 * perf_msr_probe(). 'quirks' is a mask of the quirk flags below.
 */
struct cstate_model {
	unsigned long		core_events;
	unsigned long		pkg_events;
	unsigned long		module_events;
	unsigned long		quirks;
};

/*
 * Quirk flags, evaluated in cstate_probe():
 *  - SLM_PKG_C6_USE_C7_MSR: the PKG C6 event reads MSR_PKG_C7_RESIDENCY.
 *  - KNL_CORE_C6_MSR: KNL has a different MSR for CORE C6
 *    (MSR_KNL_CORE_C6_RESIDENCY).
 */
#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
#define KNL_CORE_C6_MSR		(1UL << 1)
146 
/* cstate_core PMU */
static struct pmu cstate_core_pmu;
/* Set by cstate_probe() when at least one core event was detected. */
static bool has_cstate_core;

/*
 * Core-scope event ids. The value is both the perf 'config' number
 * accepted by cstate_pmu_event_init() and the index into core_msr[].
 */
enum perf_cstate_core_events {
	PERF_CSTATE_CORE_C1_RES = 0,
	PERF_CSTATE_CORE_C3_RES,
	PERF_CSTATE_CORE_C6_RES,
	PERF_CSTATE_CORE_C7_RES,

	PERF_CSTATE_CORE_EVENT_MAX,
};

/* Named events; the "event=" codes match the enum values above. */
PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");

/*
 * Result of perf_msr_probe() on core_msr[]; cstate_pmu_event_init()
 * only accepts an event whose bit is set here.
 */
static unsigned long core_msr_mask;

/* One group_cstate_core_* per event, used by core_msr[] and core_attr_update[]. */
PMU_EVENT_GROUP(events, cstate_core_c1);
PMU_EVENT_GROUP(events, cstate_core_c3);
PMU_EVENT_GROUP(events, cstate_core_c6);
PMU_EVENT_GROUP(events, cstate_core_c7);
171 
172 static bool test_msr(int idx, void *data)
173 {
174 	return test_bit(idx, (unsigned long *) data);
175 }
176 
/*
 * Core-scope residency counters, indexed by enum perf_cstate_core_events.
 * Each entry holds the MSR to read, the sysfs event group for that event
 * and the test callback. Not const: cstate_probe() may patch .msr for
 * model quirks before probing.
 */
static struct perf_msr core_msr[] = {
	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&group_cstate_core_c1,	test_msr },
	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&group_cstate_core_c3,	test_msr },
	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&group_cstate_core_c6,	test_msr },
	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&group_cstate_core_c7,	test_msr },
};
183 
/* Empty attribute list backing the initially empty "events" group. */
static struct attribute *attrs_empty[] = {
	NULL,
};

/*
 * There are no default events, but we need to create
 * "events" group (with empty attrs) before updating
 * it with detected events.
 */
static struct attribute_group cstate_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

/* "format/event": the whole 64-bit config word selects the event. */
DEFINE_CSTATE_FORMAT_ATTR(cstate_event, event, "config:0-63");
static struct attribute *cstate_format_attrs[] = {
	&format_attr_cstate_event.attr,
	NULL,
};

static struct attribute_group cstate_format_attr_group = {
	.name = "format",
	.attrs = cstate_format_attrs,
};

/* Designated reader CPUs of the cstate_core PMU (see cstate_cpu_init()). */
static cpumask_t cstate_core_cpu_mask;
/* Read-only "cpumask" attribute; one show() routine serves all PMUs. */
static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);

static struct attribute *cstate_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed group: the cpumask attribute is not placed under a sub-directory name. */
static struct attribute_group cpumask_attr_group = {
	.attrs = cstate_cpumask_attrs,
};

/* Attribute groups shared by all three cstate PMUs (.attr_groups). */
static const struct attribute_group *cstate_attr_groups[] = {
	&cstate_events_attr_group,
	&cstate_format_attr_group,
	&cpumask_attr_group,
	NULL,
};
227 
/* cstate_pkg PMU */
static struct pmu cstate_pkg_pmu;
/* Set by cstate_probe() when at least one package event was detected. */
static bool has_cstate_pkg;

/*
 * Package-scope event ids. The value is both the perf 'config' number
 * accepted by cstate_pmu_event_init() and the index into pkg_msr[].
 */
enum perf_cstate_pkg_events {
	PERF_CSTATE_PKG_C2_RES = 0,
	PERF_CSTATE_PKG_C3_RES,
	PERF_CSTATE_PKG_C6_RES,
	PERF_CSTATE_PKG_C7_RES,
	PERF_CSTATE_PKG_C8_RES,
	PERF_CSTATE_PKG_C9_RES,
	PERF_CSTATE_PKG_C10_RES,

	PERF_CSTATE_PKG_EVENT_MAX,
};

/* Named events; the "event=" codes match the enum values above. */
PMU_EVENT_ATTR_STRING(c2-residency,  attr_cstate_pkg_c2,  "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency,  attr_cstate_pkg_c3,  "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency,  attr_cstate_pkg_c6,  "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency,  attr_cstate_pkg_c7,  "event=0x03");
PMU_EVENT_ATTR_STRING(c8-residency,  attr_cstate_pkg_c8,  "event=0x04");
PMU_EVENT_ATTR_STRING(c9-residency,  attr_cstate_pkg_c9,  "event=0x05");
PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");

/*
 * Result of perf_msr_probe() on pkg_msr[]; cstate_pmu_event_init()
 * only accepts an event whose bit is set here.
 */
static unsigned long pkg_msr_mask;

/* One group_cstate_pkg_* per event, used by pkg_msr[] and pkg_attr_update[]. */
PMU_EVENT_GROUP(events, cstate_pkg_c2);
PMU_EVENT_GROUP(events, cstate_pkg_c3);
PMU_EVENT_GROUP(events, cstate_pkg_c6);
PMU_EVENT_GROUP(events, cstate_pkg_c7);
PMU_EVENT_GROUP(events, cstate_pkg_c8);
PMU_EVENT_GROUP(events, cstate_pkg_c9);
PMU_EVENT_GROUP(events, cstate_pkg_c10);

/*
 * Package-scope residency counters, indexed by enum perf_cstate_pkg_events.
 * Not const: cstate_probe() may patch .msr for model quirks before probing.
 */
static struct perf_msr pkg_msr[] = {
	[PERF_CSTATE_PKG_C2_RES]  = { MSR_PKG_C2_RESIDENCY,	&group_cstate_pkg_c2,	test_msr },
	[PERF_CSTATE_PKG_C3_RES]  = { MSR_PKG_C3_RESIDENCY,	&group_cstate_pkg_c3,	test_msr },
	[PERF_CSTATE_PKG_C6_RES]  = { MSR_PKG_C6_RESIDENCY,	&group_cstate_pkg_c6,	test_msr },
	[PERF_CSTATE_PKG_C7_RES]  = { MSR_PKG_C7_RESIDENCY,	&group_cstate_pkg_c7,	test_msr },
	[PERF_CSTATE_PKG_C8_RES]  = { MSR_PKG_C8_RESIDENCY,	&group_cstate_pkg_c8,	test_msr },
	[PERF_CSTATE_PKG_C9_RES]  = { MSR_PKG_C9_RESIDENCY,	&group_cstate_pkg_c9,	test_msr },
	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&group_cstate_pkg_c10,	test_msr },
};

/* Designated reader CPUs of the cstate_pkg PMU (see cstate_cpu_init()). */
static cpumask_t cstate_pkg_cpu_mask;
273 
/* cstate_module PMU */
static struct pmu cstate_module_pmu;
/* Set by cstate_probe() when the module event was detected. */
static bool has_cstate_module;

/*
 * Module-scope event ids. The value is both the perf 'config' number
 * accepted by cstate_pmu_event_init() and the index into module_msr[].
 */
enum perf_cstate_module_events {
	PERF_CSTATE_MODULE_C6_RES = 0,

	PERF_CSTATE_MODULE_EVENT_MAX,
};

PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_module_c6, "event=0x00");

/*
 * Result of perf_msr_probe() on module_msr[]; cstate_pmu_event_init()
 * only accepts an event whose bit is set here.
 */
static unsigned long module_msr_mask;

PMU_EVENT_GROUP(events, cstate_module_c6);

/* Module-scope residency counters, indexed by enum perf_cstate_module_events. */
static struct perf_msr module_msr[] = {
	[PERF_CSTATE_MODULE_C6_RES]  = { MSR_MODULE_C6_RES_MS,	&group_cstate_module_c6,	test_msr },
};

/* Designated reader CPUs of the cstate_module PMU (see cstate_cpu_init()). */
static cpumask_t cstate_module_cpu_mask;
295 
296 static ssize_t cstate_get_attr_cpumask(struct device *dev,
297 				       struct device_attribute *attr,
298 				       char *buf)
299 {
300 	struct pmu *pmu = dev_get_drvdata(dev);
301 
302 	if (pmu == &cstate_core_pmu)
303 		return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
304 	else if (pmu == &cstate_pkg_pmu)
305 		return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
306 	else if (pmu == &cstate_module_pmu)
307 		return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
308 	else
309 		return 0;
310 }
311 
312 static int cstate_pmu_event_init(struct perf_event *event)
313 {
314 	u64 cfg = event->attr.config;
315 	int cpu;
316 
317 	if (event->attr.type != event->pmu->type)
318 		return -ENOENT;
319 
320 	/* unsupported modes and filters */
321 	if (event->attr.sample_period) /* no sampling */
322 		return -EINVAL;
323 
324 	if (event->cpu < 0)
325 		return -EINVAL;
326 
327 	if (event->pmu == &cstate_core_pmu) {
328 		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
329 			return -EINVAL;
330 		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
331 		if (!(core_msr_mask & (1 << cfg)))
332 			return -EINVAL;
333 		event->hw.event_base = core_msr[cfg].msr;
334 		cpu = cpumask_any_and(&cstate_core_cpu_mask,
335 				      topology_sibling_cpumask(event->cpu));
336 	} else if (event->pmu == &cstate_pkg_pmu) {
337 		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
338 			return -EINVAL;
339 		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
340 		if (!(pkg_msr_mask & (1 << cfg)))
341 			return -EINVAL;
342 
343 		event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
344 
345 		event->hw.event_base = pkg_msr[cfg].msr;
346 		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
347 				      topology_die_cpumask(event->cpu));
348 	} else if (event->pmu == &cstate_module_pmu) {
349 		if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
350 			return -EINVAL;
351 		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_MODULE_EVENT_MAX);
352 		if (!(module_msr_mask & (1 << cfg)))
353 			return -EINVAL;
354 		event->hw.event_base = module_msr[cfg].msr;
355 		cpu = cpumask_any_and(&cstate_module_cpu_mask,
356 				      topology_cluster_cpumask(event->cpu));
357 	} else {
358 		return -ENOENT;
359 	}
360 
361 	if (cpu >= nr_cpu_ids)
362 		return -ENODEV;
363 
364 	event->cpu = cpu;
365 	event->hw.config = cfg;
366 	event->hw.idx = -1;
367 	return 0;
368 }
369 
370 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
371 {
372 	u64 val;
373 
374 	rdmsrl(event->hw.event_base, val);
375 	return val;
376 }
377 
378 static void cstate_pmu_event_update(struct perf_event *event)
379 {
380 	struct hw_perf_event *hwc = &event->hw;
381 	u64 prev_raw_count, new_raw_count;
382 
383 	prev_raw_count = local64_read(&hwc->prev_count);
384 	do {
385 		new_raw_count = cstate_pmu_read_counter(event);
386 	} while (!local64_try_cmpxchg(&hwc->prev_count,
387 				      &prev_raw_count, new_raw_count));
388 
389 	local64_add(new_raw_count - prev_raw_count, &event->count);
390 }
391 
392 static void cstate_pmu_event_start(struct perf_event *event, int mode)
393 {
394 	local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
395 }
396 
/*
 * .stop callback: the counters are free running and cannot be halted, so
 * stopping only accumulates the progress made so far. @mode is not used.
 */
static void cstate_pmu_event_stop(struct perf_event *event, int mode)
{
	cstate_pmu_event_update(event);
}
401 
402 static void cstate_pmu_event_del(struct perf_event *event, int mode)
403 {
404 	cstate_pmu_event_stop(event, PERF_EF_UPDATE);
405 }
406 
407 static int cstate_pmu_event_add(struct perf_event *event, int mode)
408 {
409 	if (mode & PERF_EF_START)
410 		cstate_pmu_event_start(event, mode);
411 
412 	return 0;
413 }
414 
415 /*
416  * Check if exiting cpu is the designated reader. If so migrate the
417  * events when there is a valid target available
418  */
419 static int cstate_cpu_exit(unsigned int cpu)
420 {
421 	unsigned int target;
422 
423 	if (has_cstate_core &&
424 	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
425 
426 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
427 		/* Migrate events if there is a valid target */
428 		if (target < nr_cpu_ids) {
429 			cpumask_set_cpu(target, &cstate_core_cpu_mask);
430 			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
431 		}
432 	}
433 
434 	if (has_cstate_pkg &&
435 	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
436 
437 		target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
438 		/* Migrate events if there is a valid target */
439 		if (target < nr_cpu_ids) {
440 			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
441 			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
442 		}
443 	}
444 
445 	if (has_cstate_module &&
446 	    cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
447 
448 		target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
449 		/* Migrate events if there is a valid target */
450 		if (target < nr_cpu_ids) {
451 			cpumask_set_cpu(target, &cstate_module_cpu_mask);
452 			perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
453 		}
454 	}
455 	return 0;
456 }
457 
458 static int cstate_cpu_init(unsigned int cpu)
459 {
460 	unsigned int target;
461 
462 	/*
463 	 * If this is the first online thread of that core, set it in
464 	 * the core cpu mask as the designated reader.
465 	 */
466 	target = cpumask_any_and(&cstate_core_cpu_mask,
467 				 topology_sibling_cpumask(cpu));
468 
469 	if (has_cstate_core && target >= nr_cpu_ids)
470 		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
471 
472 	/*
473 	 * If this is the first online thread of that package, set it
474 	 * in the package cpu mask as the designated reader.
475 	 */
476 	target = cpumask_any_and(&cstate_pkg_cpu_mask,
477 				 topology_die_cpumask(cpu));
478 	if (has_cstate_pkg && target >= nr_cpu_ids)
479 		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
480 
481 	/*
482 	 * If this is the first online thread of that cluster, set it
483 	 * in the cluster cpu mask as the designated reader.
484 	 */
485 	target = cpumask_any_and(&cstate_module_cpu_mask,
486 				 topology_cluster_cpumask(cpu));
487 	if (has_cstate_module && target >= nr_cpu_ids)
488 		cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
489 
490 	return 0;
491 }
492 
/*
 * Per-PMU .attr_update lists: the per-event attribute groups that also
 * appear in the matching *_msr[] table, NULL terminated.
 */
static const struct attribute_group *core_attr_update[] = {
	&group_cstate_core_c1,
	&group_cstate_core_c3,
	&group_cstate_core_c6,
	&group_cstate_core_c7,
	NULL,
};

static const struct attribute_group *pkg_attr_update[] = {
	&group_cstate_pkg_c2,
	&group_cstate_pkg_c3,
	&group_cstate_pkg_c6,
	&group_cstate_pkg_c7,
	&group_cstate_pkg_c8,
	&group_cstate_pkg_c9,
	&group_cstate_pkg_c10,
	NULL,
};

static const struct attribute_group *module_attr_update[] = {
	&group_cstate_module_c6,
	NULL
};
516 
/*
 * The three PMU descriptors share every callback and the common attribute
 * groups; they differ only in .name and .attr_update. All of them use
 * perf_invalid_context as task context and advertise
 * PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE.
 */

/* Core-scope residency counters. */
static struct pmu cstate_core_pmu = {
	.attr_groups	= cstate_attr_groups,
	.attr_update	= core_attr_update,
	.name		= "cstate_core",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

/*
 * Package-scope residency counters. cstate_init() registers this PMU as
 * "cstate_die" instead of .name when a package has more than one die.
 */
static struct pmu cstate_pkg_pmu = {
	.attr_groups	= cstate_attr_groups,
	.attr_update	= pkg_attr_update,
	.name		= "cstate_pkg",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

/* Module (cluster) scope residency counters. */
static struct pmu cstate_module_pmu = {
	.attr_groups	= cstate_attr_groups,
	.attr_update	= module_attr_update,
	.name		= "cstate_module",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};
561 
/*
 * Per-model event tables. Each table lists, as BIT(PERF_CSTATE_*_RES)
 * masks, the core / package / module events of one group of CPU models,
 * plus optional quirks. intel_cstates_match[] below maps CPU models to
 * these tables. Members that are not initialized are zero, i.e. no events
 * of that scope.
 */

/* Core C3/C6; package C3/C6/C7. */
static const struct cstate_model nhm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

/* Core C3/C6/C7; package C2/C3/C6/C7. */
static const struct cstate_model snb_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

/* Same core events as snb_cstates, plus package C8/C9/C10. */
static const struct cstate_model hswult_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* All four core events and all seven package events. */
static const struct cstate_model cnl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Core C6/C7; all seven package events. */
static const struct cstate_model icl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Core C1/C6; package C2/C6. */
static const struct cstate_model icx_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES),
};

/* Core C1/C6/C7; all seven package events. */
static const struct cstate_model adl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Core C1/C6; package C6, read through the SLM_PKG_C6_USE_C7_MSR quirk. */
static const struct cstate_model slm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= SLM_PKG_C6_USE_C7_MSR,
};


/* Core C6 (with the KNL_CORE_C6_MSR quirk); package C2/C3/C6. */
static const struct cstate_model knl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= KNL_CORE_C6_MSR,
};


/* Core C1/C3/C6; package C2/C3/C6/C10. */
static const struct cstate_model glm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Core C1/C6; module C6; no package events. */
static const struct cstate_model grr_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
};

/* Core C1/C6; package C6; module C6. */
static const struct cstate_model srf_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),

	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
};
691 
692 
/*
 * CPU model -> cstate_model table. Each entry pairs an INTEL_* model
 * identifier with the event table for that model; cstate_pmu_init() looks
 * the boot CPU up with x86_match_cpu() and reads the table pointer back
 * from the matching entry's driver_data. The empty entry terminates the
 * list. Also exported through MODULE_DEVICE_TABLE().
 */
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_MATCH_VFM(INTEL_NEHALEM,		&nhm_cstates),
	X86_MATCH_VFM(INTEL_NEHALEM_EP,		&nhm_cstates),
	X86_MATCH_VFM(INTEL_NEHALEM_EX,		&nhm_cstates),

	X86_MATCH_VFM(INTEL_WESTMERE,		&nhm_cstates),
	X86_MATCH_VFM(INTEL_WESTMERE_EP,	&nhm_cstates),
	X86_MATCH_VFM(INTEL_WESTMERE_EX,	&nhm_cstates),

	X86_MATCH_VFM(INTEL_SANDYBRIDGE,	&snb_cstates),
	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,	&snb_cstates),

	X86_MATCH_VFM(INTEL_IVYBRIDGE,		&snb_cstates),
	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,	&snb_cstates),

	X86_MATCH_VFM(INTEL_HASWELL,		&snb_cstates),
	X86_MATCH_VFM(INTEL_HASWELL_X,		&snb_cstates),
	X86_MATCH_VFM(INTEL_HASWELL_G,		&snb_cstates),

	X86_MATCH_VFM(INTEL_HASWELL_L,		&hswult_cstates),

	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,	&slm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D,	&slm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,	&slm_cstates),

	X86_MATCH_VFM(INTEL_BROADWELL,		&snb_cstates),
	X86_MATCH_VFM(INTEL_BROADWELL_D,	&snb_cstates),
	X86_MATCH_VFM(INTEL_BROADWELL_G,	&snb_cstates),
	X86_MATCH_VFM(INTEL_BROADWELL_X,	&snb_cstates),

	X86_MATCH_VFM(INTEL_SKYLAKE_L,		&snb_cstates),
	X86_MATCH_VFM(INTEL_SKYLAKE,		&snb_cstates),
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		&snb_cstates),

	X86_MATCH_VFM(INTEL_KABYLAKE_L,		&hswult_cstates),
	X86_MATCH_VFM(INTEL_KABYLAKE,		&hswult_cstates),
	X86_MATCH_VFM(INTEL_COMETLAKE_L,	&hswult_cstates),
	X86_MATCH_VFM(INTEL_COMETLAKE,		&hswult_cstates),

	X86_MATCH_VFM(INTEL_CANNONLAKE_L,	&cnl_cstates),

	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,	&knl_cstates),
	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,	&knl_cstates),

	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,	&glm_cstates),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&adl_cstates),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&srf_cstates),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&grr_cstates),

	X86_MATCH_VFM(INTEL_ICELAKE_L,		&icl_cstates),
	X86_MATCH_VFM(INTEL_ICELAKE,		&icl_cstates),
	X86_MATCH_VFM(INTEL_ICELAKE_X,		&icx_cstates),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		&icx_cstates),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&icx_cstates),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&icx_cstates),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X,	&icx_cstates),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D,	&icx_cstates),

	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	&icl_cstates),
	X86_MATCH_VFM(INTEL_TIGERLAKE,		&icl_cstates),
	X86_MATCH_VFM(INTEL_ROCKETLAKE,		&icl_cstates),
	X86_MATCH_VFM(INTEL_ALDERLAKE,		&adl_cstates),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&adl_cstates),
	X86_MATCH_VFM(INTEL_RAPTORLAKE,		&adl_cstates),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_P,	&adl_cstates),
	X86_MATCH_VFM(INTEL_RAPTORLAKE_S,	&adl_cstates),
	X86_MATCH_VFM(INTEL_METEORLAKE,		&adl_cstates),
	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&adl_cstates),
	{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
769 
770 static int __init cstate_probe(const struct cstate_model *cm)
771 {
772 	/* SLM has different MSR for PKG C6 */
773 	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
774 		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
775 
776 	/* KNL has different MSR for CORE C6 */
777 	if (cm->quirks & KNL_CORE_C6_MSR)
778 		pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
779 
780 
781 	core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
782 				       true, (void *) &cm->core_events);
783 
784 	pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
785 				      true, (void *) &cm->pkg_events);
786 
787 	module_msr_mask = perf_msr_probe(module_msr, PERF_CSTATE_MODULE_EVENT_MAX,
788 				      true, (void *) &cm->module_events);
789 
790 	has_cstate_core = !!core_msr_mask;
791 	has_cstate_pkg  = !!pkg_msr_mask;
792 	has_cstate_module  = !!module_msr_mask;
793 
794 	return (has_cstate_core || has_cstate_pkg || has_cstate_module) ? 0 : -ENODEV;
795 }
796 
797 static inline void cstate_cleanup(void)
798 {
799 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
800 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
801 
802 	if (has_cstate_core)
803 		perf_pmu_unregister(&cstate_core_pmu);
804 
805 	if (has_cstate_pkg)
806 		perf_pmu_unregister(&cstate_pkg_pmu);
807 
808 	if (has_cstate_module)
809 		perf_pmu_unregister(&cstate_module_pmu);
810 }
811 
812 static int __init cstate_init(void)
813 {
814 	int err;
815 
816 	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
817 			  "perf/x86/cstate:starting", cstate_cpu_init, NULL);
818 	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
819 			  "perf/x86/cstate:online", NULL, cstate_cpu_exit);
820 
821 	if (has_cstate_core) {
822 		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
823 		if (err) {
824 			has_cstate_core = false;
825 			pr_info("Failed to register cstate core pmu\n");
826 			cstate_cleanup();
827 			return err;
828 		}
829 	}
830 
831 	if (has_cstate_pkg) {
832 		if (topology_max_dies_per_package() > 1) {
833 			err = perf_pmu_register(&cstate_pkg_pmu,
834 						"cstate_die", -1);
835 		} else {
836 			err = perf_pmu_register(&cstate_pkg_pmu,
837 						cstate_pkg_pmu.name, -1);
838 		}
839 		if (err) {
840 			has_cstate_pkg = false;
841 			pr_info("Failed to register cstate pkg pmu\n");
842 			cstate_cleanup();
843 			return err;
844 		}
845 	}
846 
847 	if (has_cstate_module) {
848 		err = perf_pmu_register(&cstate_module_pmu, cstate_module_pmu.name, -1);
849 		if (err) {
850 			has_cstate_module = false;
851 			pr_info("Failed to register cstate cluster pmu\n");
852 			cstate_cleanup();
853 			return err;
854 		}
855 	}
856 	return 0;
857 }
858 
859 static int __init cstate_pmu_init(void)
860 {
861 	const struct x86_cpu_id *id;
862 	int err;
863 
864 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
865 		return -ENODEV;
866 
867 	id = x86_match_cpu(intel_cstates_match);
868 	if (!id)
869 		return -ENODEV;
870 
871 	err = cstate_probe((const struct cstate_model *) id->driver_data);
872 	if (err)
873 		return err;
874 
875 	return cstate_init();
876 }
877 module_init(cstate_pmu_init);
878 
879 static void __exit cstate_pmu_exit(void)
880 {
881 	cstate_cleanup();
882 }
883 module_exit(cstate_pmu_exit);
884