xref: /linux/drivers/idle/intel_idle.c (revision 14ebe69091935d65d9dc452c1bad8fed20d06c29)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9 
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16 
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *	for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *	(in contrast to entering ACPI C3, in which case the WBINVD
27  *	instruction needs to be executed to flush the caches)
28  */
29 
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38 
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41 
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43 
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/sched/smt.h>
51 #include <linux/notifier.h>
52 #include <linux/cpu.h>
53 #include <linux/moduleparam.h>
54 #include <asm/cpuid.h>
55 #include <asm/cpu_device_id.h>
56 #include <asm/intel-family.h>
57 #include <asm/mwait.h>
58 #include <asm/spec-ctrl.h>
59 #include <asm/tsc.h>
60 #include <asm/fpu/api.h>
61 
62 #define INTEL_IDLE_VERSION "0.5.1"
63 
64 static struct cpuidle_driver intel_idle_driver = {
65 	.name = "intel_idle",
66 	.owner = THIS_MODULE,
67 };
68 /* intel_idle.max_cstate=0 disables driver */
69 static int max_cstate = CPUIDLE_STATE_MAX - 1;
70 static unsigned int disabled_states_mask __read_mostly;
71 static unsigned int preferred_states_mask __read_mostly;
72 static bool force_irq_on __read_mostly;
73 static bool ibrs_off __read_mostly;
74 
75 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
76 
77 static unsigned long auto_demotion_disable_flags;
78 
/*
 * Requested handling of C1E promotion.  The initial setting is
 * C1E_PROMOTION_PRESERVE.
 */
static enum {
	C1E_PROMOTION_PRESERVE = 0,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
84 
/*
 * Description of a supported processor: its idle states table plus a few
 * quirk switches (presumably one instance per CPU model - confirm against
 * the model match table).
 */
struct idle_cpu {
	/* Table of the idle states available on this processor. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion is not always the optimal choice;
	 * this field names the enable bits that should be cleared.
	 */
	unsigned long auto_demotion_disable_flags;

	/* Quirk switches; see their users for the exact semantics. */
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	bool use_acpi;
};
97 
98 static const struct idle_cpu *icpu __initdata;
99 static struct cpuidle_state *cpuidle_state_table __initdata;
100 
101 static unsigned int mwait_substates __initdata;
102 
/*
 * Driver-private bits stored in the .flags word of an idle state
 * (bits 14 through 18).
 */

/*
 * Interrupts get enabled before the C-state is entered.  For some C-states
 * on some platforms this can measurably reduce interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * The state is enabled by default even when the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * IBRS is disabled across idle (when KERNEL_IBRS).  Mutually exclusive with
 * CPUIDLE_FLAG_IRQ_ENABLE.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * The large xstate gets initialized before entering the C6 state.
 */
#define CPUIDLE_FLAG_INIT_XSTATE	BIT(17)

/*
 * The sub-state part of the mwait hint is ignored when hints from the ACPI
 * _CST are matched against the custom tables.
 */
#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH	BIT(18)
130 
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 *
 *   flg2MWAIT(flags) - extract the 8-bit hint from bits 31:24 of @flags.
 *   MWAIT2flg(eax)   - place the low 8 bits of @eax into bits 31:24.
 *
 * Both macros parenthesize their argument so that an expression argument
 * (e.g. "a | b") is masked as a whole instead of only its last operand.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
140 
__intel_idle(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index,bool irqoff)141 static __always_inline int __intel_idle(struct cpuidle_device *dev,
142 					struct cpuidle_driver *drv,
143 					int index, bool irqoff)
144 {
145 	struct cpuidle_state *state = &drv->states[index];
146 	unsigned long eax = flg2MWAIT(state->flags);
147 	unsigned long ecx = 1*irqoff; /* break on interrupt flag */
148 
149 	mwait_idle_with_hints(eax, ecx);
150 
151 	return index;
152 }
153 
154 /**
155  * intel_idle - Ask the processor to enter the given idle state.
156  * @dev: cpuidle device of the target CPU.
157  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
158  * @index: Target idle state index.
159  *
160  * Use the MWAIT instruction to notify the processor that the CPU represented by
161  * @dev is idle and it can try to enter the idle state corresponding to @index.
162  *
163  * If the local APIC timer is not known to be reliable in the target idle state,
164  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
165  *
166  * Must be called under local_irq_disable().
167  */
intel_idle(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index)168 static __cpuidle int intel_idle(struct cpuidle_device *dev,
169 				struct cpuidle_driver *drv, int index)
170 {
171 	return __intel_idle(dev, drv, index, true);
172 }
173 
intel_idle_irq(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index)174 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
175 				    struct cpuidle_driver *drv, int index)
176 {
177 	return __intel_idle(dev, drv, index, false);
178 }
179 
intel_idle_ibrs(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index)180 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
181 				     struct cpuidle_driver *drv, int index)
182 {
183 	bool smt_active = sched_smt_active();
184 	u64 spec_ctrl = spec_ctrl_current();
185 	int ret;
186 
187 	if (smt_active)
188 		__update_spec_ctrl(0);
189 
190 	ret = __intel_idle(dev, drv, index, true);
191 
192 	if (smt_active)
193 		__update_spec_ctrl(spec_ctrl);
194 
195 	return ret;
196 }
197 
intel_idle_xstate(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index)198 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
199 				       struct cpuidle_driver *drv, int index)
200 {
201 	fpu_idle_fpregs();
202 	return __intel_idle(dev, drv, index, true);
203 }
204 
205 /**
206  * intel_idle_s2idle - Ask the processor to enter the given idle state.
207  * @dev: cpuidle device of the target CPU.
208  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
209  * @index: Target idle state index.
210  *
211  * Use the MWAIT instruction to notify the processor that the CPU represented by
212  * @dev is idle and it can try to enter the idle state corresponding to @index.
213  *
214  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
215  * scheduler tick and suspended scheduler clock on the target CPU.
216  */
intel_idle_s2idle(struct cpuidle_device * dev,struct cpuidle_driver * drv,int index)217 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
218 				       struct cpuidle_driver *drv, int index)
219 {
220 	unsigned long ecx = 1; /* break on interrupt flag */
221 	struct cpuidle_state *state = &drv->states[index];
222 	unsigned long eax = flg2MWAIT(state->flags);
223 
224 	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
225 		fpu_idle_fpregs();
226 
227 	mwait_idle_with_hints(eax, ecx);
228 
229 	return 0;
230 }
231 
232 /*
233  * States are indexed by the cstate number,
234  * which is also the index into the MWAIT hint array.
235  * Thus C0 is a dummy.
236  */
237 static struct cpuidle_state nehalem_cstates[] __initdata = {
238 	{
239 		.name = "C1",
240 		.desc = "MWAIT 0x00",
241 		.flags = MWAIT2flg(0x00),
242 		.exit_latency = 3,
243 		.target_residency = 6,
244 		.enter = &intel_idle,
245 		.enter_s2idle = intel_idle_s2idle, },
246 	{
247 		.name = "C1E",
248 		.desc = "MWAIT 0x01",
249 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
250 		.exit_latency = 10,
251 		.target_residency = 20,
252 		.enter = &intel_idle,
253 		.enter_s2idle = intel_idle_s2idle, },
254 	{
255 		.name = "C3",
256 		.desc = "MWAIT 0x10",
257 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
258 		.exit_latency = 20,
259 		.target_residency = 80,
260 		.enter = &intel_idle,
261 		.enter_s2idle = intel_idle_s2idle, },
262 	{
263 		.name = "C6",
264 		.desc = "MWAIT 0x20",
265 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
266 		.exit_latency = 200,
267 		.target_residency = 800,
268 		.enter = &intel_idle,
269 		.enter_s2idle = intel_idle_s2idle, },
270 	{
271 		.enter = NULL }
272 };
273 
274 static struct cpuidle_state snb_cstates[] __initdata = {
275 	{
276 		.name = "C1",
277 		.desc = "MWAIT 0x00",
278 		.flags = MWAIT2flg(0x00),
279 		.exit_latency = 2,
280 		.target_residency = 2,
281 		.enter = &intel_idle,
282 		.enter_s2idle = intel_idle_s2idle, },
283 	{
284 		.name = "C1E",
285 		.desc = "MWAIT 0x01",
286 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
287 		.exit_latency = 10,
288 		.target_residency = 20,
289 		.enter = &intel_idle,
290 		.enter_s2idle = intel_idle_s2idle, },
291 	{
292 		.name = "C3",
293 		.desc = "MWAIT 0x10",
294 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
295 		.exit_latency = 80,
296 		.target_residency = 211,
297 		.enter = &intel_idle,
298 		.enter_s2idle = intel_idle_s2idle, },
299 	{
300 		.name = "C6",
301 		.desc = "MWAIT 0x20",
302 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
303 		.exit_latency = 104,
304 		.target_residency = 345,
305 		.enter = &intel_idle,
306 		.enter_s2idle = intel_idle_s2idle, },
307 	{
308 		.name = "C7",
309 		.desc = "MWAIT 0x30",
310 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
311 		.exit_latency = 109,
312 		.target_residency = 345,
313 		.enter = &intel_idle,
314 		.enter_s2idle = intel_idle_s2idle, },
315 	{
316 		.enter = NULL }
317 };
318 
319 static struct cpuidle_state byt_cstates[] __initdata = {
320 	{
321 		.name = "C1",
322 		.desc = "MWAIT 0x00",
323 		.flags = MWAIT2flg(0x00),
324 		.exit_latency = 1,
325 		.target_residency = 1,
326 		.enter = &intel_idle,
327 		.enter_s2idle = intel_idle_s2idle, },
328 	{
329 		.name = "C6N",
330 		.desc = "MWAIT 0x58",
331 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
332 		.exit_latency = 300,
333 		.target_residency = 275,
334 		.enter = &intel_idle,
335 		.enter_s2idle = intel_idle_s2idle, },
336 	{
337 		.name = "C6S",
338 		.desc = "MWAIT 0x52",
339 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
340 		.exit_latency = 500,
341 		.target_residency = 560,
342 		.enter = &intel_idle,
343 		.enter_s2idle = intel_idle_s2idle, },
344 	{
345 		.name = "C7",
346 		.desc = "MWAIT 0x60",
347 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
348 		.exit_latency = 1200,
349 		.target_residency = 4000,
350 		.enter = &intel_idle,
351 		.enter_s2idle = intel_idle_s2idle, },
352 	{
353 		.name = "C7S",
354 		.desc = "MWAIT 0x64",
355 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
356 		.exit_latency = 10000,
357 		.target_residency = 20000,
358 		.enter = &intel_idle,
359 		.enter_s2idle = intel_idle_s2idle, },
360 	{
361 		.enter = NULL }
362 };
363 
364 static struct cpuidle_state cht_cstates[] __initdata = {
365 	{
366 		.name = "C1",
367 		.desc = "MWAIT 0x00",
368 		.flags = MWAIT2flg(0x00),
369 		.exit_latency = 1,
370 		.target_residency = 1,
371 		.enter = &intel_idle,
372 		.enter_s2idle = intel_idle_s2idle, },
373 	{
374 		.name = "C6N",
375 		.desc = "MWAIT 0x58",
376 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
377 		.exit_latency = 80,
378 		.target_residency = 275,
379 		.enter = &intel_idle,
380 		.enter_s2idle = intel_idle_s2idle, },
381 	{
382 		.name = "C6S",
383 		.desc = "MWAIT 0x52",
384 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
385 		.exit_latency = 200,
386 		.target_residency = 560,
387 		.enter = &intel_idle,
388 		.enter_s2idle = intel_idle_s2idle, },
389 	{
390 		.name = "C7",
391 		.desc = "MWAIT 0x60",
392 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
393 		.exit_latency = 1200,
394 		.target_residency = 4000,
395 		.enter = &intel_idle,
396 		.enter_s2idle = intel_idle_s2idle, },
397 	{
398 		.name = "C7S",
399 		.desc = "MWAIT 0x64",
400 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
401 		.exit_latency = 10000,
402 		.target_residency = 20000,
403 		.enter = &intel_idle,
404 		.enter_s2idle = intel_idle_s2idle, },
405 	{
406 		.enter = NULL }
407 };
408 
409 static struct cpuidle_state ivb_cstates[] __initdata = {
410 	{
411 		.name = "C1",
412 		.desc = "MWAIT 0x00",
413 		.flags = MWAIT2flg(0x00),
414 		.exit_latency = 1,
415 		.target_residency = 1,
416 		.enter = &intel_idle,
417 		.enter_s2idle = intel_idle_s2idle, },
418 	{
419 		.name = "C1E",
420 		.desc = "MWAIT 0x01",
421 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
422 		.exit_latency = 10,
423 		.target_residency = 20,
424 		.enter = &intel_idle,
425 		.enter_s2idle = intel_idle_s2idle, },
426 	{
427 		.name = "C3",
428 		.desc = "MWAIT 0x10",
429 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
430 		.exit_latency = 59,
431 		.target_residency = 156,
432 		.enter = &intel_idle,
433 		.enter_s2idle = intel_idle_s2idle, },
434 	{
435 		.name = "C6",
436 		.desc = "MWAIT 0x20",
437 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
438 		.exit_latency = 80,
439 		.target_residency = 300,
440 		.enter = &intel_idle,
441 		.enter_s2idle = intel_idle_s2idle, },
442 	{
443 		.name = "C7",
444 		.desc = "MWAIT 0x30",
445 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
446 		.exit_latency = 87,
447 		.target_residency = 300,
448 		.enter = &intel_idle,
449 		.enter_s2idle = intel_idle_s2idle, },
450 	{
451 		.enter = NULL }
452 };
453 
454 static struct cpuidle_state ivt_cstates[] __initdata = {
455 	{
456 		.name = "C1",
457 		.desc = "MWAIT 0x00",
458 		.flags = MWAIT2flg(0x00),
459 		.exit_latency = 1,
460 		.target_residency = 1,
461 		.enter = &intel_idle,
462 		.enter_s2idle = intel_idle_s2idle, },
463 	{
464 		.name = "C1E",
465 		.desc = "MWAIT 0x01",
466 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
467 		.exit_latency = 10,
468 		.target_residency = 80,
469 		.enter = &intel_idle,
470 		.enter_s2idle = intel_idle_s2idle, },
471 	{
472 		.name = "C3",
473 		.desc = "MWAIT 0x10",
474 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
475 		.exit_latency = 59,
476 		.target_residency = 156,
477 		.enter = &intel_idle,
478 		.enter_s2idle = intel_idle_s2idle, },
479 	{
480 		.name = "C6",
481 		.desc = "MWAIT 0x20",
482 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
483 		.exit_latency = 82,
484 		.target_residency = 300,
485 		.enter = &intel_idle,
486 		.enter_s2idle = intel_idle_s2idle, },
487 	{
488 		.enter = NULL }
489 };
490 
491 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
492 	{
493 		.name = "C1",
494 		.desc = "MWAIT 0x00",
495 		.flags = MWAIT2flg(0x00),
496 		.exit_latency = 1,
497 		.target_residency = 1,
498 		.enter = &intel_idle,
499 		.enter_s2idle = intel_idle_s2idle, },
500 	{
501 		.name = "C1E",
502 		.desc = "MWAIT 0x01",
503 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
504 		.exit_latency = 10,
505 		.target_residency = 250,
506 		.enter = &intel_idle,
507 		.enter_s2idle = intel_idle_s2idle, },
508 	{
509 		.name = "C3",
510 		.desc = "MWAIT 0x10",
511 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
512 		.exit_latency = 59,
513 		.target_residency = 300,
514 		.enter = &intel_idle,
515 		.enter_s2idle = intel_idle_s2idle, },
516 	{
517 		.name = "C6",
518 		.desc = "MWAIT 0x20",
519 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
520 		.exit_latency = 84,
521 		.target_residency = 400,
522 		.enter = &intel_idle,
523 		.enter_s2idle = intel_idle_s2idle, },
524 	{
525 		.enter = NULL }
526 };
527 
528 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
529 	{
530 		.name = "C1",
531 		.desc = "MWAIT 0x00",
532 		.flags = MWAIT2flg(0x00),
533 		.exit_latency = 1,
534 		.target_residency = 1,
535 		.enter = &intel_idle,
536 		.enter_s2idle = intel_idle_s2idle, },
537 	{
538 		.name = "C1E",
539 		.desc = "MWAIT 0x01",
540 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
541 		.exit_latency = 10,
542 		.target_residency = 500,
543 		.enter = &intel_idle,
544 		.enter_s2idle = intel_idle_s2idle, },
545 	{
546 		.name = "C3",
547 		.desc = "MWAIT 0x10",
548 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
549 		.exit_latency = 59,
550 		.target_residency = 600,
551 		.enter = &intel_idle,
552 		.enter_s2idle = intel_idle_s2idle, },
553 	{
554 		.name = "C6",
555 		.desc = "MWAIT 0x20",
556 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
557 		.exit_latency = 88,
558 		.target_residency = 700,
559 		.enter = &intel_idle,
560 		.enter_s2idle = intel_idle_s2idle, },
561 	{
562 		.enter = NULL }
563 };
564 
565 static struct cpuidle_state hsw_cstates[] __initdata = {
566 	{
567 		.name = "C1",
568 		.desc = "MWAIT 0x00",
569 		.flags = MWAIT2flg(0x00),
570 		.exit_latency = 2,
571 		.target_residency = 2,
572 		.enter = &intel_idle,
573 		.enter_s2idle = intel_idle_s2idle, },
574 	{
575 		.name = "C1E",
576 		.desc = "MWAIT 0x01",
577 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
578 		.exit_latency = 10,
579 		.target_residency = 20,
580 		.enter = &intel_idle,
581 		.enter_s2idle = intel_idle_s2idle, },
582 	{
583 		.name = "C3",
584 		.desc = "MWAIT 0x10",
585 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
586 		.exit_latency = 33,
587 		.target_residency = 100,
588 		.enter = &intel_idle,
589 		.enter_s2idle = intel_idle_s2idle, },
590 	{
591 		.name = "C6",
592 		.desc = "MWAIT 0x20",
593 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
594 		.exit_latency = 133,
595 		.target_residency = 400,
596 		.enter = &intel_idle,
597 		.enter_s2idle = intel_idle_s2idle, },
598 	{
599 		.name = "C7s",
600 		.desc = "MWAIT 0x32",
601 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
602 		.exit_latency = 166,
603 		.target_residency = 500,
604 		.enter = &intel_idle,
605 		.enter_s2idle = intel_idle_s2idle, },
606 	{
607 		.name = "C8",
608 		.desc = "MWAIT 0x40",
609 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
610 		.exit_latency = 300,
611 		.target_residency = 900,
612 		.enter = &intel_idle,
613 		.enter_s2idle = intel_idle_s2idle, },
614 	{
615 		.name = "C9",
616 		.desc = "MWAIT 0x50",
617 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
618 		.exit_latency = 600,
619 		.target_residency = 1800,
620 		.enter = &intel_idle,
621 		.enter_s2idle = intel_idle_s2idle, },
622 	{
623 		.name = "C10",
624 		.desc = "MWAIT 0x60",
625 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
626 		.exit_latency = 2600,
627 		.target_residency = 7700,
628 		.enter = &intel_idle,
629 		.enter_s2idle = intel_idle_s2idle, },
630 	{
631 		.enter = NULL }
632 };
633 static struct cpuidle_state bdw_cstates[] __initdata = {
634 	{
635 		.name = "C1",
636 		.desc = "MWAIT 0x00",
637 		.flags = MWAIT2flg(0x00),
638 		.exit_latency = 2,
639 		.target_residency = 2,
640 		.enter = &intel_idle,
641 		.enter_s2idle = intel_idle_s2idle, },
642 	{
643 		.name = "C1E",
644 		.desc = "MWAIT 0x01",
645 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
646 		.exit_latency = 10,
647 		.target_residency = 20,
648 		.enter = &intel_idle,
649 		.enter_s2idle = intel_idle_s2idle, },
650 	{
651 		.name = "C3",
652 		.desc = "MWAIT 0x10",
653 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
654 		.exit_latency = 40,
655 		.target_residency = 100,
656 		.enter = &intel_idle,
657 		.enter_s2idle = intel_idle_s2idle, },
658 	{
659 		.name = "C6",
660 		.desc = "MWAIT 0x20",
661 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
662 		.exit_latency = 133,
663 		.target_residency = 400,
664 		.enter = &intel_idle,
665 		.enter_s2idle = intel_idle_s2idle, },
666 	{
667 		.name = "C7s",
668 		.desc = "MWAIT 0x32",
669 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
670 		.exit_latency = 166,
671 		.target_residency = 500,
672 		.enter = &intel_idle,
673 		.enter_s2idle = intel_idle_s2idle, },
674 	{
675 		.name = "C8",
676 		.desc = "MWAIT 0x40",
677 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
678 		.exit_latency = 300,
679 		.target_residency = 900,
680 		.enter = &intel_idle,
681 		.enter_s2idle = intel_idle_s2idle, },
682 	{
683 		.name = "C9",
684 		.desc = "MWAIT 0x50",
685 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
686 		.exit_latency = 600,
687 		.target_residency = 1800,
688 		.enter = &intel_idle,
689 		.enter_s2idle = intel_idle_s2idle, },
690 	{
691 		.name = "C10",
692 		.desc = "MWAIT 0x60",
693 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
694 		.exit_latency = 2600,
695 		.target_residency = 7700,
696 		.enter = &intel_idle,
697 		.enter_s2idle = intel_idle_s2idle, },
698 	{
699 		.enter = NULL }
700 };
701 
702 static struct cpuidle_state skl_cstates[] __initdata = {
703 	{
704 		.name = "C1",
705 		.desc = "MWAIT 0x00",
706 		.flags = MWAIT2flg(0x00),
707 		.exit_latency = 2,
708 		.target_residency = 2,
709 		.enter = &intel_idle,
710 		.enter_s2idle = intel_idle_s2idle, },
711 	{
712 		.name = "C1E",
713 		.desc = "MWAIT 0x01",
714 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
715 		.exit_latency = 10,
716 		.target_residency = 20,
717 		.enter = &intel_idle,
718 		.enter_s2idle = intel_idle_s2idle, },
719 	{
720 		.name = "C3",
721 		.desc = "MWAIT 0x10",
722 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
723 		.exit_latency = 70,
724 		.target_residency = 100,
725 		.enter = &intel_idle,
726 		.enter_s2idle = intel_idle_s2idle, },
727 	{
728 		.name = "C6",
729 		.desc = "MWAIT 0x20",
730 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
731 		.exit_latency = 85,
732 		.target_residency = 200,
733 		.enter = &intel_idle,
734 		.enter_s2idle = intel_idle_s2idle, },
735 	{
736 		.name = "C7s",
737 		.desc = "MWAIT 0x33",
738 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
739 		.exit_latency = 124,
740 		.target_residency = 800,
741 		.enter = &intel_idle,
742 		.enter_s2idle = intel_idle_s2idle, },
743 	{
744 		.name = "C8",
745 		.desc = "MWAIT 0x40",
746 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
747 		.exit_latency = 200,
748 		.target_residency = 800,
749 		.enter = &intel_idle,
750 		.enter_s2idle = intel_idle_s2idle, },
751 	{
752 		.name = "C9",
753 		.desc = "MWAIT 0x50",
754 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
755 		.exit_latency = 480,
756 		.target_residency = 5000,
757 		.enter = &intel_idle,
758 		.enter_s2idle = intel_idle_s2idle, },
759 	{
760 		.name = "C10",
761 		.desc = "MWAIT 0x60",
762 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
763 		.exit_latency = 890,
764 		.target_residency = 5000,
765 		.enter = &intel_idle,
766 		.enter_s2idle = intel_idle_s2idle, },
767 	{
768 		.enter = NULL }
769 };
770 
771 static struct cpuidle_state skx_cstates[] __initdata = {
772 	{
773 		.name = "C1",
774 		.desc = "MWAIT 0x00",
775 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
776 		.exit_latency = 2,
777 		.target_residency = 2,
778 		.enter = &intel_idle,
779 		.enter_s2idle = intel_idle_s2idle, },
780 	{
781 		.name = "C1E",
782 		.desc = "MWAIT 0x01",
783 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
784 		.exit_latency = 10,
785 		.target_residency = 20,
786 		.enter = &intel_idle,
787 		.enter_s2idle = intel_idle_s2idle, },
788 	{
789 		.name = "C6",
790 		.desc = "MWAIT 0x20",
791 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
792 		.exit_latency = 133,
793 		.target_residency = 600,
794 		.enter = &intel_idle,
795 		.enter_s2idle = intel_idle_s2idle, },
796 	{
797 		.enter = NULL }
798 };
799 
800 static struct cpuidle_state icx_cstates[] __initdata = {
801 	{
802 		.name = "C1",
803 		.desc = "MWAIT 0x00",
804 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
805 		.exit_latency = 1,
806 		.target_residency = 1,
807 		.enter = &intel_idle,
808 		.enter_s2idle = intel_idle_s2idle, },
809 	{
810 		.name = "C1E",
811 		.desc = "MWAIT 0x01",
812 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
813 		.exit_latency = 4,
814 		.target_residency = 4,
815 		.enter = &intel_idle,
816 		.enter_s2idle = intel_idle_s2idle, },
817 	{
818 		.name = "C6",
819 		.desc = "MWAIT 0x20",
820 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
821 		.exit_latency = 170,
822 		.target_residency = 600,
823 		.enter = &intel_idle,
824 		.enter_s2idle = intel_idle_s2idle, },
825 	{
826 		.enter = NULL }
827 };
828 
829 /*
830  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
831  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
832  * But in this case there is effectively no C1, because C1 requests are
833  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
834  * and C1E requests end up with C1, so there is effectively no C1E.
835  *
836  * By default we enable C1E and disable C1 by marking it with
837  * 'CPUIDLE_FLAG_UNUSABLE'.
838  */
839 static struct cpuidle_state adl_cstates[] __initdata = {
840 	{
841 		.name = "C1",
842 		.desc = "MWAIT 0x00",
843 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
844 		.exit_latency = 1,
845 		.target_residency = 1,
846 		.enter = &intel_idle,
847 		.enter_s2idle = intel_idle_s2idle, },
848 	{
849 		.name = "C1E",
850 		.desc = "MWAIT 0x01",
851 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
852 		.exit_latency = 2,
853 		.target_residency = 4,
854 		.enter = &intel_idle,
855 		.enter_s2idle = intel_idle_s2idle, },
856 	{
857 		.name = "C6",
858 		.desc = "MWAIT 0x20",
859 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
860 		.exit_latency = 220,
861 		.target_residency = 600,
862 		.enter = &intel_idle,
863 		.enter_s2idle = intel_idle_s2idle, },
864 	{
865 		.name = "C8",
866 		.desc = "MWAIT 0x40",
867 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
868 		.exit_latency = 280,
869 		.target_residency = 800,
870 		.enter = &intel_idle,
871 		.enter_s2idle = intel_idle_s2idle, },
872 	{
873 		.name = "C10",
874 		.desc = "MWAIT 0x60",
875 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
876 		.exit_latency = 680,
877 		.target_residency = 2000,
878 		.enter = &intel_idle,
879 		.enter_s2idle = intel_idle_s2idle, },
880 	{
881 		.enter = NULL }
882 };
883 
884 static struct cpuidle_state adl_l_cstates[] __initdata = {
885 	{
886 		.name = "C1",
887 		.desc = "MWAIT 0x00",
888 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
889 		.exit_latency = 1,
890 		.target_residency = 1,
891 		.enter = &intel_idle,
892 		.enter_s2idle = intel_idle_s2idle, },
893 	{
894 		.name = "C1E",
895 		.desc = "MWAIT 0x01",
896 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
897 		.exit_latency = 2,
898 		.target_residency = 4,
899 		.enter = &intel_idle,
900 		.enter_s2idle = intel_idle_s2idle, },
901 	{
902 		.name = "C6",
903 		.desc = "MWAIT 0x20",
904 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
905 		.exit_latency = 170,
906 		.target_residency = 500,
907 		.enter = &intel_idle,
908 		.enter_s2idle = intel_idle_s2idle, },
909 	{
910 		.name = "C8",
911 		.desc = "MWAIT 0x40",
912 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
913 		.exit_latency = 200,
914 		.target_residency = 600,
915 		.enter = &intel_idle,
916 		.enter_s2idle = intel_idle_s2idle, },
917 	{
918 		.name = "C10",
919 		.desc = "MWAIT 0x60",
920 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
921 		.exit_latency = 230,
922 		.target_residency = 700,
923 		.enter = &intel_idle,
924 		.enter_s2idle = intel_idle_s2idle, },
925 	{
926 		.enter = NULL }
927 };
928 
929 static struct cpuidle_state mtl_l_cstates[] __initdata = {
930 	{
931 		.name = "C1E",
932 		.desc = "MWAIT 0x01",
933 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
934 		.exit_latency = 1,
935 		.target_residency = 1,
936 		.enter = &intel_idle,
937 		.enter_s2idle = intel_idle_s2idle, },
938 	{
939 		.name = "C6",
940 		.desc = "MWAIT 0x20",
941 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
942 		.exit_latency = 140,
943 		.target_residency = 420,
944 		.enter = &intel_idle,
945 		.enter_s2idle = intel_idle_s2idle, },
946 	{
947 		.name = "C10",
948 		.desc = "MWAIT 0x60",
949 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
950 		.exit_latency = 310,
951 		.target_residency = 930,
952 		.enter = &intel_idle,
953 		.enter_s2idle = intel_idle_s2idle, },
954 	{
955 		.enter = NULL }
956 };
957 
958 static struct cpuidle_state gmt_cstates[] __initdata = {
959 	{
960 		.name = "C1",
961 		.desc = "MWAIT 0x00",
962 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
963 		.exit_latency = 1,
964 		.target_residency = 1,
965 		.enter = &intel_idle,
966 		.enter_s2idle = intel_idle_s2idle, },
967 	{
968 		.name = "C1E",
969 		.desc = "MWAIT 0x01",
970 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
971 		.exit_latency = 2,
972 		.target_residency = 4,
973 		.enter = &intel_idle,
974 		.enter_s2idle = intel_idle_s2idle, },
975 	{
976 		.name = "C6",
977 		.desc = "MWAIT 0x20",
978 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
979 		.exit_latency = 195,
980 		.target_residency = 585,
981 		.enter = &intel_idle,
982 		.enter_s2idle = intel_idle_s2idle, },
983 	{
984 		.name = "C8",
985 		.desc = "MWAIT 0x40",
986 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
987 		.exit_latency = 260,
988 		.target_residency = 1040,
989 		.enter = &intel_idle,
990 		.enter_s2idle = intel_idle_s2idle, },
991 	{
992 		.name = "C10",
993 		.desc = "MWAIT 0x60",
994 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
995 		.exit_latency = 660,
996 		.target_residency = 1980,
997 		.enter = &intel_idle,
998 		.enter_s2idle = intel_idle_s2idle, },
999 	{
1000 		.enter = NULL }
1001 };
1002 
1003 static struct cpuidle_state spr_cstates[] __initdata = {
1004 	{
1005 		.name = "C1",
1006 		.desc = "MWAIT 0x00",
1007 		.flags = MWAIT2flg(0x00),
1008 		.exit_latency = 1,
1009 		.target_residency = 1,
1010 		.enter = &intel_idle,
1011 		.enter_s2idle = intel_idle_s2idle, },
1012 	{
1013 		.name = "C1E",
1014 		.desc = "MWAIT 0x01",
1015 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1016 		.exit_latency = 2,
1017 		.target_residency = 4,
1018 		.enter = &intel_idle,
1019 		.enter_s2idle = intel_idle_s2idle, },
1020 	{
1021 		.name = "C6",
1022 		.desc = "MWAIT 0x20",
1023 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1024 					   CPUIDLE_FLAG_INIT_XSTATE,
1025 		.exit_latency = 290,
1026 		.target_residency = 800,
1027 		.enter = &intel_idle,
1028 		.enter_s2idle = intel_idle_s2idle, },
1029 	{
1030 		.enter = NULL }
1031 };
1032 
1033 static struct cpuidle_state gnr_cstates[] __initdata = {
1034 	{
1035 		.name = "C1",
1036 		.desc = "MWAIT 0x00",
1037 		.flags = MWAIT2flg(0x00),
1038 		.exit_latency = 1,
1039 		.target_residency = 1,
1040 		.enter = &intel_idle,
1041 		.enter_s2idle = intel_idle_s2idle, },
1042 	{
1043 		.name = "C1E",
1044 		.desc = "MWAIT 0x01",
1045 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1046 		.exit_latency = 4,
1047 		.target_residency = 4,
1048 		.enter = &intel_idle,
1049 		.enter_s2idle = intel_idle_s2idle, },
1050 	{
1051 		.name = "C6",
1052 		.desc = "MWAIT 0x20",
1053 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1054 					   CPUIDLE_FLAG_INIT_XSTATE |
1055 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1056 		.exit_latency = 170,
1057 		.target_residency = 650,
1058 		.enter = &intel_idle,
1059 		.enter_s2idle = intel_idle_s2idle, },
1060 	{
1061 		.name = "C6P",
1062 		.desc = "MWAIT 0x21",
1063 		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1064 					   CPUIDLE_FLAG_INIT_XSTATE |
1065 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1066 		.exit_latency = 210,
1067 		.target_residency = 1000,
1068 		.enter = &intel_idle,
1069 		.enter_s2idle = intel_idle_s2idle, },
1070 	{
1071 		.enter = NULL }
1072 };
1073 
1074 static struct cpuidle_state gnrd_cstates[] __initdata = {
1075 	{
1076 		.name = "C1",
1077 		.desc = "MWAIT 0x00",
1078 		.flags = MWAIT2flg(0x00),
1079 		.exit_latency = 1,
1080 		.target_residency = 1,
1081 		.enter = &intel_idle,
1082 		.enter_s2idle = intel_idle_s2idle, },
1083 	{
1084 		.name = "C1E",
1085 		.desc = "MWAIT 0x01",
1086 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1087 		.exit_latency = 4,
1088 		.target_residency = 4,
1089 		.enter = &intel_idle,
1090 		.enter_s2idle = intel_idle_s2idle, },
1091 	{
1092 		.name = "C6",
1093 		.desc = "MWAIT 0x20",
1094 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1095 					   CPUIDLE_FLAG_INIT_XSTATE |
1096 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1097 		.exit_latency = 220,
1098 		.target_residency = 650,
1099 		.enter = &intel_idle,
1100 		.enter_s2idle = intel_idle_s2idle, },
1101 	{
1102 		.name = "C6P",
1103 		.desc = "MWAIT 0x21",
1104 		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1105 					   CPUIDLE_FLAG_INIT_XSTATE |
1106 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1107 		.exit_latency = 240,
1108 		.target_residency = 750,
1109 		.enter = &intel_idle,
1110 		.enter_s2idle = intel_idle_s2idle, },
1111 	{
1112 		.enter = NULL }
1113 };
1114 
1115 static struct cpuidle_state atom_cstates[] __initdata = {
1116 	{
1117 		.name = "C1E",
1118 		.desc = "MWAIT 0x00",
1119 		.flags = MWAIT2flg(0x00),
1120 		.exit_latency = 10,
1121 		.target_residency = 20,
1122 		.enter = &intel_idle,
1123 		.enter_s2idle = intel_idle_s2idle, },
1124 	{
1125 		.name = "C2",
1126 		.desc = "MWAIT 0x10",
1127 		.flags = MWAIT2flg(0x10),
1128 		.exit_latency = 20,
1129 		.target_residency = 80,
1130 		.enter = &intel_idle,
1131 		.enter_s2idle = intel_idle_s2idle, },
1132 	{
1133 		.name = "C4",
1134 		.desc = "MWAIT 0x30",
1135 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1136 		.exit_latency = 100,
1137 		.target_residency = 400,
1138 		.enter = &intel_idle,
1139 		.enter_s2idle = intel_idle_s2idle, },
1140 	{
1141 		.name = "C6",
1142 		.desc = "MWAIT 0x52",
1143 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1144 		.exit_latency = 140,
1145 		.target_residency = 560,
1146 		.enter = &intel_idle,
1147 		.enter_s2idle = intel_idle_s2idle, },
1148 	{
1149 		.enter = NULL }
1150 };
1151 static struct cpuidle_state tangier_cstates[] __initdata = {
1152 	{
1153 		.name = "C1",
1154 		.desc = "MWAIT 0x00",
1155 		.flags = MWAIT2flg(0x00),
1156 		.exit_latency = 1,
1157 		.target_residency = 4,
1158 		.enter = &intel_idle,
1159 		.enter_s2idle = intel_idle_s2idle, },
1160 	{
1161 		.name = "C4",
1162 		.desc = "MWAIT 0x30",
1163 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1164 		.exit_latency = 100,
1165 		.target_residency = 400,
1166 		.enter = &intel_idle,
1167 		.enter_s2idle = intel_idle_s2idle, },
1168 	{
1169 		.name = "C6",
1170 		.desc = "MWAIT 0x52",
1171 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1172 		.exit_latency = 140,
1173 		.target_residency = 560,
1174 		.enter = &intel_idle,
1175 		.enter_s2idle = intel_idle_s2idle, },
1176 	{
1177 		.name = "C7",
1178 		.desc = "MWAIT 0x60",
1179 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1180 		.exit_latency = 1200,
1181 		.target_residency = 4000,
1182 		.enter = &intel_idle,
1183 		.enter_s2idle = intel_idle_s2idle, },
1184 	{
1185 		.name = "C9",
1186 		.desc = "MWAIT 0x64",
1187 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1188 		.exit_latency = 10000,
1189 		.target_residency = 20000,
1190 		.enter = &intel_idle,
1191 		.enter_s2idle = intel_idle_s2idle, },
1192 	{
1193 		.enter = NULL }
1194 };
1195 static struct cpuidle_state avn_cstates[] __initdata = {
1196 	{
1197 		.name = "C1",
1198 		.desc = "MWAIT 0x00",
1199 		.flags = MWAIT2flg(0x00),
1200 		.exit_latency = 2,
1201 		.target_residency = 2,
1202 		.enter = &intel_idle,
1203 		.enter_s2idle = intel_idle_s2idle, },
1204 	{
1205 		.name = "C6",
1206 		.desc = "MWAIT 0x51",
1207 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1208 		.exit_latency = 15,
1209 		.target_residency = 45,
1210 		.enter = &intel_idle,
1211 		.enter_s2idle = intel_idle_s2idle, },
1212 	{
1213 		.enter = NULL }
1214 };
1215 static struct cpuidle_state knl_cstates[] __initdata = {
1216 	{
1217 		.name = "C1",
1218 		.desc = "MWAIT 0x00",
1219 		.flags = MWAIT2flg(0x00),
1220 		.exit_latency = 1,
1221 		.target_residency = 2,
1222 		.enter = &intel_idle,
1223 		.enter_s2idle = intel_idle_s2idle },
1224 	{
1225 		.name = "C6",
1226 		.desc = "MWAIT 0x10",
1227 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1228 		.exit_latency = 120,
1229 		.target_residency = 500,
1230 		.enter = &intel_idle,
1231 		.enter_s2idle = intel_idle_s2idle },
1232 	{
1233 		.enter = NULL }
1234 };
1235 
1236 static struct cpuidle_state bxt_cstates[] __initdata = {
1237 	{
1238 		.name = "C1",
1239 		.desc = "MWAIT 0x00",
1240 		.flags = MWAIT2flg(0x00),
1241 		.exit_latency = 2,
1242 		.target_residency = 2,
1243 		.enter = &intel_idle,
1244 		.enter_s2idle = intel_idle_s2idle, },
1245 	{
1246 		.name = "C1E",
1247 		.desc = "MWAIT 0x01",
1248 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1249 		.exit_latency = 10,
1250 		.target_residency = 20,
1251 		.enter = &intel_idle,
1252 		.enter_s2idle = intel_idle_s2idle, },
1253 	{
1254 		.name = "C6",
1255 		.desc = "MWAIT 0x20",
1256 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1257 		.exit_latency = 133,
1258 		.target_residency = 133,
1259 		.enter = &intel_idle,
1260 		.enter_s2idle = intel_idle_s2idle, },
1261 	{
1262 		.name = "C7s",
1263 		.desc = "MWAIT 0x31",
1264 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1265 		.exit_latency = 155,
1266 		.target_residency = 155,
1267 		.enter = &intel_idle,
1268 		.enter_s2idle = intel_idle_s2idle, },
1269 	{
1270 		.name = "C8",
1271 		.desc = "MWAIT 0x40",
1272 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1273 		.exit_latency = 1000,
1274 		.target_residency = 1000,
1275 		.enter = &intel_idle,
1276 		.enter_s2idle = intel_idle_s2idle, },
1277 	{
1278 		.name = "C9",
1279 		.desc = "MWAIT 0x50",
1280 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1281 		.exit_latency = 2000,
1282 		.target_residency = 2000,
1283 		.enter = &intel_idle,
1284 		.enter_s2idle = intel_idle_s2idle, },
1285 	{
1286 		.name = "C10",
1287 		.desc = "MWAIT 0x60",
1288 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1289 		.exit_latency = 10000,
1290 		.target_residency = 10000,
1291 		.enter = &intel_idle,
1292 		.enter_s2idle = intel_idle_s2idle, },
1293 	{
1294 		.enter = NULL }
1295 };
1296 
1297 static struct cpuidle_state dnv_cstates[] __initdata = {
1298 	{
1299 		.name = "C1",
1300 		.desc = "MWAIT 0x00",
1301 		.flags = MWAIT2flg(0x00),
1302 		.exit_latency = 2,
1303 		.target_residency = 2,
1304 		.enter = &intel_idle,
1305 		.enter_s2idle = intel_idle_s2idle, },
1306 	{
1307 		.name = "C1E",
1308 		.desc = "MWAIT 0x01",
1309 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1310 		.exit_latency = 10,
1311 		.target_residency = 20,
1312 		.enter = &intel_idle,
1313 		.enter_s2idle = intel_idle_s2idle, },
1314 	{
1315 		.name = "C6",
1316 		.desc = "MWAIT 0x20",
1317 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1318 		.exit_latency = 50,
1319 		.target_residency = 500,
1320 		.enter = &intel_idle,
1321 		.enter_s2idle = intel_idle_s2idle, },
1322 	{
1323 		.enter = NULL }
1324 };
1325 
1326 /*
1327  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1328  * C6, and this is indicated in the CPUID mwait leaf.
1329  */
1330 static struct cpuidle_state snr_cstates[] __initdata = {
1331 	{
1332 		.name = "C1",
1333 		.desc = "MWAIT 0x00",
1334 		.flags = MWAIT2flg(0x00),
1335 		.exit_latency = 2,
1336 		.target_residency = 2,
1337 		.enter = &intel_idle,
1338 		.enter_s2idle = intel_idle_s2idle, },
1339 	{
1340 		.name = "C1E",
1341 		.desc = "MWAIT 0x01",
1342 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1343 		.exit_latency = 15,
1344 		.target_residency = 25,
1345 		.enter = &intel_idle,
1346 		.enter_s2idle = intel_idle_s2idle, },
1347 	{
1348 		.name = "C6",
1349 		.desc = "MWAIT 0x20",
1350 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1351 		.exit_latency = 130,
1352 		.target_residency = 500,
1353 		.enter = &intel_idle,
1354 		.enter_s2idle = intel_idle_s2idle, },
1355 	{
1356 		.enter = NULL }
1357 };
1358 
1359 static struct cpuidle_state grr_cstates[] __initdata = {
1360 	{
1361 		.name = "C1",
1362 		.desc = "MWAIT 0x00",
1363 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1364 		.exit_latency = 1,
1365 		.target_residency = 1,
1366 		.enter = &intel_idle,
1367 		.enter_s2idle = intel_idle_s2idle, },
1368 	{
1369 		.name = "C1E",
1370 		.desc = "MWAIT 0x01",
1371 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1372 		.exit_latency = 2,
1373 		.target_residency = 10,
1374 		.enter = &intel_idle,
1375 		.enter_s2idle = intel_idle_s2idle, },
1376 	{
1377 		.name = "C6S",
1378 		.desc = "MWAIT 0x22",
1379 		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED,
1380 		.exit_latency = 140,
1381 		.target_residency = 500,
1382 		.enter = &intel_idle,
1383 		.enter_s2idle = intel_idle_s2idle, },
1384 	{
1385 		.enter = NULL }
1386 };
1387 
1388 static struct cpuidle_state srf_cstates[] __initdata = {
1389 	{
1390 		.name = "C1",
1391 		.desc = "MWAIT 0x00",
1392 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1393 		.exit_latency = 1,
1394 		.target_residency = 1,
1395 		.enter = &intel_idle,
1396 		.enter_s2idle = intel_idle_s2idle, },
1397 	{
1398 		.name = "C1E",
1399 		.desc = "MWAIT 0x01",
1400 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1401 		.exit_latency = 2,
1402 		.target_residency = 10,
1403 		.enter = &intel_idle,
1404 		.enter_s2idle = intel_idle_s2idle, },
1405 	{
1406 		.name = "C6S",
1407 		.desc = "MWAIT 0x22",
1408 		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED |
1409 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1410 		.exit_latency = 270,
1411 		.target_residency = 700,
1412 		.enter = &intel_idle,
1413 		.enter_s2idle = intel_idle_s2idle, },
1414 	{
1415 		.name = "C6SP",
1416 		.desc = "MWAIT 0x23",
1417 		.flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED |
1418 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1419 		.exit_latency = 310,
1420 		.target_residency = 900,
1421 		.enter = &intel_idle,
1422 		.enter_s2idle = intel_idle_s2idle, },
1423 	{
1424 		.enter = NULL }
1425 };
1426 
1427 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1428 	.state_table = nehalem_cstates,
1429 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1430 	.disable_promotion_to_c1e = true,
1431 };
1432 
1433 static const struct idle_cpu idle_cpu_nhx __initconst = {
1434 	.state_table = nehalem_cstates,
1435 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1436 	.disable_promotion_to_c1e = true,
1437 	.use_acpi = true,
1438 };
1439 
1440 static const struct idle_cpu idle_cpu_atom __initconst = {
1441 	.state_table = atom_cstates,
1442 };
1443 
1444 static const struct idle_cpu idle_cpu_tangier __initconst = {
1445 	.state_table = tangier_cstates,
1446 };
1447 
1448 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1449 	.state_table = atom_cstates,
1450 	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1451 };
1452 
1453 static const struct idle_cpu idle_cpu_snb __initconst = {
1454 	.state_table = snb_cstates,
1455 	.disable_promotion_to_c1e = true,
1456 };
1457 
1458 static const struct idle_cpu idle_cpu_snx __initconst = {
1459 	.state_table = snb_cstates,
1460 	.disable_promotion_to_c1e = true,
1461 	.use_acpi = true,
1462 };
1463 
1464 static const struct idle_cpu idle_cpu_byt __initconst = {
1465 	.state_table = byt_cstates,
1466 	.disable_promotion_to_c1e = true,
1467 	.byt_auto_demotion_disable_flag = true,
1468 };
1469 
1470 static const struct idle_cpu idle_cpu_cht __initconst = {
1471 	.state_table = cht_cstates,
1472 	.disable_promotion_to_c1e = true,
1473 	.byt_auto_demotion_disable_flag = true,
1474 };
1475 
1476 static const struct idle_cpu idle_cpu_ivb __initconst = {
1477 	.state_table = ivb_cstates,
1478 	.disable_promotion_to_c1e = true,
1479 };
1480 
1481 static const struct idle_cpu idle_cpu_ivt __initconst = {
1482 	.state_table = ivt_cstates,
1483 	.disable_promotion_to_c1e = true,
1484 	.use_acpi = true,
1485 };
1486 
1487 static const struct idle_cpu idle_cpu_hsw __initconst = {
1488 	.state_table = hsw_cstates,
1489 	.disable_promotion_to_c1e = true,
1490 };
1491 
1492 static const struct idle_cpu idle_cpu_hsx __initconst = {
1493 	.state_table = hsw_cstates,
1494 	.disable_promotion_to_c1e = true,
1495 	.use_acpi = true,
1496 };
1497 
1498 static const struct idle_cpu idle_cpu_bdw __initconst = {
1499 	.state_table = bdw_cstates,
1500 	.disable_promotion_to_c1e = true,
1501 };
1502 
1503 static const struct idle_cpu idle_cpu_bdx __initconst = {
1504 	.state_table = bdw_cstates,
1505 	.disable_promotion_to_c1e = true,
1506 	.use_acpi = true,
1507 };
1508 
1509 static const struct idle_cpu idle_cpu_skl __initconst = {
1510 	.state_table = skl_cstates,
1511 	.disable_promotion_to_c1e = true,
1512 };
1513 
1514 static const struct idle_cpu idle_cpu_skx __initconst = {
1515 	.state_table = skx_cstates,
1516 	.disable_promotion_to_c1e = true,
1517 	.use_acpi = true,
1518 };
1519 
1520 static const struct idle_cpu idle_cpu_icx __initconst = {
1521 	.state_table = icx_cstates,
1522 	.disable_promotion_to_c1e = true,
1523 	.use_acpi = true,
1524 };
1525 
1526 static const struct idle_cpu idle_cpu_adl __initconst = {
1527 	.state_table = adl_cstates,
1528 };
1529 
1530 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1531 	.state_table = adl_l_cstates,
1532 };
1533 
1534 static const struct idle_cpu idle_cpu_mtl_l __initconst = {
1535 	.state_table = mtl_l_cstates,
1536 };
1537 
1538 static const struct idle_cpu idle_cpu_gmt __initconst = {
1539 	.state_table = gmt_cstates,
1540 };
1541 
1542 static const struct idle_cpu idle_cpu_spr __initconst = {
1543 	.state_table = spr_cstates,
1544 	.disable_promotion_to_c1e = true,
1545 	.use_acpi = true,
1546 };
1547 
1548 static const struct idle_cpu idle_cpu_gnr __initconst = {
1549 	.state_table = gnr_cstates,
1550 	.disable_promotion_to_c1e = true,
1551 	.use_acpi = true,
1552 };
1553 
1554 static const struct idle_cpu idle_cpu_gnrd __initconst = {
1555 	.state_table = gnrd_cstates,
1556 	.disable_promotion_to_c1e = true,
1557 	.use_acpi = true,
1558 };
1559 
1560 static const struct idle_cpu idle_cpu_avn __initconst = {
1561 	.state_table = avn_cstates,
1562 	.disable_promotion_to_c1e = true,
1563 	.use_acpi = true,
1564 };
1565 
1566 static const struct idle_cpu idle_cpu_knl __initconst = {
1567 	.state_table = knl_cstates,
1568 	.use_acpi = true,
1569 };
1570 
1571 static const struct idle_cpu idle_cpu_bxt __initconst = {
1572 	.state_table = bxt_cstates,
1573 	.disable_promotion_to_c1e = true,
1574 };
1575 
1576 static const struct idle_cpu idle_cpu_dnv __initconst = {
1577 	.state_table = dnv_cstates,
1578 	.disable_promotion_to_c1e = true,
1579 	.use_acpi = true,
1580 };
1581 
1582 static const struct idle_cpu idle_cpu_tmt __initconst = {
1583 	.disable_promotion_to_c1e = true,
1584 };
1585 
1586 static const struct idle_cpu idle_cpu_snr __initconst = {
1587 	.state_table = snr_cstates,
1588 	.disable_promotion_to_c1e = true,
1589 	.use_acpi = true,
1590 };
1591 
1592 static const struct idle_cpu idle_cpu_grr __initconst = {
1593 	.state_table = grr_cstates,
1594 	.disable_promotion_to_c1e = true,
1595 	.use_acpi = true,
1596 };
1597 
1598 static const struct idle_cpu idle_cpu_srf __initconst = {
1599 	.state_table = srf_cstates,
1600 	.disable_promotion_to_c1e = true,
1601 	.use_acpi = true,
1602 };
1603 
1604 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1605 	X86_MATCH_VFM(INTEL_NEHALEM_EP,		&idle_cpu_nhx),
1606 	X86_MATCH_VFM(INTEL_NEHALEM,		&idle_cpu_nehalem),
1607 	X86_MATCH_VFM(INTEL_NEHALEM_G,		&idle_cpu_nehalem),
1608 	X86_MATCH_VFM(INTEL_WESTMERE,		&idle_cpu_nehalem),
1609 	X86_MATCH_VFM(INTEL_WESTMERE_EP,	&idle_cpu_nhx),
1610 	X86_MATCH_VFM(INTEL_NEHALEM_EX,		&idle_cpu_nhx),
1611 	X86_MATCH_VFM(INTEL_ATOM_BONNELL,	&idle_cpu_atom),
1612 	X86_MATCH_VFM(INTEL_ATOM_BONNELL_MID,	&idle_cpu_lincroft),
1613 	X86_MATCH_VFM(INTEL_WESTMERE_EX,	&idle_cpu_nhx),
1614 	X86_MATCH_VFM(INTEL_SANDYBRIDGE,	&idle_cpu_snb),
1615 	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,	&idle_cpu_snx),
1616 	X86_MATCH_VFM(INTEL_ATOM_SALTWELL,	&idle_cpu_atom),
1617 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,	&idle_cpu_byt),
1618 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1619 	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,	&idle_cpu_cht),
1620 	X86_MATCH_VFM(INTEL_IVYBRIDGE,		&idle_cpu_ivb),
1621 	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,	&idle_cpu_ivt),
1622 	X86_MATCH_VFM(INTEL_HASWELL,		&idle_cpu_hsw),
1623 	X86_MATCH_VFM(INTEL_HASWELL_X,		&idle_cpu_hsx),
1624 	X86_MATCH_VFM(INTEL_HASWELL_L,		&idle_cpu_hsw),
1625 	X86_MATCH_VFM(INTEL_HASWELL_G,		&idle_cpu_hsw),
1626 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D,	&idle_cpu_avn),
1627 	X86_MATCH_VFM(INTEL_BROADWELL,		&idle_cpu_bdw),
1628 	X86_MATCH_VFM(INTEL_BROADWELL_G,	&idle_cpu_bdw),
1629 	X86_MATCH_VFM(INTEL_BROADWELL_X,	&idle_cpu_bdx),
1630 	X86_MATCH_VFM(INTEL_BROADWELL_D,	&idle_cpu_bdx),
1631 	X86_MATCH_VFM(INTEL_SKYLAKE_L,		&idle_cpu_skl),
1632 	X86_MATCH_VFM(INTEL_SKYLAKE,		&idle_cpu_skl),
1633 	X86_MATCH_VFM(INTEL_KABYLAKE_L,		&idle_cpu_skl),
1634 	X86_MATCH_VFM(INTEL_KABYLAKE,		&idle_cpu_skl),
1635 	X86_MATCH_VFM(INTEL_SKYLAKE_X,		&idle_cpu_skx),
1636 	X86_MATCH_VFM(INTEL_ICELAKE_X,		&idle_cpu_icx),
1637 	X86_MATCH_VFM(INTEL_ICELAKE_D,		&idle_cpu_icx),
1638 	X86_MATCH_VFM(INTEL_ALDERLAKE,		&idle_cpu_adl),
1639 	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&idle_cpu_adl_l),
1640 	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&idle_cpu_mtl_l),
1641 	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&idle_cpu_gmt),
1642 	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&idle_cpu_spr),
1643 	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&idle_cpu_spr),
1644 	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X,	&idle_cpu_gnr),
1645 	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D,	&idle_cpu_gnrd),
1646 	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,	&idle_cpu_knl),
1647 	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,	&idle_cpu_knl),
1648 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,	&idle_cpu_bxt),
1649 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
1650 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,	&idle_cpu_dnv),
1651 	X86_MATCH_VFM(INTEL_ATOM_TREMONT,       &idle_cpu_tmt),
1652 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,     &idle_cpu_tmt),
1653 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,	&idle_cpu_snr),
1654 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&idle_cpu_grr),
1655 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&idle_cpu_srf),
1656 	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X,	&idle_cpu_srf),
1657 	{}
1658 };
1659 
1660 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1661 	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1662 	{}
1663 };
1664 
intel_idle_max_cstate_reached(int cstate)1665 static bool __init intel_idle_max_cstate_reached(int cstate)
1666 {
1667 	if (cstate + 1 > max_cstate) {
1668 		pr_info("max_cstate %d reached\n", max_cstate);
1669 		return true;
1670 	}
1671 	return false;
1672 }
1673 
intel_idle_state_needs_timer_stop(struct cpuidle_state * state)1674 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1675 {
1676 	unsigned long eax = flg2MWAIT(state->flags);
1677 
1678 	if (boot_cpu_has(X86_FEATURE_ARAT))
1679 		return false;
1680 
1681 	/*
1682 	 * Switch over to one-shot tick broadcast if the target C-state
1683 	 * is deeper than C1.
1684 	 */
1685 	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1686 }
1687 
1688 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1689 #include <acpi/processor.h>
1690 
1691 static bool no_acpi __read_mostly;
1692 module_param(no_acpi, bool, 0444);
1693 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1694 
1695 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1696 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1697 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1698 
1699 static struct acpi_processor_power acpi_state_table __initdata;
1700 
1701 /**
1702  * intel_idle_cst_usable - Check if the _CST information can be used.
1703  *
1704  * Check if all of the C-states listed by _CST in the max_cstate range are
1705  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1706  */
intel_idle_cst_usable(void)1707 static bool __init intel_idle_cst_usable(void)
1708 {
1709 	int cstate, limit;
1710 
1711 	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1712 		      acpi_state_table.count);
1713 
1714 	for (cstate = 1; cstate < limit; cstate++) {
1715 		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1716 
1717 		if (cx->entry_method != ACPI_CSTATE_FFH)
1718 			return false;
1719 	}
1720 
1721 	return true;
1722 }
1723 
intel_idle_acpi_cst_extract(void)1724 static bool __init intel_idle_acpi_cst_extract(void)
1725 {
1726 	unsigned int cpu;
1727 
1728 	if (no_acpi) {
1729 		pr_debug("Not allowed to use ACPI _CST\n");
1730 		return false;
1731 	}
1732 
1733 	for_each_possible_cpu(cpu) {
1734 		struct acpi_processor *pr = per_cpu(processors, cpu);
1735 
1736 		if (!pr)
1737 			continue;
1738 
1739 		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1740 			continue;
1741 
1742 		acpi_state_table.count++;
1743 
1744 		if (!intel_idle_cst_usable())
1745 			continue;
1746 
1747 		if (!acpi_processor_claim_cst_control())
1748 			break;
1749 
1750 		return true;
1751 	}
1752 
1753 	acpi_state_table.count = 0;
1754 	pr_debug("ACPI _CST not found or not usable\n");
1755 	return false;
1756 }
1757 
intel_idle_init_cstates_acpi(struct cpuidle_driver * drv)1758 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1759 {
1760 	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1761 
1762 	/*
1763 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1764 	 * the interesting states are ACPI_CSTATE_FFH.
1765 	 */
1766 	for (cstate = 1; cstate < limit; cstate++) {
1767 		struct acpi_processor_cx *cx;
1768 		struct cpuidle_state *state;
1769 
1770 		if (intel_idle_max_cstate_reached(cstate - 1))
1771 			break;
1772 
1773 		cx = &acpi_state_table.states[cstate];
1774 
1775 		state = &drv->states[drv->state_count++];
1776 
1777 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1778 		strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1779 		state->exit_latency = cx->latency;
1780 		/*
1781 		 * For C1-type C-states use the same number for both the exit
1782 		 * latency and target residency, because that is the case for
1783 		 * C1 in the majority of the static C-states tables above.
1784 		 * For the other types of C-states, however, set the target
1785 		 * residency to 3 times the exit latency which should lead to
1786 		 * a reasonable balance between energy-efficiency and
1787 		 * performance in the majority of interesting cases.
1788 		 */
1789 		state->target_residency = cx->latency;
1790 		if (cx->type > ACPI_STATE_C1)
1791 			state->target_residency *= 3;
1792 
1793 		state->flags = MWAIT2flg(cx->address);
1794 		if (cx->type > ACPI_STATE_C2)
1795 			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1796 
1797 		if (disabled_states_mask & BIT(cstate))
1798 			state->flags |= CPUIDLE_FLAG_OFF;
1799 
1800 		if (intel_idle_state_needs_timer_stop(state))
1801 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1802 
1803 		if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1804 			mark_tsc_unstable("TSC halts in idle");
1805 
1806 		state->enter = intel_idle;
1807 		state->enter_s2idle = intel_idle_s2idle;
1808 	}
1809 }
1810 
intel_idle_off_by_default(unsigned int flags,u32 mwait_hint)1811 static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
1812 {
1813 	int cstate, limit;
1814 
1815 	/*
1816 	 * If there are no _CST C-states, do not disable any C-states by
1817 	 * default.
1818 	 */
1819 	if (!acpi_state_table.count)
1820 		return false;
1821 
1822 	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1823 	/*
1824 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1825 	 * the interesting states are ACPI_CSTATE_FFH.
1826 	 */
1827 	for (cstate = 1; cstate < limit; cstate++) {
1828 		u32 acpi_hint = acpi_state_table.states[cstate].address;
1829 		u32 table_hint = mwait_hint;
1830 
1831 		if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) {
1832 			acpi_hint &= ~MWAIT_SUBSTATE_MASK;
1833 			table_hint &= ~MWAIT_SUBSTATE_MASK;
1834 		}
1835 
1836 		if (acpi_hint == table_hint)
1837 			return false;
1838 	}
1839 	return true;
1840 }
1841 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1842 #define force_use_acpi	(false)
1843 
intel_idle_acpi_cst_extract(void)1844 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
intel_idle_init_cstates_acpi(struct cpuidle_driver * drv)1845 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
intel_idle_off_by_default(unsigned int flags,u32 mwait_hint)1846 static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
1847 {
1848 	return false;
1849 }
1850 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1851 
1852 /**
1853  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1854  *
1855  * Tune IVT multi-socket targets.
1856  * Assumption: num_sockets == (max_package_num + 1).
1857  */
ivt_idle_state_table_update(void)1858 static void __init ivt_idle_state_table_update(void)
1859 {
1860 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1861 	int cpu, package_num, num_sockets = 1;
1862 
1863 	for_each_online_cpu(cpu) {
1864 		package_num = topology_physical_package_id(cpu);
1865 		if (package_num + 1 > num_sockets) {
1866 			num_sockets = package_num + 1;
1867 
1868 			if (num_sockets > 4) {
1869 				cpuidle_state_table = ivt_cstates_8s;
1870 				return;
1871 			}
1872 		}
1873 	}
1874 
1875 	if (num_sockets > 2)
1876 		cpuidle_state_table = ivt_cstates_4s;
1877 
1878 	/* else, 1 and 2 socket systems use default ivt_cstates */
1879 }
1880 
1881 /**
1882  * irtl_2_usec - IRTL to microseconds conversion.
1883  * @irtl: IRTL MSR value.
1884  *
1885  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1886  */
irtl_2_usec(unsigned long long irtl)1887 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1888 {
1889 	static const unsigned int irtl_ns_units[] __initconst = {
1890 		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1891 	};
1892 	unsigned long long ns;
1893 
1894 	if (!irtl)
1895 		return 0;
1896 
1897 	ns = irtl_ns_units[(irtl >> 10) & 0x7];
1898 
1899 	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1900 }
1901 
1902 /**
1903  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1904  *
1905  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1906  * definitive maximum latency and use the same value for target_residency.
1907  */
bxt_idle_state_table_update(void)1908 static void __init bxt_idle_state_table_update(void)
1909 {
1910 	unsigned long long msr;
1911 	unsigned int usec;
1912 
1913 	rdmsrl(MSR_PKGC6_IRTL, msr);
1914 	usec = irtl_2_usec(msr);
1915 	if (usec) {
1916 		bxt_cstates[2].exit_latency = usec;
1917 		bxt_cstates[2].target_residency = usec;
1918 	}
1919 
1920 	rdmsrl(MSR_PKGC7_IRTL, msr);
1921 	usec = irtl_2_usec(msr);
1922 	if (usec) {
1923 		bxt_cstates[3].exit_latency = usec;
1924 		bxt_cstates[3].target_residency = usec;
1925 	}
1926 
1927 	rdmsrl(MSR_PKGC8_IRTL, msr);
1928 	usec = irtl_2_usec(msr);
1929 	if (usec) {
1930 		bxt_cstates[4].exit_latency = usec;
1931 		bxt_cstates[4].target_residency = usec;
1932 	}
1933 
1934 	rdmsrl(MSR_PKGC9_IRTL, msr);
1935 	usec = irtl_2_usec(msr);
1936 	if (usec) {
1937 		bxt_cstates[5].exit_latency = usec;
1938 		bxt_cstates[5].target_residency = usec;
1939 	}
1940 
1941 	rdmsrl(MSR_PKGC10_IRTL, msr);
1942 	usec = irtl_2_usec(msr);
1943 	if (usec) {
1944 		bxt_cstates[6].exit_latency = usec;
1945 		bxt_cstates[6].target_residency = usec;
1946 	}
1947 
1948 }
1949 
1950 /**
1951  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1952  *
1953  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1954  */
sklh_idle_state_table_update(void)1955 static void __init sklh_idle_state_table_update(void)
1956 {
1957 	unsigned long long msr;
1958 	unsigned int eax, ebx, ecx, edx;
1959 
1960 
1961 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1962 	if (max_cstate <= 7)
1963 		return;
1964 
1965 	/* if PC10 not present in CPUID.MWAIT.EDX */
1966 	if ((mwait_substates & (0xF << 28)) == 0)
1967 		return;
1968 
1969 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1970 
1971 	/* PC10 is not enabled in PKG C-state limit */
1972 	if ((msr & 0xF) != 8)
1973 		return;
1974 
1975 	ecx = 0;
1976 	cpuid(7, &eax, &ebx, &ecx, &edx);
1977 
1978 	/* if SGX is present */
1979 	if (ebx & (1 << 2)) {
1980 
1981 		rdmsrl(MSR_IA32_FEAT_CTL, msr);
1982 
1983 		/* if SGX is enabled */
1984 		if (msr & (1 << 18))
1985 			return;
1986 	}
1987 
1988 	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
1989 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
1990 }
1991 
1992 /**
1993  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1994  * idle states table.
1995  */
skx_idle_state_table_update(void)1996 static void __init skx_idle_state_table_update(void)
1997 {
1998 	unsigned long long msr;
1999 
2000 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
2001 
2002 	/*
2003 	 * 000b: C0/C1 (no package C-state support)
2004 	 * 001b: C2
2005 	 * 010b: C6 (non-retention)
2006 	 * 011b: C6 (retention)
2007 	 * 111b: No Package C state limits.
2008 	 */
2009 	if ((msr & 0x7) < 2) {
2010 		/*
2011 		 * Uses the CC6 + PC0 latency and 3 times of
2012 		 * latency for target_residency if the PC6
2013 		 * is disabled in BIOS. This is consistent
2014 		 * with how intel_idle driver uses _CST
2015 		 * to set the target_residency.
2016 		 */
2017 		skx_cstates[2].exit_latency = 92;
2018 		skx_cstates[2].target_residency = 276;
2019 	}
2020 }
2021 
2022 /**
2023  * adl_idle_state_table_update - Adjust AlderLake idle states table.
2024  */
adl_idle_state_table_update(void)2025 static void __init adl_idle_state_table_update(void)
2026 {
2027 	/* Check if user prefers C1 over C1E. */
2028 	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
2029 		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
2030 		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
2031 
2032 		/* Disable C1E by clearing the "C1E promotion" bit. */
2033 		c1e_promotion = C1E_PROMOTION_DISABLE;
2034 		return;
2035 	}
2036 
2037 	/* Make sure C1E is enabled by default */
2038 	c1e_promotion = C1E_PROMOTION_ENABLE;
2039 }
2040 
2041 /**
2042  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
2043  */
spr_idle_state_table_update(void)2044 static void __init spr_idle_state_table_update(void)
2045 {
2046 	unsigned long long msr;
2047 
2048 	/*
2049 	 * By default, the C6 state assumes the worst-case scenario of package
2050 	 * C6. However, if PC6 is disabled, we update the numbers to match
2051 	 * core C6.
2052 	 */
2053 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
2054 
2055 	/* Limit value 2 and above allow for PC6. */
2056 	if ((msr & 0x7) < 2) {
2057 		spr_cstates[2].exit_latency = 190;
2058 		spr_cstates[2].target_residency = 600;
2059 	}
2060 }
2061 
intel_idle_verify_cstate(unsigned int mwait_hint)2062 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
2063 {
2064 	unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
2065 					MWAIT_CSTATE_MASK;
2066 	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
2067 					MWAIT_SUBSTATE_MASK;
2068 
2069 	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
2070 	if (num_substates == 0)
2071 		return false;
2072 
2073 	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
2074 		mark_tsc_unstable("TSC halts in idle states deeper than C2");
2075 
2076 	return true;
2077 }
2078 
state_update_enter_method(struct cpuidle_state * state,int cstate)2079 static void state_update_enter_method(struct cpuidle_state *state, int cstate)
2080 {
2081 	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
2082 		/*
2083 		 * Combining with XSTATE with IBRS or IRQ_ENABLE flags
2084 		 * is not currently supported but this driver.
2085 		 */
2086 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
2087 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2088 		state->enter = intel_idle_xstate;
2089 		return;
2090 	}
2091 
2092 	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
2093 			((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) {
2094 		/*
2095 		 * IBRS mitigation requires that C-states are entered
2096 		 * with interrupts disabled.
2097 		 */
2098 		if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE))
2099 			state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE;
2100 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2101 		state->enter = intel_idle_ibrs;
2102 		return;
2103 	}
2104 
2105 	if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
2106 		state->enter = intel_idle_irq;
2107 		return;
2108 	}
2109 
2110 	if (force_irq_on) {
2111 		pr_info("forced intel_idle_irq for state %d\n", cstate);
2112 		state->enter = intel_idle_irq;
2113 	}
2114 }
2115 
intel_idle_init_cstates_icpu(struct cpuidle_driver * drv)2116 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
2117 {
2118 	int cstate;
2119 
2120 	switch (boot_cpu_data.x86_vfm) {
2121 	case INTEL_IVYBRIDGE_X:
2122 		ivt_idle_state_table_update();
2123 		break;
2124 	case INTEL_ATOM_GOLDMONT:
2125 	case INTEL_ATOM_GOLDMONT_PLUS:
2126 		bxt_idle_state_table_update();
2127 		break;
2128 	case INTEL_SKYLAKE:
2129 		sklh_idle_state_table_update();
2130 		break;
2131 	case INTEL_SKYLAKE_X:
2132 		skx_idle_state_table_update();
2133 		break;
2134 	case INTEL_SAPPHIRERAPIDS_X:
2135 	case INTEL_EMERALDRAPIDS_X:
2136 		spr_idle_state_table_update();
2137 		break;
2138 	case INTEL_ALDERLAKE:
2139 	case INTEL_ALDERLAKE_L:
2140 	case INTEL_ATOM_GRACEMONT:
2141 		adl_idle_state_table_update();
2142 		break;
2143 	}
2144 
2145 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
2146 		struct cpuidle_state *state;
2147 		unsigned int mwait_hint;
2148 
2149 		if (intel_idle_max_cstate_reached(cstate))
2150 			break;
2151 
2152 		if (!cpuidle_state_table[cstate].enter &&
2153 		    !cpuidle_state_table[cstate].enter_s2idle)
2154 			break;
2155 
2156 		/* If marked as unusable, skip this state. */
2157 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
2158 			pr_debug("state %s is disabled\n",
2159 				 cpuidle_state_table[cstate].name);
2160 			continue;
2161 		}
2162 
2163 		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
2164 		if (!intel_idle_verify_cstate(mwait_hint))
2165 			continue;
2166 
2167 		/* Structure copy. */
2168 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
2169 		state = &drv->states[drv->state_count];
2170 
2171 		state_update_enter_method(state, cstate);
2172 
2173 
2174 		if ((disabled_states_mask & BIT(drv->state_count)) ||
2175 		    ((icpu->use_acpi || force_use_acpi) &&
2176 		     intel_idle_off_by_default(state->flags, mwait_hint) &&
2177 		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
2178 			state->flags |= CPUIDLE_FLAG_OFF;
2179 
2180 		if (intel_idle_state_needs_timer_stop(state))
2181 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
2182 
2183 		drv->state_count++;
2184 	}
2185 
2186 	if (icpu->byt_auto_demotion_disable_flag) {
2187 		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
2188 		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
2189 	}
2190 }
2191 
2192 /**
2193  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
2194  * @drv: cpuidle driver structure to initialize.
2195  */
intel_idle_cpuidle_driver_init(struct cpuidle_driver * drv)2196 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
2197 {
2198 	cpuidle_poll_state_init(drv);
2199 
2200 	if (disabled_states_mask & BIT(0))
2201 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
2202 
2203 	drv->state_count = 1;
2204 
2205 	if (icpu && icpu->state_table)
2206 		intel_idle_init_cstates_icpu(drv);
2207 	else
2208 		intel_idle_init_cstates_acpi(drv);
2209 }
2210 
auto_demotion_disable(void)2211 static void auto_demotion_disable(void)
2212 {
2213 	unsigned long long msr_bits;
2214 
2215 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2216 	msr_bits &= ~auto_demotion_disable_flags;
2217 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2218 }
2219 
c1e_promotion_enable(void)2220 static void c1e_promotion_enable(void)
2221 {
2222 	unsigned long long msr_bits;
2223 
2224 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
2225 	msr_bits |= 0x2;
2226 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
2227 }
2228 
c1e_promotion_disable(void)2229 static void c1e_promotion_disable(void)
2230 {
2231 	unsigned long long msr_bits;
2232 
2233 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
2234 	msr_bits &= ~0x2;
2235 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
2236 }
2237 
2238 /**
2239  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
2240  * @cpu: CPU to initialize.
2241  *
2242  * Register a cpuidle device object for @cpu and update its MSRs in accordance
2243  * with the processor model flags.
2244  */
intel_idle_cpu_init(unsigned int cpu)2245 static int intel_idle_cpu_init(unsigned int cpu)
2246 {
2247 	struct cpuidle_device *dev;
2248 
2249 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2250 	dev->cpu = cpu;
2251 
2252 	if (cpuidle_register_device(dev)) {
2253 		pr_debug("cpuidle_register_device %d failed!\n", cpu);
2254 		return -EIO;
2255 	}
2256 
2257 	if (auto_demotion_disable_flags)
2258 		auto_demotion_disable();
2259 
2260 	if (c1e_promotion == C1E_PROMOTION_ENABLE)
2261 		c1e_promotion_enable();
2262 	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
2263 		c1e_promotion_disable();
2264 
2265 	return 0;
2266 }
2267 
intel_idle_cpu_online(unsigned int cpu)2268 static int intel_idle_cpu_online(unsigned int cpu)
2269 {
2270 	struct cpuidle_device *dev;
2271 
2272 	if (!boot_cpu_has(X86_FEATURE_ARAT))
2273 		tick_broadcast_enable();
2274 
2275 	/*
2276 	 * Some systems can hotplug a cpu at runtime after
2277 	 * the kernel has booted, we have to initialize the
2278 	 * driver in this case
2279 	 */
2280 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2281 	if (!dev->registered)
2282 		return intel_idle_cpu_init(cpu);
2283 
2284 	return 0;
2285 }
2286 
2287 /**
2288  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
2289  */
intel_idle_cpuidle_devices_uninit(void)2290 static void __init intel_idle_cpuidle_devices_uninit(void)
2291 {
2292 	int i;
2293 
2294 	for_each_online_cpu(i)
2295 		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
2296 }
2297 
intel_idle_init(void)2298 static int __init intel_idle_init(void)
2299 {
2300 	const struct x86_cpu_id *id;
2301 	unsigned int eax, ebx, ecx;
2302 	int retval;
2303 
2304 	/* Do not load intel_idle at all for now if idle= is passed */
2305 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
2306 		return -ENODEV;
2307 
2308 	if (max_cstate == 0) {
2309 		pr_debug("disabled\n");
2310 		return -EPERM;
2311 	}
2312 
2313 	id = x86_match_cpu(intel_idle_ids);
2314 	if (id) {
2315 		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2316 			pr_debug("Please enable MWAIT in BIOS SETUP\n");
2317 			return -ENODEV;
2318 		}
2319 	} else {
2320 		id = x86_match_cpu(intel_mwait_ids);
2321 		if (!id)
2322 			return -ENODEV;
2323 	}
2324 
2325 	cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates);
2326 
2327 	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2328 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2329 	    !mwait_substates)
2330 			return -ENODEV;
2331 
2332 	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2333 
2334 	icpu = (const struct idle_cpu *)id->driver_data;
2335 	if (icpu) {
2336 		if (icpu->state_table)
2337 			cpuidle_state_table = icpu->state_table;
2338 		else if (!intel_idle_acpi_cst_extract())
2339 			return -ENODEV;
2340 
2341 		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2342 		if (icpu->disable_promotion_to_c1e)
2343 			c1e_promotion = C1E_PROMOTION_DISABLE;
2344 		if (icpu->use_acpi || force_use_acpi)
2345 			intel_idle_acpi_cst_extract();
2346 	} else if (!intel_idle_acpi_cst_extract()) {
2347 		return -ENODEV;
2348 	}
2349 
2350 	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2351 		 boot_cpu_data.x86_model);
2352 
2353 	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2354 	if (!intel_idle_cpuidle_devices)
2355 		return -ENOMEM;
2356 
2357 	intel_idle_cpuidle_driver_init(&intel_idle_driver);
2358 
2359 	retval = cpuidle_register_driver(&intel_idle_driver);
2360 	if (retval) {
2361 		struct cpuidle_driver *drv = cpuidle_get_driver();
2362 		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2363 		       drv ? drv->name : "none");
2364 		goto init_driver_fail;
2365 	}
2366 
2367 	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2368 				   intel_idle_cpu_online, NULL);
2369 	if (retval < 0)
2370 		goto hp_setup_fail;
2371 
2372 	pr_debug("Local APIC timer is reliable in %s\n",
2373 		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2374 
2375 	return 0;
2376 
2377 hp_setup_fail:
2378 	intel_idle_cpuidle_devices_uninit();
2379 	cpuidle_unregister_driver(&intel_idle_driver);
2380 init_driver_fail:
2381 	free_percpu(intel_idle_cpuidle_devices);
2382 	return retval;
2383 
2384 }
2385 device_initcall(intel_idle_init);
2386 
2387 /*
2388  * We are not really modular, but we used to support that.  Meaning we also
2389  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2390  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2391  * is the easiest way (currently) to continue doing that.
2392  */
2393 module_param(max_cstate, int, 0444);
2394 /*
2395  * The positions of the bits that are set in this number are the indices of the
2396  * idle states to be disabled by default (as reflected by the names of the
2397  * corresponding idle state directories in sysfs, "state0", "state1" ...
2398  * "state<i>" ..., where <i> is the index of the given state).
2399  */
2400 module_param_named(states_off, disabled_states_mask, uint, 0444);
2401 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2402 /*
2403  * Some platforms come with mutually exclusive C-states, so that if one is
2404  * enabled, the other C-states must not be used. Example: C1 and C1E on
2405  * Sapphire Rapids platform. This parameter allows for selecting the
2406  * preferred C-states among the groups of mutually exclusive C-states - the
2407  * selected C-states will be registered, the other C-states from the mutually
2408  * exclusive group won't be registered. If the platform has no mutually
2409  * exclusive C-states, this parameter has no effect.
2410  */
2411 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2412 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
2413 /*
2414  * Debugging option that forces the driver to enter all C-states with
2415  * interrupts enabled. Does not apply to C-states with
2416  * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags.
2417  */
2418 module_param(force_irq_on, bool, 0444);
2419 /*
2420  * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on and
2421  * CPUIDLE_FLAG_IRQ_ENABLE isn't set.
2422  */
2423 module_param(ibrs_off, bool, 0444);
2424 MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle");
2425