xref: /linux/drivers/idle/intel_idle.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9 
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16 
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *	for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *	(in contrast to entering ACPI C3, in which case the WBINVD
27  *	instruction needs to be executed to flush the caches)
28  */
29 
30 /*
31  * Known limitations
32  *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38 
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41 
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43 
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57 
/* Version string of the intel_idle driver. */
#define INTEL_IDLE_VERSION	"0.5.1"
59 
60 static struct cpuidle_driver intel_idle_driver = {
61 	.name = "intel_idle",
62 	.owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66 static unsigned int disabled_states_mask;
67 static unsigned int preferred_states_mask;
68 
69 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
70 
71 static unsigned long auto_demotion_disable_flags;
72 
/*
 * Requested handling of C1E promotion.  The default, PRESERVE, is the
 * initial value of c1e_promotion.
 */
static enum {
	C1E_PROMOTION_PRESERVE	= 0,
	C1E_PROMOTION_ENABLE	= 1,
	C1E_PROMOTION_DISABLE	= 2,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
78 
/*
 * Per-processor-model idle configuration: a C-state table plus a few
 * model-specific tweaks.
 */
struct idle_cpu {
	/* Table of idle states to use for this model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * These are the enable bits that should be cleared.
	 */
	unsigned long auto_demotion_disable_flags;
	/* NOTE(review): presumably the Bay Trail variant of the above - confirm. */
	bool byt_auto_demotion_disable_flag;
	/* NOTE(review): presumably requests that C1E promotion be turned off - confirm. */
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably lets ACPI information be consulted - confirm. */
	bool use_acpi;
};
91 
92 static const struct idle_cpu *icpu __initdata;
93 static struct cpuidle_state *cpuidle_state_table __initdata;
94 
95 static unsigned int mwait_substates __initdata;
96 
97 /*
98  * Enable interrupts before entering the C-state. On some platforms and for
99  * some C-states, this may measurably decrease interrupt latency.
100  */
101 #define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)
102 
103 /*
104  * Enable this state by default even if the ACPI _CST does not list it.
105  */
106 #define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
107 
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT() extracts the hint from bits 31:24 of a flags word and
 * MWAIT2flg() places a hint there.  The argument of MWAIT2flg() is
 * parenthesized so that an expression argument is masked as a whole, and
 * the mask is unsigned so that hints with the top bit set (>= 0x80) are
 * not shifted into the sign bit of a signed int.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
117 
118 static __always_inline int __intel_idle(struct cpuidle_device *dev,
119 					struct cpuidle_driver *drv, int index)
120 {
121 	struct cpuidle_state *state = &drv->states[index];
122 	unsigned long eax = flg2MWAIT(state->flags);
123 	unsigned long ecx = 1; /* break on interrupt flag */
124 
125 	mwait_idle_with_hints(eax, ecx);
126 
127 	return index;
128 }
129 
130 /**
131  * intel_idle - Ask the processor to enter the given idle state.
132  * @dev: cpuidle device of the target CPU.
133  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
134  * @index: Target idle state index.
135  *
136  * Use the MWAIT instruction to notify the processor that the CPU represented by
137  * @dev is idle and it can try to enter the idle state corresponding to @index.
138  *
139  * If the local APIC timer is not known to be reliable in the target idle state,
140  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
141  *
142  * Must be called under local_irq_disable().
143  */
144 static __cpuidle int intel_idle(struct cpuidle_device *dev,
145 				struct cpuidle_driver *drv, int index)
146 {
147 	return __intel_idle(dev, drv, index);
148 }
149 
150 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
151 				    struct cpuidle_driver *drv, int index)
152 {
153 	int ret;
154 
155 	raw_local_irq_enable();
156 	ret = __intel_idle(dev, drv, index);
157 	raw_local_irq_disable();
158 
159 	return ret;
160 }
161 
162 /**
163  * intel_idle_s2idle - Ask the processor to enter the given idle state.
164  * @dev: cpuidle device of the target CPU.
165  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
166  * @index: Target idle state index.
167  *
168  * Use the MWAIT instruction to notify the processor that the CPU represented by
169  * @dev is idle and it can try to enter the idle state corresponding to @index.
170  *
171  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
172  * scheduler tick and suspended scheduler clock on the target CPU.
173  */
174 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
175 				       struct cpuidle_driver *drv, int index)
176 {
177 	unsigned long eax = flg2MWAIT(drv->states[index].flags);
178 	unsigned long ecx = 1; /* break on interrupt flag */
179 
180 	mwait_idle_with_hints(eax, ecx);
181 
182 	return 0;
183 }
184 
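/*
 * The state tables below list the idle states of each supported processor
 * model, shallowest first.  The first entry of every table is the
 * shallowest MWAIT state (there is no dummy C0 entry), and every table
 * ends with an entry whose .enter callback is NULL.
 */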
190 static struct cpuidle_state nehalem_cstates[] __initdata = {
191 	{
192 		.name = "C1",
193 		.desc = "MWAIT 0x00",
194 		.flags = MWAIT2flg(0x00),
195 		.exit_latency = 3,
196 		.target_residency = 6,
197 		.enter = &intel_idle,
198 		.enter_s2idle = intel_idle_s2idle, },
199 	{
200 		.name = "C1E",
201 		.desc = "MWAIT 0x01",
202 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
203 		.exit_latency = 10,
204 		.target_residency = 20,
205 		.enter = &intel_idle,
206 		.enter_s2idle = intel_idle_s2idle, },
207 	{
208 		.name = "C3",
209 		.desc = "MWAIT 0x10",
210 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
211 		.exit_latency = 20,
212 		.target_residency = 80,
213 		.enter = &intel_idle,
214 		.enter_s2idle = intel_idle_s2idle, },
215 	{
216 		.name = "C6",
217 		.desc = "MWAIT 0x20",
218 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
219 		.exit_latency = 200,
220 		.target_residency = 800,
221 		.enter = &intel_idle,
222 		.enter_s2idle = intel_idle_s2idle, },
223 	{
224 		.enter = NULL }
225 };
226 
227 static struct cpuidle_state snb_cstates[] __initdata = {
228 	{
229 		.name = "C1",
230 		.desc = "MWAIT 0x00",
231 		.flags = MWAIT2flg(0x00),
232 		.exit_latency = 2,
233 		.target_residency = 2,
234 		.enter = &intel_idle,
235 		.enter_s2idle = intel_idle_s2idle, },
236 	{
237 		.name = "C1E",
238 		.desc = "MWAIT 0x01",
239 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
240 		.exit_latency = 10,
241 		.target_residency = 20,
242 		.enter = &intel_idle,
243 		.enter_s2idle = intel_idle_s2idle, },
244 	{
245 		.name = "C3",
246 		.desc = "MWAIT 0x10",
247 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
248 		.exit_latency = 80,
249 		.target_residency = 211,
250 		.enter = &intel_idle,
251 		.enter_s2idle = intel_idle_s2idle, },
252 	{
253 		.name = "C6",
254 		.desc = "MWAIT 0x20",
255 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
256 		.exit_latency = 104,
257 		.target_residency = 345,
258 		.enter = &intel_idle,
259 		.enter_s2idle = intel_idle_s2idle, },
260 	{
261 		.name = "C7",
262 		.desc = "MWAIT 0x30",
263 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
264 		.exit_latency = 109,
265 		.target_residency = 345,
266 		.enter = &intel_idle,
267 		.enter_s2idle = intel_idle_s2idle, },
268 	{
269 		.enter = NULL }
270 };
271 
272 static struct cpuidle_state byt_cstates[] __initdata = {
273 	{
274 		.name = "C1",
275 		.desc = "MWAIT 0x00",
276 		.flags = MWAIT2flg(0x00),
277 		.exit_latency = 1,
278 		.target_residency = 1,
279 		.enter = &intel_idle,
280 		.enter_s2idle = intel_idle_s2idle, },
281 	{
282 		.name = "C6N",
283 		.desc = "MWAIT 0x58",
284 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
285 		.exit_latency = 300,
286 		.target_residency = 275,
287 		.enter = &intel_idle,
288 		.enter_s2idle = intel_idle_s2idle, },
289 	{
290 		.name = "C6S",
291 		.desc = "MWAIT 0x52",
292 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
293 		.exit_latency = 500,
294 		.target_residency = 560,
295 		.enter = &intel_idle,
296 		.enter_s2idle = intel_idle_s2idle, },
297 	{
298 		.name = "C7",
299 		.desc = "MWAIT 0x60",
300 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
301 		.exit_latency = 1200,
302 		.target_residency = 4000,
303 		.enter = &intel_idle,
304 		.enter_s2idle = intel_idle_s2idle, },
305 	{
306 		.name = "C7S",
307 		.desc = "MWAIT 0x64",
308 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
309 		.exit_latency = 10000,
310 		.target_residency = 20000,
311 		.enter = &intel_idle,
312 		.enter_s2idle = intel_idle_s2idle, },
313 	{
314 		.enter = NULL }
315 };
316 
317 static struct cpuidle_state cht_cstates[] __initdata = {
318 	{
319 		.name = "C1",
320 		.desc = "MWAIT 0x00",
321 		.flags = MWAIT2flg(0x00),
322 		.exit_latency = 1,
323 		.target_residency = 1,
324 		.enter = &intel_idle,
325 		.enter_s2idle = intel_idle_s2idle, },
326 	{
327 		.name = "C6N",
328 		.desc = "MWAIT 0x58",
329 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
330 		.exit_latency = 80,
331 		.target_residency = 275,
332 		.enter = &intel_idle,
333 		.enter_s2idle = intel_idle_s2idle, },
334 	{
335 		.name = "C6S",
336 		.desc = "MWAIT 0x52",
337 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
338 		.exit_latency = 200,
339 		.target_residency = 560,
340 		.enter = &intel_idle,
341 		.enter_s2idle = intel_idle_s2idle, },
342 	{
343 		.name = "C7",
344 		.desc = "MWAIT 0x60",
345 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
346 		.exit_latency = 1200,
347 		.target_residency = 4000,
348 		.enter = &intel_idle,
349 		.enter_s2idle = intel_idle_s2idle, },
350 	{
351 		.name = "C7S",
352 		.desc = "MWAIT 0x64",
353 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
354 		.exit_latency = 10000,
355 		.target_residency = 20000,
356 		.enter = &intel_idle,
357 		.enter_s2idle = intel_idle_s2idle, },
358 	{
359 		.enter = NULL }
360 };
361 
362 static struct cpuidle_state ivb_cstates[] __initdata = {
363 	{
364 		.name = "C1",
365 		.desc = "MWAIT 0x00",
366 		.flags = MWAIT2flg(0x00),
367 		.exit_latency = 1,
368 		.target_residency = 1,
369 		.enter = &intel_idle,
370 		.enter_s2idle = intel_idle_s2idle, },
371 	{
372 		.name = "C1E",
373 		.desc = "MWAIT 0x01",
374 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
375 		.exit_latency = 10,
376 		.target_residency = 20,
377 		.enter = &intel_idle,
378 		.enter_s2idle = intel_idle_s2idle, },
379 	{
380 		.name = "C3",
381 		.desc = "MWAIT 0x10",
382 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
383 		.exit_latency = 59,
384 		.target_residency = 156,
385 		.enter = &intel_idle,
386 		.enter_s2idle = intel_idle_s2idle, },
387 	{
388 		.name = "C6",
389 		.desc = "MWAIT 0x20",
390 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
391 		.exit_latency = 80,
392 		.target_residency = 300,
393 		.enter = &intel_idle,
394 		.enter_s2idle = intel_idle_s2idle, },
395 	{
396 		.name = "C7",
397 		.desc = "MWAIT 0x30",
398 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
399 		.exit_latency = 87,
400 		.target_residency = 300,
401 		.enter = &intel_idle,
402 		.enter_s2idle = intel_idle_s2idle, },
403 	{
404 		.enter = NULL }
405 };
406 
407 static struct cpuidle_state ivt_cstates[] __initdata = {
408 	{
409 		.name = "C1",
410 		.desc = "MWAIT 0x00",
411 		.flags = MWAIT2flg(0x00),
412 		.exit_latency = 1,
413 		.target_residency = 1,
414 		.enter = &intel_idle,
415 		.enter_s2idle = intel_idle_s2idle, },
416 	{
417 		.name = "C1E",
418 		.desc = "MWAIT 0x01",
419 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
420 		.exit_latency = 10,
421 		.target_residency = 80,
422 		.enter = &intel_idle,
423 		.enter_s2idle = intel_idle_s2idle, },
424 	{
425 		.name = "C3",
426 		.desc = "MWAIT 0x10",
427 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
428 		.exit_latency = 59,
429 		.target_residency = 156,
430 		.enter = &intel_idle,
431 		.enter_s2idle = intel_idle_s2idle, },
432 	{
433 		.name = "C6",
434 		.desc = "MWAIT 0x20",
435 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
436 		.exit_latency = 82,
437 		.target_residency = 300,
438 		.enter = &intel_idle,
439 		.enter_s2idle = intel_idle_s2idle, },
440 	{
441 		.enter = NULL }
442 };
443 
444 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
445 	{
446 		.name = "C1",
447 		.desc = "MWAIT 0x00",
448 		.flags = MWAIT2flg(0x00),
449 		.exit_latency = 1,
450 		.target_residency = 1,
451 		.enter = &intel_idle,
452 		.enter_s2idle = intel_idle_s2idle, },
453 	{
454 		.name = "C1E",
455 		.desc = "MWAIT 0x01",
456 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
457 		.exit_latency = 10,
458 		.target_residency = 250,
459 		.enter = &intel_idle,
460 		.enter_s2idle = intel_idle_s2idle, },
461 	{
462 		.name = "C3",
463 		.desc = "MWAIT 0x10",
464 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
465 		.exit_latency = 59,
466 		.target_residency = 300,
467 		.enter = &intel_idle,
468 		.enter_s2idle = intel_idle_s2idle, },
469 	{
470 		.name = "C6",
471 		.desc = "MWAIT 0x20",
472 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
473 		.exit_latency = 84,
474 		.target_residency = 400,
475 		.enter = &intel_idle,
476 		.enter_s2idle = intel_idle_s2idle, },
477 	{
478 		.enter = NULL }
479 };
480 
481 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
482 	{
483 		.name = "C1",
484 		.desc = "MWAIT 0x00",
485 		.flags = MWAIT2flg(0x00),
486 		.exit_latency = 1,
487 		.target_residency = 1,
488 		.enter = &intel_idle,
489 		.enter_s2idle = intel_idle_s2idle, },
490 	{
491 		.name = "C1E",
492 		.desc = "MWAIT 0x01",
493 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
494 		.exit_latency = 10,
495 		.target_residency = 500,
496 		.enter = &intel_idle,
497 		.enter_s2idle = intel_idle_s2idle, },
498 	{
499 		.name = "C3",
500 		.desc = "MWAIT 0x10",
501 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
502 		.exit_latency = 59,
503 		.target_residency = 600,
504 		.enter = &intel_idle,
505 		.enter_s2idle = intel_idle_s2idle, },
506 	{
507 		.name = "C6",
508 		.desc = "MWAIT 0x20",
509 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
510 		.exit_latency = 88,
511 		.target_residency = 700,
512 		.enter = &intel_idle,
513 		.enter_s2idle = intel_idle_s2idle, },
514 	{
515 		.enter = NULL }
516 };
517 
518 static struct cpuidle_state hsw_cstates[] __initdata = {
519 	{
520 		.name = "C1",
521 		.desc = "MWAIT 0x00",
522 		.flags = MWAIT2flg(0x00),
523 		.exit_latency = 2,
524 		.target_residency = 2,
525 		.enter = &intel_idle,
526 		.enter_s2idle = intel_idle_s2idle, },
527 	{
528 		.name = "C1E",
529 		.desc = "MWAIT 0x01",
530 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
531 		.exit_latency = 10,
532 		.target_residency = 20,
533 		.enter = &intel_idle,
534 		.enter_s2idle = intel_idle_s2idle, },
535 	{
536 		.name = "C3",
537 		.desc = "MWAIT 0x10",
538 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
539 		.exit_latency = 33,
540 		.target_residency = 100,
541 		.enter = &intel_idle,
542 		.enter_s2idle = intel_idle_s2idle, },
543 	{
544 		.name = "C6",
545 		.desc = "MWAIT 0x20",
546 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
547 		.exit_latency = 133,
548 		.target_residency = 400,
549 		.enter = &intel_idle,
550 		.enter_s2idle = intel_idle_s2idle, },
551 	{
552 		.name = "C7s",
553 		.desc = "MWAIT 0x32",
554 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
555 		.exit_latency = 166,
556 		.target_residency = 500,
557 		.enter = &intel_idle,
558 		.enter_s2idle = intel_idle_s2idle, },
559 	{
560 		.name = "C8",
561 		.desc = "MWAIT 0x40",
562 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
563 		.exit_latency = 300,
564 		.target_residency = 900,
565 		.enter = &intel_idle,
566 		.enter_s2idle = intel_idle_s2idle, },
567 	{
568 		.name = "C9",
569 		.desc = "MWAIT 0x50",
570 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
571 		.exit_latency = 600,
572 		.target_residency = 1800,
573 		.enter = &intel_idle,
574 		.enter_s2idle = intel_idle_s2idle, },
575 	{
576 		.name = "C10",
577 		.desc = "MWAIT 0x60",
578 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
579 		.exit_latency = 2600,
580 		.target_residency = 7700,
581 		.enter = &intel_idle,
582 		.enter_s2idle = intel_idle_s2idle, },
583 	{
584 		.enter = NULL }
585 };
586 static struct cpuidle_state bdw_cstates[] __initdata = {
587 	{
588 		.name = "C1",
589 		.desc = "MWAIT 0x00",
590 		.flags = MWAIT2flg(0x00),
591 		.exit_latency = 2,
592 		.target_residency = 2,
593 		.enter = &intel_idle,
594 		.enter_s2idle = intel_idle_s2idle, },
595 	{
596 		.name = "C1E",
597 		.desc = "MWAIT 0x01",
598 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
599 		.exit_latency = 10,
600 		.target_residency = 20,
601 		.enter = &intel_idle,
602 		.enter_s2idle = intel_idle_s2idle, },
603 	{
604 		.name = "C3",
605 		.desc = "MWAIT 0x10",
606 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
607 		.exit_latency = 40,
608 		.target_residency = 100,
609 		.enter = &intel_idle,
610 		.enter_s2idle = intel_idle_s2idle, },
611 	{
612 		.name = "C6",
613 		.desc = "MWAIT 0x20",
614 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
615 		.exit_latency = 133,
616 		.target_residency = 400,
617 		.enter = &intel_idle,
618 		.enter_s2idle = intel_idle_s2idle, },
619 	{
620 		.name = "C7s",
621 		.desc = "MWAIT 0x32",
622 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
623 		.exit_latency = 166,
624 		.target_residency = 500,
625 		.enter = &intel_idle,
626 		.enter_s2idle = intel_idle_s2idle, },
627 	{
628 		.name = "C8",
629 		.desc = "MWAIT 0x40",
630 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
631 		.exit_latency = 300,
632 		.target_residency = 900,
633 		.enter = &intel_idle,
634 		.enter_s2idle = intel_idle_s2idle, },
635 	{
636 		.name = "C9",
637 		.desc = "MWAIT 0x50",
638 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
639 		.exit_latency = 600,
640 		.target_residency = 1800,
641 		.enter = &intel_idle,
642 		.enter_s2idle = intel_idle_s2idle, },
643 	{
644 		.name = "C10",
645 		.desc = "MWAIT 0x60",
646 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
647 		.exit_latency = 2600,
648 		.target_residency = 7700,
649 		.enter = &intel_idle,
650 		.enter_s2idle = intel_idle_s2idle, },
651 	{
652 		.enter = NULL }
653 };
654 
655 static struct cpuidle_state skl_cstates[] __initdata = {
656 	{
657 		.name = "C1",
658 		.desc = "MWAIT 0x00",
659 		.flags = MWAIT2flg(0x00),
660 		.exit_latency = 2,
661 		.target_residency = 2,
662 		.enter = &intel_idle,
663 		.enter_s2idle = intel_idle_s2idle, },
664 	{
665 		.name = "C1E",
666 		.desc = "MWAIT 0x01",
667 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
668 		.exit_latency = 10,
669 		.target_residency = 20,
670 		.enter = &intel_idle,
671 		.enter_s2idle = intel_idle_s2idle, },
672 	{
673 		.name = "C3",
674 		.desc = "MWAIT 0x10",
675 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
676 		.exit_latency = 70,
677 		.target_residency = 100,
678 		.enter = &intel_idle,
679 		.enter_s2idle = intel_idle_s2idle, },
680 	{
681 		.name = "C6",
682 		.desc = "MWAIT 0x20",
683 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
684 		.exit_latency = 85,
685 		.target_residency = 200,
686 		.enter = &intel_idle,
687 		.enter_s2idle = intel_idle_s2idle, },
688 	{
689 		.name = "C7s",
690 		.desc = "MWAIT 0x33",
691 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
692 		.exit_latency = 124,
693 		.target_residency = 800,
694 		.enter = &intel_idle,
695 		.enter_s2idle = intel_idle_s2idle, },
696 	{
697 		.name = "C8",
698 		.desc = "MWAIT 0x40",
699 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
700 		.exit_latency = 200,
701 		.target_residency = 800,
702 		.enter = &intel_idle,
703 		.enter_s2idle = intel_idle_s2idle, },
704 	{
705 		.name = "C9",
706 		.desc = "MWAIT 0x50",
707 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
708 		.exit_latency = 480,
709 		.target_residency = 5000,
710 		.enter = &intel_idle,
711 		.enter_s2idle = intel_idle_s2idle, },
712 	{
713 		.name = "C10",
714 		.desc = "MWAIT 0x60",
715 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
716 		.exit_latency = 890,
717 		.target_residency = 5000,
718 		.enter = &intel_idle,
719 		.enter_s2idle = intel_idle_s2idle, },
720 	{
721 		.enter = NULL }
722 };
723 
724 static struct cpuidle_state skx_cstates[] __initdata = {
725 	{
726 		.name = "C1",
727 		.desc = "MWAIT 0x00",
728 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
729 		.exit_latency = 2,
730 		.target_residency = 2,
731 		.enter = &intel_idle,
732 		.enter_s2idle = intel_idle_s2idle, },
733 	{
734 		.name = "C1E",
735 		.desc = "MWAIT 0x01",
736 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
737 		.exit_latency = 10,
738 		.target_residency = 20,
739 		.enter = &intel_idle,
740 		.enter_s2idle = intel_idle_s2idle, },
741 	{
742 		.name = "C6",
743 		.desc = "MWAIT 0x20",
744 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
745 		.exit_latency = 133,
746 		.target_residency = 600,
747 		.enter = &intel_idle,
748 		.enter_s2idle = intel_idle_s2idle, },
749 	{
750 		.enter = NULL }
751 };
752 
753 static struct cpuidle_state icx_cstates[] __initdata = {
754 	{
755 		.name = "C1",
756 		.desc = "MWAIT 0x00",
757 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
758 		.exit_latency = 1,
759 		.target_residency = 1,
760 		.enter = &intel_idle,
761 		.enter_s2idle = intel_idle_s2idle, },
762 	{
763 		.name = "C1E",
764 		.desc = "MWAIT 0x01",
765 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
766 		.exit_latency = 4,
767 		.target_residency = 4,
768 		.enter = &intel_idle,
769 		.enter_s2idle = intel_idle_s2idle, },
770 	{
771 		.name = "C6",
772 		.desc = "MWAIT 0x20",
773 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
774 		.exit_latency = 170,
775 		.target_residency = 600,
776 		.enter = &intel_idle,
777 		.enter_s2idle = intel_idle_s2idle, },
778 	{
779 		.enter = NULL }
780 };
781 
782 /*
783  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
784  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
785  * But in this case there is effectively no C1, because C1 requests are
786  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
787  * and C1E requests end up with C1, so there is effectively no C1E.
788  *
789  * By default we enable C1E and disable C1 by marking it with
790  * 'CPUIDLE_FLAG_UNUSABLE'.
791  */
792 static struct cpuidle_state adl_cstates[] __initdata = {
793 	{
794 		.name = "C1",
795 		.desc = "MWAIT 0x00",
796 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
797 		.exit_latency = 1,
798 		.target_residency = 1,
799 		.enter = &intel_idle,
800 		.enter_s2idle = intel_idle_s2idle, },
801 	{
802 		.name = "C1E",
803 		.desc = "MWAIT 0x01",
804 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
805 		.exit_latency = 2,
806 		.target_residency = 4,
807 		.enter = &intel_idle,
808 		.enter_s2idle = intel_idle_s2idle, },
809 	{
810 		.name = "C6",
811 		.desc = "MWAIT 0x20",
812 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
813 		.exit_latency = 220,
814 		.target_residency = 600,
815 		.enter = &intel_idle,
816 		.enter_s2idle = intel_idle_s2idle, },
817 	{
818 		.name = "C8",
819 		.desc = "MWAIT 0x40",
820 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
821 		.exit_latency = 280,
822 		.target_residency = 800,
823 		.enter = &intel_idle,
824 		.enter_s2idle = intel_idle_s2idle, },
825 	{
826 		.name = "C10",
827 		.desc = "MWAIT 0x60",
828 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
829 		.exit_latency = 680,
830 		.target_residency = 2000,
831 		.enter = &intel_idle,
832 		.enter_s2idle = intel_idle_s2idle, },
833 	{
834 		.enter = NULL }
835 };
836 
837 static struct cpuidle_state adl_l_cstates[] __initdata = {
838 	{
839 		.name = "C1",
840 		.desc = "MWAIT 0x00",
841 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
842 		.exit_latency = 1,
843 		.target_residency = 1,
844 		.enter = &intel_idle,
845 		.enter_s2idle = intel_idle_s2idle, },
846 	{
847 		.name = "C1E",
848 		.desc = "MWAIT 0x01",
849 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
850 		.exit_latency = 2,
851 		.target_residency = 4,
852 		.enter = &intel_idle,
853 		.enter_s2idle = intel_idle_s2idle, },
854 	{
855 		.name = "C6",
856 		.desc = "MWAIT 0x20",
857 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
858 		.exit_latency = 170,
859 		.target_residency = 500,
860 		.enter = &intel_idle,
861 		.enter_s2idle = intel_idle_s2idle, },
862 	{
863 		.name = "C8",
864 		.desc = "MWAIT 0x40",
865 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
866 		.exit_latency = 200,
867 		.target_residency = 600,
868 		.enter = &intel_idle,
869 		.enter_s2idle = intel_idle_s2idle, },
870 	{
871 		.name = "C10",
872 		.desc = "MWAIT 0x60",
873 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
874 		.exit_latency = 230,
875 		.target_residency = 700,
876 		.enter = &intel_idle,
877 		.enter_s2idle = intel_idle_s2idle, },
878 	{
879 		.enter = NULL }
880 };
881 
882 /*
883  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
884  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
885  * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
886  * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
887  * both C1 and C1E requests end up with C1, so there is effectively no C1E.
888  *
889  * By default we enable C1 and disable C1E by marking it with
890  * 'CPUIDLE_FLAG_UNUSABLE'.
891  */
892 static struct cpuidle_state spr_cstates[] __initdata = {
893 	{
894 		.name = "C1",
895 		.desc = "MWAIT 0x00",
896 		.flags = MWAIT2flg(0x00),
897 		.exit_latency = 1,
898 		.target_residency = 1,
899 		.enter = &intel_idle,
900 		.enter_s2idle = intel_idle_s2idle, },
901 	{
902 		.name = "C1E",
903 		.desc = "MWAIT 0x01",
904 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
905 					   CPUIDLE_FLAG_UNUSABLE,
906 		.exit_latency = 2,
907 		.target_residency = 4,
908 		.enter = &intel_idle,
909 		.enter_s2idle = intel_idle_s2idle, },
910 	{
911 		.name = "C6",
912 		.desc = "MWAIT 0x20",
913 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
914 		.exit_latency = 290,
915 		.target_residency = 800,
916 		.enter = &intel_idle,
917 		.enter_s2idle = intel_idle_s2idle, },
918 	{
919 		.enter = NULL }
920 };
921 
922 static struct cpuidle_state atom_cstates[] __initdata = {
923 	{
924 		.name = "C1E",
925 		.desc = "MWAIT 0x00",
926 		.flags = MWAIT2flg(0x00),
927 		.exit_latency = 10,
928 		.target_residency = 20,
929 		.enter = &intel_idle,
930 		.enter_s2idle = intel_idle_s2idle, },
931 	{
932 		.name = "C2",
933 		.desc = "MWAIT 0x10",
934 		.flags = MWAIT2flg(0x10),
935 		.exit_latency = 20,
936 		.target_residency = 80,
937 		.enter = &intel_idle,
938 		.enter_s2idle = intel_idle_s2idle, },
939 	{
940 		.name = "C4",
941 		.desc = "MWAIT 0x30",
942 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
943 		.exit_latency = 100,
944 		.target_residency = 400,
945 		.enter = &intel_idle,
946 		.enter_s2idle = intel_idle_s2idle, },
947 	{
948 		.name = "C6",
949 		.desc = "MWAIT 0x52",
950 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
951 		.exit_latency = 140,
952 		.target_residency = 560,
953 		.enter = &intel_idle,
954 		.enter_s2idle = intel_idle_s2idle, },
955 	{
956 		.enter = NULL }
957 };
958 static struct cpuidle_state tangier_cstates[] __initdata = {
959 	{
960 		.name = "C1",
961 		.desc = "MWAIT 0x00",
962 		.flags = MWAIT2flg(0x00),
963 		.exit_latency = 1,
964 		.target_residency = 4,
965 		.enter = &intel_idle,
966 		.enter_s2idle = intel_idle_s2idle, },
967 	{
968 		.name = "C4",
969 		.desc = "MWAIT 0x30",
970 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
971 		.exit_latency = 100,
972 		.target_residency = 400,
973 		.enter = &intel_idle,
974 		.enter_s2idle = intel_idle_s2idle, },
975 	{
976 		.name = "C6",
977 		.desc = "MWAIT 0x52",
978 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
979 		.exit_latency = 140,
980 		.target_residency = 560,
981 		.enter = &intel_idle,
982 		.enter_s2idle = intel_idle_s2idle, },
983 	{
984 		.name = "C7",
985 		.desc = "MWAIT 0x60",
986 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
987 		.exit_latency = 1200,
988 		.target_residency = 4000,
989 		.enter = &intel_idle,
990 		.enter_s2idle = intel_idle_s2idle, },
991 	{
992 		.name = "C9",
993 		.desc = "MWAIT 0x64",
994 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
995 		.exit_latency = 10000,
996 		.target_residency = 20000,
997 		.enter = &intel_idle,
998 		.enter_s2idle = intel_idle_s2idle, },
999 	{
1000 		.enter = NULL }
1001 };
1002 static struct cpuidle_state avn_cstates[] __initdata = {
1003 	{
1004 		.name = "C1",
1005 		.desc = "MWAIT 0x00",
1006 		.flags = MWAIT2flg(0x00),
1007 		.exit_latency = 2,
1008 		.target_residency = 2,
1009 		.enter = &intel_idle,
1010 		.enter_s2idle = intel_idle_s2idle, },
1011 	{
1012 		.name = "C6",
1013 		.desc = "MWAIT 0x51",
1014 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1015 		.exit_latency = 15,
1016 		.target_residency = 45,
1017 		.enter = &intel_idle,
1018 		.enter_s2idle = intel_idle_s2idle, },
1019 	{
1020 		.enter = NULL }
1021 };
1022 static struct cpuidle_state knl_cstates[] __initdata = {
1023 	{
1024 		.name = "C1",
1025 		.desc = "MWAIT 0x00",
1026 		.flags = MWAIT2flg(0x00),
1027 		.exit_latency = 1,
1028 		.target_residency = 2,
1029 		.enter = &intel_idle,
1030 		.enter_s2idle = intel_idle_s2idle },
1031 	{
1032 		.name = "C6",
1033 		.desc = "MWAIT 0x10",
1034 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1035 		.exit_latency = 120,
1036 		.target_residency = 500,
1037 		.enter = &intel_idle,
1038 		.enter_s2idle = intel_idle_s2idle },
1039 	{
1040 		.enter = NULL }
1041 };
1042 
1043 static struct cpuidle_state bxt_cstates[] __initdata = {
1044 	{
1045 		.name = "C1",
1046 		.desc = "MWAIT 0x00",
1047 		.flags = MWAIT2flg(0x00),
1048 		.exit_latency = 2,
1049 		.target_residency = 2,
1050 		.enter = &intel_idle,
1051 		.enter_s2idle = intel_idle_s2idle, },
1052 	{
1053 		.name = "C1E",
1054 		.desc = "MWAIT 0x01",
1055 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1056 		.exit_latency = 10,
1057 		.target_residency = 20,
1058 		.enter = &intel_idle,
1059 		.enter_s2idle = intel_idle_s2idle, },
1060 	{
1061 		.name = "C6",
1062 		.desc = "MWAIT 0x20",
1063 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1064 		.exit_latency = 133,
1065 		.target_residency = 133,
1066 		.enter = &intel_idle,
1067 		.enter_s2idle = intel_idle_s2idle, },
1068 	{
1069 		.name = "C7s",
1070 		.desc = "MWAIT 0x31",
1071 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1072 		.exit_latency = 155,
1073 		.target_residency = 155,
1074 		.enter = &intel_idle,
1075 		.enter_s2idle = intel_idle_s2idle, },
1076 	{
1077 		.name = "C8",
1078 		.desc = "MWAIT 0x40",
1079 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1080 		.exit_latency = 1000,
1081 		.target_residency = 1000,
1082 		.enter = &intel_idle,
1083 		.enter_s2idle = intel_idle_s2idle, },
1084 	{
1085 		.name = "C9",
1086 		.desc = "MWAIT 0x50",
1087 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1088 		.exit_latency = 2000,
1089 		.target_residency = 2000,
1090 		.enter = &intel_idle,
1091 		.enter_s2idle = intel_idle_s2idle, },
1092 	{
1093 		.name = "C10",
1094 		.desc = "MWAIT 0x60",
1095 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1096 		.exit_latency = 10000,
1097 		.target_residency = 10000,
1098 		.enter = &intel_idle,
1099 		.enter_s2idle = intel_idle_s2idle, },
1100 	{
1101 		.enter = NULL }
1102 };
1103 
1104 static struct cpuidle_state dnv_cstates[] __initdata = {
1105 	{
1106 		.name = "C1",
1107 		.desc = "MWAIT 0x00",
1108 		.flags = MWAIT2flg(0x00),
1109 		.exit_latency = 2,
1110 		.target_residency = 2,
1111 		.enter = &intel_idle,
1112 		.enter_s2idle = intel_idle_s2idle, },
1113 	{
1114 		.name = "C1E",
1115 		.desc = "MWAIT 0x01",
1116 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1117 		.exit_latency = 10,
1118 		.target_residency = 20,
1119 		.enter = &intel_idle,
1120 		.enter_s2idle = intel_idle_s2idle, },
1121 	{
1122 		.name = "C6",
1123 		.desc = "MWAIT 0x20",
1124 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1125 		.exit_latency = 50,
1126 		.target_residency = 500,
1127 		.enter = &intel_idle,
1128 		.enter_s2idle = intel_idle_s2idle, },
1129 	{
1130 		.enter = NULL }
1131 };
1132 
1133 /*
1134  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1135  * C6, and this is indicated in the CPUID mwait leaf.
1136  */
1137 static struct cpuidle_state snr_cstates[] __initdata = {
1138 	{
1139 		.name = "C1",
1140 		.desc = "MWAIT 0x00",
1141 		.flags = MWAIT2flg(0x00),
1142 		.exit_latency = 2,
1143 		.target_residency = 2,
1144 		.enter = &intel_idle,
1145 		.enter_s2idle = intel_idle_s2idle, },
1146 	{
1147 		.name = "C1E",
1148 		.desc = "MWAIT 0x01",
1149 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1150 		.exit_latency = 15,
1151 		.target_residency = 25,
1152 		.enter = &intel_idle,
1153 		.enter_s2idle = intel_idle_s2idle, },
1154 	{
1155 		.name = "C6",
1156 		.desc = "MWAIT 0x20",
1157 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1158 		.exit_latency = 130,
1159 		.target_residency = 500,
1160 		.enter = &intel_idle,
1161 		.enter_s2idle = intel_idle_s2idle, },
1162 	{
1163 		.enter = NULL }
1164 };
1165 
1166 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1167 	.state_table = nehalem_cstates,
1168 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1169 	.disable_promotion_to_c1e = true,
1170 };
1171 
1172 static const struct idle_cpu idle_cpu_nhx __initconst = {
1173 	.state_table = nehalem_cstates,
1174 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1175 	.disable_promotion_to_c1e = true,
1176 	.use_acpi = true,
1177 };
1178 
1179 static const struct idle_cpu idle_cpu_atom __initconst = {
1180 	.state_table = atom_cstates,
1181 };
1182 
1183 static const struct idle_cpu idle_cpu_tangier __initconst = {
1184 	.state_table = tangier_cstates,
1185 };
1186 
1187 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1188 	.state_table = atom_cstates,
1189 	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1190 };
1191 
1192 static const struct idle_cpu idle_cpu_snb __initconst = {
1193 	.state_table = snb_cstates,
1194 	.disable_promotion_to_c1e = true,
1195 };
1196 
1197 static const struct idle_cpu idle_cpu_snx __initconst = {
1198 	.state_table = snb_cstates,
1199 	.disable_promotion_to_c1e = true,
1200 	.use_acpi = true,
1201 };
1202 
1203 static const struct idle_cpu idle_cpu_byt __initconst = {
1204 	.state_table = byt_cstates,
1205 	.disable_promotion_to_c1e = true,
1206 	.byt_auto_demotion_disable_flag = true,
1207 };
1208 
1209 static const struct idle_cpu idle_cpu_cht __initconst = {
1210 	.state_table = cht_cstates,
1211 	.disable_promotion_to_c1e = true,
1212 	.byt_auto_demotion_disable_flag = true,
1213 };
1214 
1215 static const struct idle_cpu idle_cpu_ivb __initconst = {
1216 	.state_table = ivb_cstates,
1217 	.disable_promotion_to_c1e = true,
1218 };
1219 
1220 static const struct idle_cpu idle_cpu_ivt __initconst = {
1221 	.state_table = ivt_cstates,
1222 	.disable_promotion_to_c1e = true,
1223 	.use_acpi = true,
1224 };
1225 
1226 static const struct idle_cpu idle_cpu_hsw __initconst = {
1227 	.state_table = hsw_cstates,
1228 	.disable_promotion_to_c1e = true,
1229 };
1230 
1231 static const struct idle_cpu idle_cpu_hsx __initconst = {
1232 	.state_table = hsw_cstates,
1233 	.disable_promotion_to_c1e = true,
1234 	.use_acpi = true,
1235 };
1236 
1237 static const struct idle_cpu idle_cpu_bdw __initconst = {
1238 	.state_table = bdw_cstates,
1239 	.disable_promotion_to_c1e = true,
1240 };
1241 
1242 static const struct idle_cpu idle_cpu_bdx __initconst = {
1243 	.state_table = bdw_cstates,
1244 	.disable_promotion_to_c1e = true,
1245 	.use_acpi = true,
1246 };
1247 
1248 static const struct idle_cpu idle_cpu_skl __initconst = {
1249 	.state_table = skl_cstates,
1250 	.disable_promotion_to_c1e = true,
1251 };
1252 
1253 static const struct idle_cpu idle_cpu_skx __initconst = {
1254 	.state_table = skx_cstates,
1255 	.disable_promotion_to_c1e = true,
1256 	.use_acpi = true,
1257 };
1258 
1259 static const struct idle_cpu idle_cpu_icx __initconst = {
1260 	.state_table = icx_cstates,
1261 	.disable_promotion_to_c1e = true,
1262 	.use_acpi = true,
1263 };
1264 
1265 static const struct idle_cpu idle_cpu_adl __initconst = {
1266 	.state_table = adl_cstates,
1267 };
1268 
1269 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1270 	.state_table = adl_l_cstates,
1271 };
1272 
1273 static const struct idle_cpu idle_cpu_spr __initconst = {
1274 	.state_table = spr_cstates,
1275 	.disable_promotion_to_c1e = true,
1276 	.use_acpi = true,
1277 };
1278 
1279 static const struct idle_cpu idle_cpu_avn __initconst = {
1280 	.state_table = avn_cstates,
1281 	.disable_promotion_to_c1e = true,
1282 	.use_acpi = true,
1283 };
1284 
1285 static const struct idle_cpu idle_cpu_knl __initconst = {
1286 	.state_table = knl_cstates,
1287 	.use_acpi = true,
1288 };
1289 
1290 static const struct idle_cpu idle_cpu_bxt __initconst = {
1291 	.state_table = bxt_cstates,
1292 	.disable_promotion_to_c1e = true,
1293 };
1294 
1295 static const struct idle_cpu idle_cpu_dnv __initconst = {
1296 	.state_table = dnv_cstates,
1297 	.disable_promotion_to_c1e = true,
1298 	.use_acpi = true,
1299 };
1300 
1301 static const struct idle_cpu idle_cpu_snr __initconst = {
1302 	.state_table = snr_cstates,
1303 	.disable_promotion_to_c1e = true,
1304 	.use_acpi = true,
1305 };
1306 
1307 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1308 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
1309 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
1310 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
1311 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
1312 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
1313 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
1314 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
1315 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
1316 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
1317 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
1318 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
1319 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
1320 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
1321 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
1322 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
1323 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
1324 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
1325 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
1326 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
1327 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
1328 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
1329 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
1330 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
1331 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
1332 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
1333 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
1334 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
1335 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
1336 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
1337 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
1338 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
1339 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
1340 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
1341 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
1342 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
1343 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
1344 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
1345 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
1346 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
1347 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
1348 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
1349 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
1350 	{}
1351 };
1352 
1353 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1354 	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1355 	{}
1356 };
1357 
1358 static bool __init intel_idle_max_cstate_reached(int cstate)
1359 {
1360 	if (cstate + 1 > max_cstate) {
1361 		pr_info("max_cstate %d reached\n", max_cstate);
1362 		return true;
1363 	}
1364 	return false;
1365 }
1366 
1367 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1368 {
1369 	unsigned long eax = flg2MWAIT(state->flags);
1370 
1371 	if (boot_cpu_has(X86_FEATURE_ARAT))
1372 		return false;
1373 
1374 	/*
1375 	 * Switch over to one-shot tick broadcast if the target C-state
1376 	 * is deeper than C1.
1377 	 */
1378 	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1379 }
1380 
1381 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1382 #include <acpi/processor.h>
1383 
1384 static bool no_acpi __read_mostly;
1385 module_param(no_acpi, bool, 0444);
1386 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1387 
1388 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1389 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1390 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1391 
1392 static struct acpi_processor_power acpi_state_table __initdata;
1393 
1394 /**
1395  * intel_idle_cst_usable - Check if the _CST information can be used.
1396  *
1397  * Check if all of the C-states listed by _CST in the max_cstate range are
1398  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1399  */
1400 static bool __init intel_idle_cst_usable(void)
1401 {
1402 	int cstate, limit;
1403 
1404 	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1405 		      acpi_state_table.count);
1406 
1407 	for (cstate = 1; cstate < limit; cstate++) {
1408 		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1409 
1410 		if (cx->entry_method != ACPI_CSTATE_FFH)
1411 			return false;
1412 	}
1413 
1414 	return true;
1415 }
1416 
1417 static bool __init intel_idle_acpi_cst_extract(void)
1418 {
1419 	unsigned int cpu;
1420 
1421 	if (no_acpi) {
1422 		pr_debug("Not allowed to use ACPI _CST\n");
1423 		return false;
1424 	}
1425 
1426 	for_each_possible_cpu(cpu) {
1427 		struct acpi_processor *pr = per_cpu(processors, cpu);
1428 
1429 		if (!pr)
1430 			continue;
1431 
1432 		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1433 			continue;
1434 
1435 		acpi_state_table.count++;
1436 
1437 		if (!intel_idle_cst_usable())
1438 			continue;
1439 
1440 		if (!acpi_processor_claim_cst_control())
1441 			break;
1442 
1443 		return true;
1444 	}
1445 
1446 	acpi_state_table.count = 0;
1447 	pr_debug("ACPI _CST not found or not usable\n");
1448 	return false;
1449 }
1450 
1451 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1452 {
1453 	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1454 
1455 	/*
1456 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1457 	 * the interesting states are ACPI_CSTATE_FFH.
1458 	 */
1459 	for (cstate = 1; cstate < limit; cstate++) {
1460 		struct acpi_processor_cx *cx;
1461 		struct cpuidle_state *state;
1462 
1463 		if (intel_idle_max_cstate_reached(cstate - 1))
1464 			break;
1465 
1466 		cx = &acpi_state_table.states[cstate];
1467 
1468 		state = &drv->states[drv->state_count++];
1469 
1470 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1471 		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1472 		state->exit_latency = cx->latency;
1473 		/*
1474 		 * For C1-type C-states use the same number for both the exit
1475 		 * latency and target residency, because that is the case for
1476 		 * C1 in the majority of the static C-states tables above.
1477 		 * For the other types of C-states, however, set the target
1478 		 * residency to 3 times the exit latency which should lead to
1479 		 * a reasonable balance between energy-efficiency and
1480 		 * performance in the majority of interesting cases.
1481 		 */
1482 		state->target_residency = cx->latency;
1483 		if (cx->type > ACPI_STATE_C1)
1484 			state->target_residency *= 3;
1485 
1486 		state->flags = MWAIT2flg(cx->address);
1487 		if (cx->type > ACPI_STATE_C2)
1488 			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1489 
1490 		if (disabled_states_mask & BIT(cstate))
1491 			state->flags |= CPUIDLE_FLAG_OFF;
1492 
1493 		if (intel_idle_state_needs_timer_stop(state))
1494 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1495 
1496 		state->enter = intel_idle;
1497 		state->enter_s2idle = intel_idle_s2idle;
1498 	}
1499 }
1500 
1501 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1502 {
1503 	int cstate, limit;
1504 
1505 	/*
1506 	 * If there are no _CST C-states, do not disable any C-states by
1507 	 * default.
1508 	 */
1509 	if (!acpi_state_table.count)
1510 		return false;
1511 
1512 	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1513 	/*
1514 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1515 	 * the interesting states are ACPI_CSTATE_FFH.
1516 	 */
1517 	for (cstate = 1; cstate < limit; cstate++) {
1518 		if (acpi_state_table.states[cstate].address == mwait_hint)
1519 			return false;
1520 	}
1521 	return true;
1522 }
1523 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1524 #define force_use_acpi	(false)
1525 
1526 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1527 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1528 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1529 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1530 
1531 /**
1532  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1533  *
1534  * Tune IVT multi-socket targets.
1535  * Assumption: num_sockets == (max_package_num + 1).
1536  */
1537 static void __init ivt_idle_state_table_update(void)
1538 {
1539 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1540 	int cpu, package_num, num_sockets = 1;
1541 
1542 	for_each_online_cpu(cpu) {
1543 		package_num = topology_physical_package_id(cpu);
1544 		if (package_num + 1 > num_sockets) {
1545 			num_sockets = package_num + 1;
1546 
1547 			if (num_sockets > 4) {
1548 				cpuidle_state_table = ivt_cstates_8s;
1549 				return;
1550 			}
1551 		}
1552 	}
1553 
1554 	if (num_sockets > 2)
1555 		cpuidle_state_table = ivt_cstates_4s;
1556 
1557 	/* else, 1 and 2 socket systems use default ivt_cstates */
1558 }
1559 
1560 /**
1561  * irtl_2_usec - IRTL to microseconds conversion.
1562  * @irtl: IRTL MSR value.
1563  *
1564  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1565  */
1566 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1567 {
1568 	static const unsigned int irtl_ns_units[] __initconst = {
1569 		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1570 	};
1571 	unsigned long long ns;
1572 
1573 	if (!irtl)
1574 		return 0;
1575 
1576 	ns = irtl_ns_units[(irtl >> 10) & 0x7];
1577 
1578 	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1579 }
1580 
1581 /**
1582  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1583  *
1584  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1585  * definitive maximum latency and use the same value for target_residency.
1586  */
1587 static void __init bxt_idle_state_table_update(void)
1588 {
1589 	unsigned long long msr;
1590 	unsigned int usec;
1591 
1592 	rdmsrl(MSR_PKGC6_IRTL, msr);
1593 	usec = irtl_2_usec(msr);
1594 	if (usec) {
1595 		bxt_cstates[2].exit_latency = usec;
1596 		bxt_cstates[2].target_residency = usec;
1597 	}
1598 
1599 	rdmsrl(MSR_PKGC7_IRTL, msr);
1600 	usec = irtl_2_usec(msr);
1601 	if (usec) {
1602 		bxt_cstates[3].exit_latency = usec;
1603 		bxt_cstates[3].target_residency = usec;
1604 	}
1605 
1606 	rdmsrl(MSR_PKGC8_IRTL, msr);
1607 	usec = irtl_2_usec(msr);
1608 	if (usec) {
1609 		bxt_cstates[4].exit_latency = usec;
1610 		bxt_cstates[4].target_residency = usec;
1611 	}
1612 
1613 	rdmsrl(MSR_PKGC9_IRTL, msr);
1614 	usec = irtl_2_usec(msr);
1615 	if (usec) {
1616 		bxt_cstates[5].exit_latency = usec;
1617 		bxt_cstates[5].target_residency = usec;
1618 	}
1619 
1620 	rdmsrl(MSR_PKGC10_IRTL, msr);
1621 	usec = irtl_2_usec(msr);
1622 	if (usec) {
1623 		bxt_cstates[6].exit_latency = usec;
1624 		bxt_cstates[6].target_residency = usec;
1625 	}
1626 
1627 }
1628 
1629 /**
1630  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1631  *
1632  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1633  */
1634 static void __init sklh_idle_state_table_update(void)
1635 {
1636 	unsigned long long msr;
1637 	unsigned int eax, ebx, ecx, edx;
1638 
1639 
1640 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1641 	if (max_cstate <= 7)
1642 		return;
1643 
1644 	/* if PC10 not present in CPUID.MWAIT.EDX */
1645 	if ((mwait_substates & (0xF << 28)) == 0)
1646 		return;
1647 
1648 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1649 
1650 	/* PC10 is not enabled in PKG C-state limit */
1651 	if ((msr & 0xF) != 8)
1652 		return;
1653 
1654 	ecx = 0;
1655 	cpuid(7, &eax, &ebx, &ecx, &edx);
1656 
1657 	/* if SGX is present */
1658 	if (ebx & (1 << 2)) {
1659 
1660 		rdmsrl(MSR_IA32_FEAT_CTL, msr);
1661 
1662 		/* if SGX is enabled */
1663 		if (msr & (1 << 18))
1664 			return;
1665 	}
1666 
1667 	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
1668 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
1669 }
1670 
1671 /**
1672  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1673  * idle states table.
1674  */
1675 static void __init skx_idle_state_table_update(void)
1676 {
1677 	unsigned long long msr;
1678 
1679 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1680 
1681 	/*
1682 	 * 000b: C0/C1 (no package C-state support)
1683 	 * 001b: C2
1684 	 * 010b: C6 (non-retention)
1685 	 * 011b: C6 (retention)
1686 	 * 111b: No Package C state limits.
1687 	 */
1688 	if ((msr & 0x7) < 2) {
1689 		/*
1690 		 * Uses the CC6 + PC0 latency and 3 times of
1691 		 * latency for target_residency if the PC6
1692 		 * is disabled in BIOS. This is consistent
1693 		 * with how intel_idle driver uses _CST
1694 		 * to set the target_residency.
1695 		 */
1696 		skx_cstates[2].exit_latency = 92;
1697 		skx_cstates[2].target_residency = 276;
1698 	}
1699 }
1700 
1701 /**
1702  * adl_idle_state_table_update - Adjust AlderLake idle states table.
1703  */
1704 static void __init adl_idle_state_table_update(void)
1705 {
1706 	/* Check if user prefers C1 over C1E. */
1707 	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1708 		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1709 		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1710 
1711 		/* Disable C1E by clearing the "C1E promotion" bit. */
1712 		c1e_promotion = C1E_PROMOTION_DISABLE;
1713 		return;
1714 	}
1715 
1716 	/* Make sure C1E is enabled by default */
1717 	c1e_promotion = C1E_PROMOTION_ENABLE;
1718 }
1719 
1720 /**
1721  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1722  */
1723 static void __init spr_idle_state_table_update(void)
1724 {
1725 	unsigned long long msr;
1726 
1727 	/* Check if user prefers C1E over C1. */
1728 	if ((preferred_states_mask & BIT(2)) &&
1729 	    !(preferred_states_mask & BIT(1))) {
1730 		/* Disable C1 and enable C1E. */
1731 		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
1732 		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1733 
1734 		/* Enable C1E using the "C1E promotion" bit. */
1735 		c1e_promotion = C1E_PROMOTION_ENABLE;
1736 	}
1737 
1738 	/*
1739 	 * By default, the C6 state assumes the worst-case scenario of package
1740 	 * C6. However, if PC6 is disabled, we update the numbers to match
1741 	 * core C6.
1742 	 */
1743 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1744 
1745 	/* Limit value 2 and above allow for PC6. */
1746 	if ((msr & 0x7) < 2) {
1747 		spr_cstates[2].exit_latency = 190;
1748 		spr_cstates[2].target_residency = 600;
1749 	}
1750 }
1751 
1752 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1753 {
1754 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1755 	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1756 					MWAIT_SUBSTATE_MASK;
1757 
1758 	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
1759 	if (num_substates == 0)
1760 		return false;
1761 
1762 	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1763 		mark_tsc_unstable("TSC halts in idle states deeper than C2");
1764 
1765 	return true;
1766 }
1767 
1768 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1769 {
1770 	int cstate;
1771 
1772 	switch (boot_cpu_data.x86_model) {
1773 	case INTEL_FAM6_IVYBRIDGE_X:
1774 		ivt_idle_state_table_update();
1775 		break;
1776 	case INTEL_FAM6_ATOM_GOLDMONT:
1777 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1778 		bxt_idle_state_table_update();
1779 		break;
1780 	case INTEL_FAM6_SKYLAKE:
1781 		sklh_idle_state_table_update();
1782 		break;
1783 	case INTEL_FAM6_SKYLAKE_X:
1784 		skx_idle_state_table_update();
1785 		break;
1786 	case INTEL_FAM6_SAPPHIRERAPIDS_X:
1787 		spr_idle_state_table_update();
1788 		break;
1789 	case INTEL_FAM6_ALDERLAKE:
1790 	case INTEL_FAM6_ALDERLAKE_L:
1791 		adl_idle_state_table_update();
1792 		break;
1793 	}
1794 
1795 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1796 		unsigned int mwait_hint;
1797 
1798 		if (intel_idle_max_cstate_reached(cstate))
1799 			break;
1800 
1801 		if (!cpuidle_state_table[cstate].enter &&
1802 		    !cpuidle_state_table[cstate].enter_s2idle)
1803 			break;
1804 
1805 		/* If marked as unusable, skip this state. */
1806 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1807 			pr_debug("state %s is disabled\n",
1808 				 cpuidle_state_table[cstate].name);
1809 			continue;
1810 		}
1811 
1812 		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1813 		if (!intel_idle_verify_cstate(mwait_hint))
1814 			continue;
1815 
1816 		/* Structure copy. */
1817 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
1818 
1819 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
1820 			drv->states[drv->state_count].enter = intel_idle_irq;
1821 
1822 		if ((disabled_states_mask & BIT(drv->state_count)) ||
1823 		    ((icpu->use_acpi || force_use_acpi) &&
1824 		     intel_idle_off_by_default(mwait_hint) &&
1825 		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1826 			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1827 
1828 		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1829 			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1830 
1831 		drv->state_count++;
1832 	}
1833 
1834 	if (icpu->byt_auto_demotion_disable_flag) {
1835 		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1836 		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1837 	}
1838 }
1839 
1840 /**
1841  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1842  * @drv: cpuidle driver structure to initialize.
1843  */
1844 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1845 {
1846 	cpuidle_poll_state_init(drv);
1847 
1848 	if (disabled_states_mask & BIT(0))
1849 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1850 
1851 	drv->state_count = 1;
1852 
1853 	if (icpu)
1854 		intel_idle_init_cstates_icpu(drv);
1855 	else
1856 		intel_idle_init_cstates_acpi(drv);
1857 }
1858 
1859 static void auto_demotion_disable(void)
1860 {
1861 	unsigned long long msr_bits;
1862 
1863 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1864 	msr_bits &= ~auto_demotion_disable_flags;
1865 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1866 }
1867 
1868 static void c1e_promotion_enable(void)
1869 {
1870 	unsigned long long msr_bits;
1871 
1872 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1873 	msr_bits |= 0x2;
1874 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1875 }
1876 
1877 static void c1e_promotion_disable(void)
1878 {
1879 	unsigned long long msr_bits;
1880 
1881 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1882 	msr_bits &= ~0x2;
1883 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1884 }
1885 
1886 /**
1887  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1888  * @cpu: CPU to initialize.
1889  *
1890  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1891  * with the processor model flags.
1892  */
1893 static int intel_idle_cpu_init(unsigned int cpu)
1894 {
1895 	struct cpuidle_device *dev;
1896 
1897 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1898 	dev->cpu = cpu;
1899 
1900 	if (cpuidle_register_device(dev)) {
1901 		pr_debug("cpuidle_register_device %d failed!\n", cpu);
1902 		return -EIO;
1903 	}
1904 
1905 	if (auto_demotion_disable_flags)
1906 		auto_demotion_disable();
1907 
1908 	if (c1e_promotion == C1E_PROMOTION_ENABLE)
1909 		c1e_promotion_enable();
1910 	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1911 		c1e_promotion_disable();
1912 
1913 	return 0;
1914 }
1915 
1916 static int intel_idle_cpu_online(unsigned int cpu)
1917 {
1918 	struct cpuidle_device *dev;
1919 
1920 	if (!boot_cpu_has(X86_FEATURE_ARAT))
1921 		tick_broadcast_enable();
1922 
1923 	/*
1924 	 * Some systems can hotplug a cpu at runtime after
1925 	 * the kernel has booted, we have to initialize the
1926 	 * driver in this case
1927 	 */
1928 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1929 	if (!dev->registered)
1930 		return intel_idle_cpu_init(cpu);
1931 
1932 	return 0;
1933 }
1934 
1935 /**
1936  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1937  */
1938 static void __init intel_idle_cpuidle_devices_uninit(void)
1939 {
1940 	int i;
1941 
1942 	for_each_online_cpu(i)
1943 		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1944 }
1945 
1946 static int __init intel_idle_init(void)
1947 {
1948 	const struct x86_cpu_id *id;
1949 	unsigned int eax, ebx, ecx;
1950 	int retval;
1951 
1952 	/* Do not load intel_idle at all for now if idle= is passed */
1953 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1954 		return -ENODEV;
1955 
1956 	if (max_cstate == 0) {
1957 		pr_debug("disabled\n");
1958 		return -EPERM;
1959 	}
1960 
1961 	id = x86_match_cpu(intel_idle_ids);
1962 	if (id) {
1963 		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1964 			pr_debug("Please enable MWAIT in BIOS SETUP\n");
1965 			return -ENODEV;
1966 		}
1967 	} else {
1968 		id = x86_match_cpu(intel_mwait_ids);
1969 		if (!id)
1970 			return -ENODEV;
1971 	}
1972 
1973 	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1974 		return -ENODEV;
1975 
1976 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1977 
1978 	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1979 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1980 	    !mwait_substates)
1981 			return -ENODEV;
1982 
1983 	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1984 
1985 	icpu = (const struct idle_cpu *)id->driver_data;
1986 	if (icpu) {
1987 		cpuidle_state_table = icpu->state_table;
1988 		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1989 		if (icpu->disable_promotion_to_c1e)
1990 			c1e_promotion = C1E_PROMOTION_DISABLE;
1991 		if (icpu->use_acpi || force_use_acpi)
1992 			intel_idle_acpi_cst_extract();
1993 	} else if (!intel_idle_acpi_cst_extract()) {
1994 		return -ENODEV;
1995 	}
1996 
1997 	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1998 		 boot_cpu_data.x86_model);
1999 
2000 	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2001 	if (!intel_idle_cpuidle_devices)
2002 		return -ENOMEM;
2003 
2004 	intel_idle_cpuidle_driver_init(&intel_idle_driver);
2005 
2006 	retval = cpuidle_register_driver(&intel_idle_driver);
2007 	if (retval) {
2008 		struct cpuidle_driver *drv = cpuidle_get_driver();
2009 		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2010 		       drv ? drv->name : "none");
2011 		goto init_driver_fail;
2012 	}
2013 
2014 	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2015 				   intel_idle_cpu_online, NULL);
2016 	if (retval < 0)
2017 		goto hp_setup_fail;
2018 
2019 	pr_debug("Local APIC timer is reliable in %s\n",
2020 		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2021 
2022 	return 0;
2023 
2024 hp_setup_fail:
2025 	intel_idle_cpuidle_devices_uninit();
2026 	cpuidle_unregister_driver(&intel_idle_driver);
2027 init_driver_fail:
2028 	free_percpu(intel_idle_cpuidle_devices);
2029 	return retval;
2030 
2031 }
2032 device_initcall(intel_idle_init);
2033 
2034 /*
2035  * We are not really modular, but we used to support that.  Meaning we also
2036  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2037  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2038  * is the easiest way (currently) to continue doing that.
2039  */
2040 module_param(max_cstate, int, 0444);
2041 /*
2042  * The positions of the bits that are set in this number are the indices of the
2043  * idle states to be disabled by default (as reflected by the names of the
2044  * corresponding idle state directories in sysfs, "state0", "state1" ...
2045  * "state<i>" ..., where <i> is the index of the given state).
2046  */
2047 module_param_named(states_off, disabled_states_mask, uint, 0444);
2048 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2049 /*
2050  * Some platforms come with mutually exclusive C-states, so that if one is
2051  * enabled, the other C-states must not be used. Example: C1 and C1E on
2052  * Sapphire Rapids platform. This parameter allows for selecting the
2053  * preferred C-states among the groups of mutually exclusive C-states - the
2054  * selected C-states will be registered, the other C-states from the mutually
2055  * exclusive group won't be registered. If the platform has no mutually
2056  * exclusive C-states, this parameter has no effect.
2057  */
2058 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2059 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
2060