xref: /linux/drivers/idle/intel_idle.c (revision 4b99990cdf9560e8a071640baf19f312e6ae02f4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9 
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16 
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *	for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *	(in contrast to entering ACPI C3, in which case the WBINVD
27  *	instruction needs to be executed to flush the caches)
28  */
29 
30 /*
31  * Known limitations
32  *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38 
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41 
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43 
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <linux/time64.h>
49 #include <trace/events/power.h>
50 #include <linux/sched.h>
51 #include <linux/sched/smt.h>
52 #include <linux/mutex.h>
53 #include <linux/notifier.h>
54 #include <linux/cpu.h>
55 #include <linux/moduleparam.h>
56 #include <linux/sysfs.h>
57 #include <asm/cpuid/api.h>
58 #include <asm/cpu_device_id.h>
59 #include <asm/intel-family.h>
60 #include <asm/mwait.h>
61 #include <asm/spec-ctrl.h>
62 #include <asm/msr.h>
63 #include <asm/tsc.h>
64 #include <asm/fpu/api.h>
65 #include <asm/smp.h>
66 
67 static struct cpuidle_driver intel_idle_driver = {
68 	.name = "intel_idle",
69 	.owner = THIS_MODULE,
70 };
/* intel_idle.max_cstate=0 disables driver */
/* Defaults to the highest index a cpuidle state array can hold. */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * NOTE(review): presumably a bitmask of idle states to leave disabled; the
 * code that sets and consumes it is outside this part of the file - confirm.
 */
static unsigned int disabled_states_mask __read_mostly;
/*
 * NOTE(review): presumably forces the interrupts-enabled entry path
 * (see CPUIDLE_FLAG_IRQ_ENABLE); consumer not visible here - confirm.
 */
static bool force_irq_on __read_mostly;
/*
 * NOTE(review): presumably requests the IBRS-off entry path
 * (see CPUIDLE_FLAG_IBRS); consumer not visible here - confirm.
 */
static bool ibrs_off __read_mostly;
76 
/* The maximum allowed length for the 'table' module parameter  */
#define MAX_CMDLINE_TABLE_LEN 256
/* Maximum allowed C-state latency */
/* (5 * USEC_PER_MSEC, i.e. 5 milliseconds expressed in microseconds) */
#define MAX_CMDLINE_LATENCY_US (5 * USEC_PER_MSEC)
/* Maximum allowed C-state target residency */
/* (100 * USEC_PER_MSEC, i.e. 100 milliseconds expressed in microseconds) */
#define MAX_CMDLINE_RESIDENCY_US (100 * USEC_PER_MSEC)

/* The Package C-State Limit bits in MSR_PKG_CST_CONFIG_CONTROL */
/* (the three least significant bits, 2:0) */
#define SKX_PKG_CST_LIMIT_MASK GENMASK(2, 0)
/* PC6 is enabled when Package C-State Limit >= this value */
#define SKX_PKG_CST_LIMIT_PC6 2

/*
 * Buffer sized by MAX_CMDLINE_TABLE_LEN.
 * NOTE(review): presumably holds the raw 'table' module parameter string;
 * the parameter registration is not visible here - confirm.
 */
static char cmdline_table_str[MAX_CMDLINE_TABLE_LEN] __read_mostly;

/* Per-CPU cpuidle device objects (pointer to a __percpu allocation). */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * NOTE(review): shares its name with the idle_cpu member
 * 'auto_demotion_disable_flags'; presumably a copy of that value for the
 * selected CPU model - confirm against the init code.
 */
static unsigned long auto_demotion_disable_flags;
94 
/*
 * Selected handling of "C1E promotion"; the default is
 * C1E_PROMOTION_PRESERVE.
 * NOTE(review): judging by the names, the three values mean "leave the
 * current setting alone", "turn promotion on" and "turn promotion off";
 * the code acting on them is outside this part of the file - confirm.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;
100 
/*
 * Per-CPU-model description: which idle-state table to use plus a few
 * model-specific switches.
 */
struct idle_cpu {
	/* Array of idle states for this model (see the *_cstates[] tables). */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* NOTE(review): presumably requests that C1E promotion be turned off - confirm. */
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably marks models that support C1 demotion - confirm. */
	bool c1_demotion_supported;
	/* NOTE(review): presumably tells the driver to consult ACPI _CST data - confirm. */
	bool use_acpi;
};
113 
/*
 * NOTE(review): shares its name with the idle_cpu member
 * 'c1_demotion_supported'; presumably a copy for the running CPU - confirm.
 */
static bool c1_demotion_supported;
/* NOTE(review): presumably serializes C1 demotion updates; users not visible here - confirm. */
static DEFINE_MUTEX(c1_demotion_mutex);

/* Init-only (__initdata) device pointer; its use is outside this part of the file. */
static struct device *sysfs_root __initdata;

/* Init-only pointer to the idle_cpu description in use. */
static const struct idle_cpu *icpu __initdata;
/* Init-only pointer to the idle-state table in use. */
static struct cpuidle_state *cpuidle_state_table __initdata;

/* C-states data from the 'intel_idle.table' cmdline parameter */
static struct cpuidle_state cmdline_states[CPUIDLE_STATE_MAX] __initdata;

/* NOTE(review): presumably the MWAIT sub-state information read from CPUID at init - confirm. */
static unsigned int mwait_substates __initdata;
126 
/*
 * Flag bits defined locally in this file (bits 14-18).  They are OR-ed into
 * the .flags member of the idle-state table entries below, next to the MWAIT
 * hint that occupies the top byte (see flg2MWAIT()/MWAIT2flg()).
 */

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * Initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE	BIT(17)

/*
 * Ignore the sub-state when matching mwait hints between the ACPI _CST and
 * custom tables.
 */
#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH	BIT(18)
154 
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT(flags) - extract the 8-bit hint from bits 31:24 of @flags.
 * MWAIT2flg(eax)   - place the low 8 bits of @eax into bits 31:24.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (e.g. "a | b") is masked as a whole rather than being split by operator
 * precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
164 
165 static __always_inline int __intel_idle(struct cpuidle_device *dev,
166 					struct cpuidle_driver *drv,
167 					int index, bool irqoff)
168 {
169 	struct cpuidle_state *state = &drv->states[index];
170 	unsigned int eax = flg2MWAIT(state->flags);
171 	unsigned int ecx = 1*irqoff; /* break on interrupt flag */
172 
173 	mwait_idle_with_hints(eax, ecx);
174 
175 	return index;
176 }
177 
178 /**
179  * intel_idle - Ask the processor to enter the given idle state.
180  * @dev: cpuidle device of the target CPU.
181  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
182  * @index: Target idle state index.
183  *
184  * Use the MWAIT instruction to notify the processor that the CPU represented by
185  * @dev is idle and it can try to enter the idle state corresponding to @index.
186  *
187  * If the local APIC timer is not known to be reliable in the target idle state,
188  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
189  *
190  * Must be called under local_irq_disable().
191  */
192 static __cpuidle int intel_idle(struct cpuidle_device *dev,
193 				struct cpuidle_driver *drv, int index)
194 {
195 	return __intel_idle(dev, drv, index, true);
196 }
197 
198 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
199 				    struct cpuidle_driver *drv, int index)
200 {
201 	return __intel_idle(dev, drv, index, false);
202 }
203 
204 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
205 				     struct cpuidle_driver *drv, int index)
206 {
207 	bool smt_active = sched_smt_active();
208 	u64 spec_ctrl = spec_ctrl_current();
209 	int ret;
210 
211 	if (smt_active)
212 		__update_spec_ctrl(0);
213 
214 	ret = __intel_idle(dev, drv, index, true);
215 
216 	if (smt_active)
217 		__update_spec_ctrl(spec_ctrl);
218 
219 	return ret;
220 }
221 
222 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
223 				       struct cpuidle_driver *drv, int index)
224 {
225 	fpu_idle_fpregs();
226 	return __intel_idle(dev, drv, index, true);
227 }
228 
229 /**
230  * intel_idle_s2idle - Ask the processor to enter the given idle state.
231  * @dev: cpuidle device of the target CPU.
232  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
233  * @index: Target idle state index.
234  *
235  * Use the MWAIT instruction to notify the processor that the CPU represented by
236  * @dev is idle and it can try to enter the idle state corresponding to @index.
237  *
238  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
239  * scheduler tick and suspended scheduler clock on the target CPU.
240  */
241 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
242 				       struct cpuidle_driver *drv, int index)
243 {
244 	struct cpuidle_state *state = &drv->states[index];
245 	unsigned int eax = flg2MWAIT(state->flags);
246 	unsigned int ecx = 1; /* break on interrupt flag */
247 
248 	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
249 		fpu_idle_fpregs();
250 
251 	mwait_idle_with_hints(eax, ecx);
252 
253 	return 0;
254 }
255 
256 static void intel_idle_enter_dead(struct cpuidle_device *dev, int index)
257 {
258 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
259 	struct cpuidle_state *state = &drv->states[index];
260 	unsigned long eax = flg2MWAIT(state->flags);
261 
262 	mwait_play_dead(eax);
263 }
264 
265 /*
266  * States are indexed by the cstate number,
267  * which is also the index into the MWAIT hint array.
268  * Thus C0 is a dummy.
269  */
270 static struct cpuidle_state nehalem_cstates[] __initdata = {
271 	{
272 		.name = "C1",
273 		.desc = "MWAIT 0x00",
274 		.flags = MWAIT2flg(0x00),
275 		.exit_latency = 3,
276 		.target_residency = 6,
277 		.enter = intel_idle,
278 		.enter_s2idle = intel_idle_s2idle, },
279 	{
280 		.name = "C1E",
281 		.desc = "MWAIT 0x01",
282 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
283 		.exit_latency = 10,
284 		.target_residency = 20,
285 		.enter = intel_idle,
286 		.enter_s2idle = intel_idle_s2idle, },
287 	{
288 		.name = "C3",
289 		.desc = "MWAIT 0x10",
290 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
291 		.exit_latency = 20,
292 		.target_residency = 80,
293 		.enter = intel_idle,
294 		.enter_s2idle = intel_idle_s2idle, },
295 	{
296 		.name = "C6",
297 		.desc = "MWAIT 0x20",
298 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
299 		.exit_latency = 200,
300 		.target_residency = 800,
301 		.enter = intel_idle,
302 		.enter_s2idle = intel_idle_s2idle, },
303 	{
304 		.enter = NULL }
305 };
306 
307 static struct cpuidle_state snb_cstates[] __initdata = {
308 	{
309 		.name = "C1",
310 		.desc = "MWAIT 0x00",
311 		.flags = MWAIT2flg(0x00),
312 		.exit_latency = 2,
313 		.target_residency = 2,
314 		.enter = intel_idle,
315 		.enter_s2idle = intel_idle_s2idle, },
316 	{
317 		.name = "C1E",
318 		.desc = "MWAIT 0x01",
319 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
320 		.exit_latency = 10,
321 		.target_residency = 20,
322 		.enter = intel_idle,
323 		.enter_s2idle = intel_idle_s2idle, },
324 	{
325 		.name = "C3",
326 		.desc = "MWAIT 0x10",
327 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
328 		.exit_latency = 80,
329 		.target_residency = 211,
330 		.enter = intel_idle,
331 		.enter_s2idle = intel_idle_s2idle, },
332 	{
333 		.name = "C6",
334 		.desc = "MWAIT 0x20",
335 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
336 		.exit_latency = 104,
337 		.target_residency = 345,
338 		.enter = intel_idle,
339 		.enter_s2idle = intel_idle_s2idle, },
340 	{
341 		.name = "C7",
342 		.desc = "MWAIT 0x30",
343 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
344 		.exit_latency = 109,
345 		.target_residency = 345,
346 		.enter = intel_idle,
347 		.enter_s2idle = intel_idle_s2idle, },
348 	{
349 		.enter = NULL }
350 };
351 
352 static struct cpuidle_state byt_cstates[] __initdata = {
353 	{
354 		.name = "C1",
355 		.desc = "MWAIT 0x00",
356 		.flags = MWAIT2flg(0x00),
357 		.exit_latency = 1,
358 		.target_residency = 1,
359 		.enter = intel_idle,
360 		.enter_s2idle = intel_idle_s2idle, },
361 	{
362 		.name = "C6N",
363 		.desc = "MWAIT 0x58",
364 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
365 		.exit_latency = 300,
366 		.target_residency = 275,
367 		.enter = intel_idle,
368 		.enter_s2idle = intel_idle_s2idle, },
369 	{
370 		.name = "C6S",
371 		.desc = "MWAIT 0x52",
372 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
373 		.exit_latency = 500,
374 		.target_residency = 560,
375 		.enter = intel_idle,
376 		.enter_s2idle = intel_idle_s2idle, },
377 	{
378 		.name = "C7",
379 		.desc = "MWAIT 0x60",
380 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
381 		.exit_latency = 1200,
382 		.target_residency = 4000,
383 		.enter = intel_idle,
384 		.enter_s2idle = intel_idle_s2idle, },
385 	{
386 		.name = "C7S",
387 		.desc = "MWAIT 0x64",
388 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
389 		.exit_latency = 10000,
390 		.target_residency = 20000,
391 		.enter = intel_idle,
392 		.enter_s2idle = intel_idle_s2idle, },
393 	{
394 		.enter = NULL }
395 };
396 
397 static struct cpuidle_state cht_cstates[] __initdata = {
398 	{
399 		.name = "C1",
400 		.desc = "MWAIT 0x00",
401 		.flags = MWAIT2flg(0x00),
402 		.exit_latency = 1,
403 		.target_residency = 1,
404 		.enter = intel_idle,
405 		.enter_s2idle = intel_idle_s2idle, },
406 	{
407 		.name = "C6N",
408 		.desc = "MWAIT 0x58",
409 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
410 		.exit_latency = 80,
411 		.target_residency = 275,
412 		.enter = intel_idle,
413 		.enter_s2idle = intel_idle_s2idle, },
414 	{
415 		.name = "C6S",
416 		.desc = "MWAIT 0x52",
417 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
418 		.exit_latency = 200,
419 		.target_residency = 560,
420 		.enter = intel_idle,
421 		.enter_s2idle = intel_idle_s2idle, },
422 	{
423 		.name = "C7",
424 		.desc = "MWAIT 0x60",
425 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
426 		.exit_latency = 1200,
427 		.target_residency = 4000,
428 		.enter = intel_idle,
429 		.enter_s2idle = intel_idle_s2idle, },
430 	{
431 		.name = "C7S",
432 		.desc = "MWAIT 0x64",
433 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
434 		.exit_latency = 10000,
435 		.target_residency = 20000,
436 		.enter = intel_idle,
437 		.enter_s2idle = intel_idle_s2idle, },
438 	{
439 		.enter = NULL }
440 };
441 
442 static struct cpuidle_state ivb_cstates[] __initdata = {
443 	{
444 		.name = "C1",
445 		.desc = "MWAIT 0x00",
446 		.flags = MWAIT2flg(0x00),
447 		.exit_latency = 1,
448 		.target_residency = 1,
449 		.enter = intel_idle,
450 		.enter_s2idle = intel_idle_s2idle, },
451 	{
452 		.name = "C1E",
453 		.desc = "MWAIT 0x01",
454 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
455 		.exit_latency = 10,
456 		.target_residency = 20,
457 		.enter = intel_idle,
458 		.enter_s2idle = intel_idle_s2idle, },
459 	{
460 		.name = "C3",
461 		.desc = "MWAIT 0x10",
462 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
463 		.exit_latency = 59,
464 		.target_residency = 156,
465 		.enter = intel_idle,
466 		.enter_s2idle = intel_idle_s2idle, },
467 	{
468 		.name = "C6",
469 		.desc = "MWAIT 0x20",
470 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
471 		.exit_latency = 80,
472 		.target_residency = 300,
473 		.enter = intel_idle,
474 		.enter_s2idle = intel_idle_s2idle, },
475 	{
476 		.name = "C7",
477 		.desc = "MWAIT 0x30",
478 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
479 		.exit_latency = 87,
480 		.target_residency = 300,
481 		.enter = intel_idle,
482 		.enter_s2idle = intel_idle_s2idle, },
483 	{
484 		.enter = NULL }
485 };
486 
487 static struct cpuidle_state ivt_cstates[] __initdata = {
488 	{
489 		.name = "C1",
490 		.desc = "MWAIT 0x00",
491 		.flags = MWAIT2flg(0x00),
492 		.exit_latency = 1,
493 		.target_residency = 1,
494 		.enter = intel_idle,
495 		.enter_s2idle = intel_idle_s2idle, },
496 	{
497 		.name = "C1E",
498 		.desc = "MWAIT 0x01",
499 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
500 		.exit_latency = 10,
501 		.target_residency = 80,
502 		.enter = intel_idle,
503 		.enter_s2idle = intel_idle_s2idle, },
504 	{
505 		.name = "C3",
506 		.desc = "MWAIT 0x10",
507 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
508 		.exit_latency = 59,
509 		.target_residency = 156,
510 		.enter = intel_idle,
511 		.enter_s2idle = intel_idle_s2idle, },
512 	{
513 		.name = "C6",
514 		.desc = "MWAIT 0x20",
515 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
516 		.exit_latency = 82,
517 		.target_residency = 300,
518 		.enter = intel_idle,
519 		.enter_s2idle = intel_idle_s2idle, },
520 	{
521 		.enter = NULL }
522 };
523 
524 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
525 	{
526 		.name = "C1",
527 		.desc = "MWAIT 0x00",
528 		.flags = MWAIT2flg(0x00),
529 		.exit_latency = 1,
530 		.target_residency = 1,
531 		.enter = intel_idle,
532 		.enter_s2idle = intel_idle_s2idle, },
533 	{
534 		.name = "C1E",
535 		.desc = "MWAIT 0x01",
536 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
537 		.exit_latency = 10,
538 		.target_residency = 250,
539 		.enter = intel_idle,
540 		.enter_s2idle = intel_idle_s2idle, },
541 	{
542 		.name = "C3",
543 		.desc = "MWAIT 0x10",
544 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
545 		.exit_latency = 59,
546 		.target_residency = 300,
547 		.enter = intel_idle,
548 		.enter_s2idle = intel_idle_s2idle, },
549 	{
550 		.name = "C6",
551 		.desc = "MWAIT 0x20",
552 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
553 		.exit_latency = 84,
554 		.target_residency = 400,
555 		.enter = intel_idle,
556 		.enter_s2idle = intel_idle_s2idle, },
557 	{
558 		.enter = NULL }
559 };
560 
561 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
562 	{
563 		.name = "C1",
564 		.desc = "MWAIT 0x00",
565 		.flags = MWAIT2flg(0x00),
566 		.exit_latency = 1,
567 		.target_residency = 1,
568 		.enter = intel_idle,
569 		.enter_s2idle = intel_idle_s2idle, },
570 	{
571 		.name = "C1E",
572 		.desc = "MWAIT 0x01",
573 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
574 		.exit_latency = 10,
575 		.target_residency = 500,
576 		.enter = intel_idle,
577 		.enter_s2idle = intel_idle_s2idle, },
578 	{
579 		.name = "C3",
580 		.desc = "MWAIT 0x10",
581 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
582 		.exit_latency = 59,
583 		.target_residency = 600,
584 		.enter = intel_idle,
585 		.enter_s2idle = intel_idle_s2idle, },
586 	{
587 		.name = "C6",
588 		.desc = "MWAIT 0x20",
589 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
590 		.exit_latency = 88,
591 		.target_residency = 700,
592 		.enter = intel_idle,
593 		.enter_s2idle = intel_idle_s2idle, },
594 	{
595 		.enter = NULL }
596 };
597 
598 static struct cpuidle_state hsw_cstates[] __initdata = {
599 	{
600 		.name = "C1",
601 		.desc = "MWAIT 0x00",
602 		.flags = MWAIT2flg(0x00),
603 		.exit_latency = 2,
604 		.target_residency = 2,
605 		.enter = intel_idle,
606 		.enter_s2idle = intel_idle_s2idle, },
607 	{
608 		.name = "C1E",
609 		.desc = "MWAIT 0x01",
610 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
611 		.exit_latency = 10,
612 		.target_residency = 20,
613 		.enter = intel_idle,
614 		.enter_s2idle = intel_idle_s2idle, },
615 	{
616 		.name = "C3",
617 		.desc = "MWAIT 0x10",
618 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
619 		.exit_latency = 33,
620 		.target_residency = 100,
621 		.enter = intel_idle,
622 		.enter_s2idle = intel_idle_s2idle, },
623 	{
624 		.name = "C6",
625 		.desc = "MWAIT 0x20",
626 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
627 		.exit_latency = 133,
628 		.target_residency = 400,
629 		.enter = intel_idle,
630 		.enter_s2idle = intel_idle_s2idle, },
631 	{
632 		.name = "C7s",
633 		.desc = "MWAIT 0x32",
634 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
635 		.exit_latency = 166,
636 		.target_residency = 500,
637 		.enter = intel_idle,
638 		.enter_s2idle = intel_idle_s2idle, },
639 	{
640 		.name = "C8",
641 		.desc = "MWAIT 0x40",
642 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
643 		.exit_latency = 300,
644 		.target_residency = 900,
645 		.enter = intel_idle,
646 		.enter_s2idle = intel_idle_s2idle, },
647 	{
648 		.name = "C9",
649 		.desc = "MWAIT 0x50",
650 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
651 		.exit_latency = 600,
652 		.target_residency = 1800,
653 		.enter = intel_idle,
654 		.enter_s2idle = intel_idle_s2idle, },
655 	{
656 		.name = "C10",
657 		.desc = "MWAIT 0x60",
658 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
659 		.exit_latency = 2600,
660 		.target_residency = 7700,
661 		.enter = intel_idle,
662 		.enter_s2idle = intel_idle_s2idle, },
663 	{
664 		.enter = NULL }
665 };
666 static struct cpuidle_state bdw_cstates[] __initdata = {
667 	{
668 		.name = "C1",
669 		.desc = "MWAIT 0x00",
670 		.flags = MWAIT2flg(0x00),
671 		.exit_latency = 2,
672 		.target_residency = 2,
673 		.enter = intel_idle,
674 		.enter_s2idle = intel_idle_s2idle, },
675 	{
676 		.name = "C1E",
677 		.desc = "MWAIT 0x01",
678 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
679 		.exit_latency = 10,
680 		.target_residency = 20,
681 		.enter = intel_idle,
682 		.enter_s2idle = intel_idle_s2idle, },
683 	{
684 		.name = "C3",
685 		.desc = "MWAIT 0x10",
686 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
687 		.exit_latency = 40,
688 		.target_residency = 100,
689 		.enter = intel_idle,
690 		.enter_s2idle = intel_idle_s2idle, },
691 	{
692 		.name = "C6",
693 		.desc = "MWAIT 0x20",
694 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
695 		.exit_latency = 133,
696 		.target_residency = 400,
697 		.enter = intel_idle,
698 		.enter_s2idle = intel_idle_s2idle, },
699 	{
700 		.name = "C7s",
701 		.desc = "MWAIT 0x32",
702 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
703 		.exit_latency = 166,
704 		.target_residency = 500,
705 		.enter = intel_idle,
706 		.enter_s2idle = intel_idle_s2idle, },
707 	{
708 		.name = "C8",
709 		.desc = "MWAIT 0x40",
710 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
711 		.exit_latency = 300,
712 		.target_residency = 900,
713 		.enter = intel_idle,
714 		.enter_s2idle = intel_idle_s2idle, },
715 	{
716 		.name = "C9",
717 		.desc = "MWAIT 0x50",
718 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
719 		.exit_latency = 600,
720 		.target_residency = 1800,
721 		.enter = intel_idle,
722 		.enter_s2idle = intel_idle_s2idle, },
723 	{
724 		.name = "C10",
725 		.desc = "MWAIT 0x60",
726 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
727 		.exit_latency = 2600,
728 		.target_residency = 7700,
729 		.enter = intel_idle,
730 		.enter_s2idle = intel_idle_s2idle, },
731 	{
732 		.enter = NULL }
733 };
734 
735 static struct cpuidle_state skl_cstates[] __initdata = {
736 	{
737 		.name = "C1",
738 		.desc = "MWAIT 0x00",
739 		.flags = MWAIT2flg(0x00),
740 		.exit_latency = 2,
741 		.target_residency = 2,
742 		.enter = intel_idle,
743 		.enter_s2idle = intel_idle_s2idle, },
744 	{
745 		.name = "C1E",
746 		.desc = "MWAIT 0x01",
747 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
748 		.exit_latency = 10,
749 		.target_residency = 20,
750 		.enter = intel_idle,
751 		.enter_s2idle = intel_idle_s2idle, },
752 	{
753 		.name = "C3",
754 		.desc = "MWAIT 0x10",
755 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
756 		.exit_latency = 70,
757 		.target_residency = 100,
758 		.enter = intel_idle,
759 		.enter_s2idle = intel_idle_s2idle, },
760 	{
761 		.name = "C6",
762 		.desc = "MWAIT 0x20",
763 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
764 		.exit_latency = 85,
765 		.target_residency = 200,
766 		.enter = intel_idle,
767 		.enter_s2idle = intel_idle_s2idle, },
768 	{
769 		.name = "C7s",
770 		.desc = "MWAIT 0x33",
771 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
772 		.exit_latency = 124,
773 		.target_residency = 800,
774 		.enter = intel_idle,
775 		.enter_s2idle = intel_idle_s2idle, },
776 	{
777 		.name = "C8",
778 		.desc = "MWAIT 0x40",
779 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
780 		.exit_latency = 200,
781 		.target_residency = 800,
782 		.enter = intel_idle,
783 		.enter_s2idle = intel_idle_s2idle, },
784 	{
785 		.name = "C9",
786 		.desc = "MWAIT 0x50",
787 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
788 		.exit_latency = 480,
789 		.target_residency = 5000,
790 		.enter = intel_idle,
791 		.enter_s2idle = intel_idle_s2idle, },
792 	{
793 		.name = "C10",
794 		.desc = "MWAIT 0x60",
795 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
796 		.exit_latency = 890,
797 		.target_residency = 5000,
798 		.enter = intel_idle,
799 		.enter_s2idle = intel_idle_s2idle, },
800 	{
801 		.enter = NULL }
802 };
803 
804 static struct cpuidle_state skx_cstates[] __initdata = {
805 	{
806 		.name = "C1",
807 		.desc = "MWAIT 0x00",
808 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
809 		.exit_latency = 2,
810 		.target_residency = 2,
811 		.enter = intel_idle,
812 		.enter_s2idle = intel_idle_s2idle, },
813 	{
814 		.name = "C1E",
815 		.desc = "MWAIT 0x01",
816 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
817 		.exit_latency = 10,
818 		.target_residency = 20,
819 		.enter = intel_idle,
820 		.enter_s2idle = intel_idle_s2idle, },
821 	{
822 		.name = "C6",
823 		.desc = "MWAIT 0x20",
824 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
825 		.exit_latency = 133,
826 		.target_residency = 600,
827 		.enter = intel_idle,
828 		.enter_s2idle = intel_idle_s2idle, },
829 	{
830 		.enter = NULL }
831 };
832 
833 static struct cpuidle_state icx_cstates[] __initdata = {
834 	{
835 		.name = "C1",
836 		.desc = "MWAIT 0x00",
837 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
838 		.exit_latency = 1,
839 		.target_residency = 1,
840 		.enter = intel_idle,
841 		.enter_s2idle = intel_idle_s2idle, },
842 	{
843 		.name = "C1E",
844 		.desc = "MWAIT 0x01",
845 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
846 		.exit_latency = 4,
847 		.target_residency = 4,
848 		.enter = intel_idle,
849 		.enter_s2idle = intel_idle_s2idle, },
850 	{
851 		.name = "C6",
852 		.desc = "MWAIT 0x20",
853 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
854 		.exit_latency = 170,
855 		.target_residency = 600,
856 		.enter = intel_idle,
857 		.enter_s2idle = intel_idle_s2idle, },
858 	{
859 		.enter = NULL }
860 };
861 
862 /*
863  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
864  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
865  * But in this case there is effectively no C1, because C1 requests are
866  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
867  * and C1E requests end up with C1, so there is effectively no C1E.
868  *
869  * By default we enable C1E and disable C1 by marking it with
870  * 'CPUIDLE_FLAG_UNUSABLE'.
871  */
872 static struct cpuidle_state adl_cstates[] __initdata = {
873 	{
874 		.name = "C1",
875 		.desc = "MWAIT 0x00",
876 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
877 		.exit_latency = 1,
878 		.target_residency = 1,
879 		.enter = intel_idle,
880 		.enter_s2idle = intel_idle_s2idle, },
881 	{
882 		.name = "C1E",
883 		.desc = "MWAIT 0x01",
884 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
885 		.exit_latency = 2,
886 		.target_residency = 4,
887 		.enter = intel_idle,
888 		.enter_s2idle = intel_idle_s2idle, },
889 	{
890 		.name = "C6",
891 		.desc = "MWAIT 0x20",
892 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
893 		.exit_latency = 220,
894 		.target_residency = 600,
895 		.enter = intel_idle,
896 		.enter_s2idle = intel_idle_s2idle, },
897 	{
898 		.name = "C8",
899 		.desc = "MWAIT 0x40",
900 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
901 		.exit_latency = 280,
902 		.target_residency = 800,
903 		.enter = intel_idle,
904 		.enter_s2idle = intel_idle_s2idle, },
905 	{
906 		.name = "C10",
907 		.desc = "MWAIT 0x60",
908 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
909 		.exit_latency = 680,
910 		.target_residency = 2000,
911 		.enter = intel_idle,
912 		.enter_s2idle = intel_idle_s2idle, },
913 	{
914 		.enter = NULL }
915 };
916 
917 static struct cpuidle_state adl_l_cstates[] __initdata = {
918 	{
919 		.name = "C1",
920 		.desc = "MWAIT 0x00",
921 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
922 		.exit_latency = 1,
923 		.target_residency = 1,
924 		.enter = intel_idle,
925 		.enter_s2idle = intel_idle_s2idle, },
926 	{
927 		.name = "C1E",
928 		.desc = "MWAIT 0x01",
929 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
930 		.exit_latency = 2,
931 		.target_residency = 4,
932 		.enter = intel_idle,
933 		.enter_s2idle = intel_idle_s2idle, },
934 	{
935 		.name = "C6",
936 		.desc = "MWAIT 0x20",
937 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
938 		.exit_latency = 170,
939 		.target_residency = 500,
940 		.enter = intel_idle,
941 		.enter_s2idle = intel_idle_s2idle, },
942 	{
943 		.name = "C8",
944 		.desc = "MWAIT 0x40",
945 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
946 		.exit_latency = 200,
947 		.target_residency = 600,
948 		.enter = intel_idle,
949 		.enter_s2idle = intel_idle_s2idle, },
950 	{
951 		.name = "C10",
952 		.desc = "MWAIT 0x60",
953 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
954 		.exit_latency = 230,
955 		.target_residency = 700,
956 		.enter = intel_idle,
957 		.enter_s2idle = intel_idle_s2idle, },
958 	{
959 		.enter = NULL }
960 };
961 
962 static struct cpuidle_state mtl_l_cstates[] __initdata = {
963 	{
964 		.name = "C1E",
965 		.desc = "MWAIT 0x01",
966 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
967 		.exit_latency = 1,
968 		.target_residency = 1,
969 		.enter = intel_idle,
970 		.enter_s2idle = intel_idle_s2idle, },
971 	{
972 		.name = "C6",
973 		.desc = "MWAIT 0x20",
974 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
975 		.exit_latency = 140,
976 		.target_residency = 420,
977 		.enter = intel_idle,
978 		.enter_s2idle = intel_idle_s2idle, },
979 	{
980 		.name = "C10",
981 		.desc = "MWAIT 0x60",
982 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
983 		.exit_latency = 310,
984 		.target_residency = 930,
985 		.enter = intel_idle,
986 		.enter_s2idle = intel_idle_s2idle, },
987 	{
988 		.enter = NULL }
989 };
990 
991 static struct cpuidle_state ptl_cstates[] __initdata = {
992 	{
993 		.name = "C1",
994 		.desc = "MWAIT 0x00",
995 		.flags = MWAIT2flg(0x00),
996 		.exit_latency = 1,
997 		.target_residency = 1,
998 		.enter = &intel_idle,
999 		.enter_s2idle = intel_idle_s2idle, },
1000 	{
1001 		.name = "C1E",
1002 		.desc = "MWAIT 0x01",
1003 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1004 		.exit_latency = 10,
1005 		.target_residency = 10,
1006 		.enter = &intel_idle,
1007 		.enter_s2idle = intel_idle_s2idle, },
1008 	{
1009 		.name = "C6S",
1010 		.desc = "MWAIT 0x21",
1011 		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED,
1012 		.exit_latency = 300,
1013 		.target_residency = 300,
1014 		.enter = &intel_idle,
1015 		.enter_s2idle = intel_idle_s2idle, },
1016 	{
1017 		.name = "C10",
1018 		.desc = "MWAIT 0x60",
1019 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1020 		.exit_latency = 370,
1021 		.target_residency = 2500,
1022 		.enter = &intel_idle,
1023 		.enter_s2idle = intel_idle_s2idle, },
1024 	{
1025 		.enter = NULL }
1026 };
1027 
1028 static struct cpuidle_state gmt_cstates[] __initdata = {
1029 	{
1030 		.name = "C1",
1031 		.desc = "MWAIT 0x00",
1032 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
1033 		.exit_latency = 1,
1034 		.target_residency = 1,
1035 		.enter = intel_idle,
1036 		.enter_s2idle = intel_idle_s2idle, },
1037 	{
1038 		.name = "C1E",
1039 		.desc = "MWAIT 0x01",
1040 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1041 		.exit_latency = 2,
1042 		.target_residency = 4,
1043 		.enter = intel_idle,
1044 		.enter_s2idle = intel_idle_s2idle, },
1045 	{
1046 		.name = "C6",
1047 		.desc = "MWAIT 0x20",
1048 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1049 		.exit_latency = 195,
1050 		.target_residency = 585,
1051 		.enter = intel_idle,
1052 		.enter_s2idle = intel_idle_s2idle, },
1053 	{
1054 		.name = "C8",
1055 		.desc = "MWAIT 0x40",
1056 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1057 		.exit_latency = 260,
1058 		.target_residency = 1040,
1059 		.enter = intel_idle,
1060 		.enter_s2idle = intel_idle_s2idle, },
1061 	{
1062 		.name = "C10",
1063 		.desc = "MWAIT 0x60",
1064 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1065 		.exit_latency = 660,
1066 		.target_residency = 1980,
1067 		.enter = intel_idle,
1068 		.enter_s2idle = intel_idle_s2idle, },
1069 	{
1070 		.enter = NULL }
1071 };
1072 
1073 static struct cpuidle_state spr_cstates[] __initdata = {
1074 	{
1075 		.name = "C1",
1076 		.desc = "MWAIT 0x00",
1077 		.flags = MWAIT2flg(0x00),
1078 		.exit_latency = 1,
1079 		.target_residency = 1,
1080 		.enter = intel_idle,
1081 		.enter_s2idle = intel_idle_s2idle, },
1082 	{
1083 		.name = "C1E",
1084 		.desc = "MWAIT 0x01",
1085 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1086 		.exit_latency = 2,
1087 		.target_residency = 4,
1088 		.enter = intel_idle,
1089 		.enter_s2idle = intel_idle_s2idle, },
1090 	{
1091 		.name = "C6",
1092 		.desc = "MWAIT 0x20",
1093 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1094 					   CPUIDLE_FLAG_INIT_XSTATE,
1095 		.exit_latency = 290,
1096 		.target_residency = 800,
1097 		.enter = intel_idle,
1098 		.enter_s2idle = intel_idle_s2idle, },
1099 	{
1100 		.enter = NULL }
1101 };
1102 
1103 static struct cpuidle_state gnr_cstates[] __initdata = {
1104 	{
1105 		.name = "C1",
1106 		.desc = "MWAIT 0x00",
1107 		.flags = MWAIT2flg(0x00),
1108 		.exit_latency = 1,
1109 		.target_residency = 1,
1110 		.enter = intel_idle,
1111 		.enter_s2idle = intel_idle_s2idle, },
1112 	{
1113 		.name = "C1E",
1114 		.desc = "MWAIT 0x01",
1115 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1116 		.exit_latency = 4,
1117 		.target_residency = 4,
1118 		.enter = intel_idle,
1119 		.enter_s2idle = intel_idle_s2idle, },
1120 	{
1121 		.name = "C6",
1122 		.desc = "MWAIT 0x20",
1123 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1124 					   CPUIDLE_FLAG_INIT_XSTATE |
1125 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1126 		.exit_latency = 170,
1127 		.target_residency = 650,
1128 		.enter = intel_idle,
1129 		.enter_s2idle = intel_idle_s2idle, },
1130 	{
1131 		.name = "C6P",
1132 		.desc = "MWAIT 0x21",
1133 		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1134 					   CPUIDLE_FLAG_INIT_XSTATE |
1135 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1136 		.exit_latency = 210,
1137 		.target_residency = 1000,
1138 		.enter = intel_idle,
1139 		.enter_s2idle = intel_idle_s2idle, },
1140 	{
1141 		.enter = NULL }
1142 };
1143 
1144 static struct cpuidle_state gnrd_cstates[] __initdata = {
1145 	{
1146 		.name = "C1",
1147 		.desc = "MWAIT 0x00",
1148 		.flags = MWAIT2flg(0x00),
1149 		.exit_latency = 1,
1150 		.target_residency = 1,
1151 		.enter = intel_idle,
1152 		.enter_s2idle = intel_idle_s2idle, },
1153 	{
1154 		.name = "C1E",
1155 		.desc = "MWAIT 0x01",
1156 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1157 		.exit_latency = 4,
1158 		.target_residency = 4,
1159 		.enter = intel_idle,
1160 		.enter_s2idle = intel_idle_s2idle, },
1161 	{
1162 		.name = "C6",
1163 		.desc = "MWAIT 0x20",
1164 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
1165 					   CPUIDLE_FLAG_INIT_XSTATE |
1166 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1167 		.exit_latency = 220,
1168 		.target_residency = 650,
1169 		.enter = intel_idle,
1170 		.enter_s2idle = intel_idle_s2idle, },
1171 	{
1172 		.name = "C6P",
1173 		.desc = "MWAIT 0x21",
1174 		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
1175 					   CPUIDLE_FLAG_INIT_XSTATE |
1176 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1177 		.exit_latency = 240,
1178 		.target_residency = 750,
1179 		.enter = intel_idle,
1180 		.enter_s2idle = intel_idle_s2idle, },
1181 	{
1182 		.enter = NULL }
1183 };
1184 
1185 static struct cpuidle_state atom_cstates[] __initdata = {
1186 	{
1187 		.name = "C1E",
1188 		.desc = "MWAIT 0x00",
1189 		.flags = MWAIT2flg(0x00),
1190 		.exit_latency = 10,
1191 		.target_residency = 20,
1192 		.enter = intel_idle,
1193 		.enter_s2idle = intel_idle_s2idle, },
1194 	{
1195 		.name = "C2",
1196 		.desc = "MWAIT 0x10",
1197 		.flags = MWAIT2flg(0x10),
1198 		.exit_latency = 20,
1199 		.target_residency = 80,
1200 		.enter = intel_idle,
1201 		.enter_s2idle = intel_idle_s2idle, },
1202 	{
1203 		.name = "C4",
1204 		.desc = "MWAIT 0x30",
1205 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1206 		.exit_latency = 100,
1207 		.target_residency = 400,
1208 		.enter = intel_idle,
1209 		.enter_s2idle = intel_idle_s2idle, },
1210 	{
1211 		.name = "C6",
1212 		.desc = "MWAIT 0x52",
1213 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1214 		.exit_latency = 140,
1215 		.target_residency = 560,
1216 		.enter = intel_idle,
1217 		.enter_s2idle = intel_idle_s2idle, },
1218 	{
1219 		.enter = NULL }
1220 };
1221 static struct cpuidle_state tangier_cstates[] __initdata = {
1222 	{
1223 		.name = "C1",
1224 		.desc = "MWAIT 0x00",
1225 		.flags = MWAIT2flg(0x00),
1226 		.exit_latency = 1,
1227 		.target_residency = 4,
1228 		.enter = intel_idle,
1229 		.enter_s2idle = intel_idle_s2idle, },
1230 	{
1231 		.name = "C4",
1232 		.desc = "MWAIT 0x30",
1233 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1234 		.exit_latency = 100,
1235 		.target_residency = 400,
1236 		.enter = intel_idle,
1237 		.enter_s2idle = intel_idle_s2idle, },
1238 	{
1239 		.name = "C6",
1240 		.desc = "MWAIT 0x52",
1241 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1242 		.exit_latency = 140,
1243 		.target_residency = 560,
1244 		.enter = intel_idle,
1245 		.enter_s2idle = intel_idle_s2idle, },
1246 	{
1247 		.name = "C7",
1248 		.desc = "MWAIT 0x60",
1249 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1250 		.exit_latency = 1200,
1251 		.target_residency = 4000,
1252 		.enter = intel_idle,
1253 		.enter_s2idle = intel_idle_s2idle, },
1254 	{
1255 		.name = "C9",
1256 		.desc = "MWAIT 0x64",
1257 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1258 		.exit_latency = 10000,
1259 		.target_residency = 20000,
1260 		.enter = intel_idle,
1261 		.enter_s2idle = intel_idle_s2idle, },
1262 	{
1263 		.enter = NULL }
1264 };
1265 static struct cpuidle_state avn_cstates[] __initdata = {
1266 	{
1267 		.name = "C1",
1268 		.desc = "MWAIT 0x00",
1269 		.flags = MWAIT2flg(0x00),
1270 		.exit_latency = 2,
1271 		.target_residency = 2,
1272 		.enter = intel_idle,
1273 		.enter_s2idle = intel_idle_s2idle, },
1274 	{
1275 		.name = "C6",
1276 		.desc = "MWAIT 0x51",
1277 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1278 		.exit_latency = 15,
1279 		.target_residency = 45,
1280 		.enter = intel_idle,
1281 		.enter_s2idle = intel_idle_s2idle, },
1282 	{
1283 		.enter = NULL }
1284 };
1285 static struct cpuidle_state knl_cstates[] __initdata = {
1286 	{
1287 		.name = "C1",
1288 		.desc = "MWAIT 0x00",
1289 		.flags = MWAIT2flg(0x00),
1290 		.exit_latency = 1,
1291 		.target_residency = 2,
1292 		.enter = intel_idle,
1293 		.enter_s2idle = intel_idle_s2idle },
1294 	{
1295 		.name = "C6",
1296 		.desc = "MWAIT 0x10",
1297 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1298 		.exit_latency = 120,
1299 		.target_residency = 500,
1300 		.enter = intel_idle,
1301 		.enter_s2idle = intel_idle_s2idle },
1302 	{
1303 		.enter = NULL }
1304 };
1305 
1306 static struct cpuidle_state bxt_cstates[] __initdata = {
1307 	{
1308 		.name = "C1",
1309 		.desc = "MWAIT 0x00",
1310 		.flags = MWAIT2flg(0x00),
1311 		.exit_latency = 2,
1312 		.target_residency = 2,
1313 		.enter = intel_idle,
1314 		.enter_s2idle = intel_idle_s2idle, },
1315 	{
1316 		.name = "C1E",
1317 		.desc = "MWAIT 0x01",
1318 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1319 		.exit_latency = 10,
1320 		.target_residency = 20,
1321 		.enter = intel_idle,
1322 		.enter_s2idle = intel_idle_s2idle, },
1323 	{
1324 		.name = "C6",
1325 		.desc = "MWAIT 0x20",
1326 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1327 		.exit_latency = 133,
1328 		.target_residency = 133,
1329 		.enter = intel_idle,
1330 		.enter_s2idle = intel_idle_s2idle, },
1331 	{
1332 		.name = "C7s",
1333 		.desc = "MWAIT 0x31",
1334 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1335 		.exit_latency = 155,
1336 		.target_residency = 155,
1337 		.enter = intel_idle,
1338 		.enter_s2idle = intel_idle_s2idle, },
1339 	{
1340 		.name = "C8",
1341 		.desc = "MWAIT 0x40",
1342 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1343 		.exit_latency = 1000,
1344 		.target_residency = 1000,
1345 		.enter = intel_idle,
1346 		.enter_s2idle = intel_idle_s2idle, },
1347 	{
1348 		.name = "C9",
1349 		.desc = "MWAIT 0x50",
1350 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1351 		.exit_latency = 2000,
1352 		.target_residency = 2000,
1353 		.enter = intel_idle,
1354 		.enter_s2idle = intel_idle_s2idle, },
1355 	{
1356 		.name = "C10",
1357 		.desc = "MWAIT 0x60",
1358 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1359 		.exit_latency = 10000,
1360 		.target_residency = 10000,
1361 		.enter = intel_idle,
1362 		.enter_s2idle = intel_idle_s2idle, },
1363 	{
1364 		.enter = NULL }
1365 };
1366 
1367 static struct cpuidle_state dnv_cstates[] __initdata = {
1368 	{
1369 		.name = "C1",
1370 		.desc = "MWAIT 0x00",
1371 		.flags = MWAIT2flg(0x00),
1372 		.exit_latency = 2,
1373 		.target_residency = 2,
1374 		.enter = intel_idle,
1375 		.enter_s2idle = intel_idle_s2idle, },
1376 	{
1377 		.name = "C1E",
1378 		.desc = "MWAIT 0x01",
1379 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1380 		.exit_latency = 10,
1381 		.target_residency = 20,
1382 		.enter = intel_idle,
1383 		.enter_s2idle = intel_idle_s2idle, },
1384 	{
1385 		.name = "C6",
1386 		.desc = "MWAIT 0x20",
1387 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1388 		.exit_latency = 50,
1389 		.target_residency = 500,
1390 		.enter = intel_idle,
1391 		.enter_s2idle = intel_idle_s2idle, },
1392 	{
1393 		.enter = NULL }
1394 };
1395 
1396 /*
1397  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1398  * C6, and this is indicated in the CPUID mwait leaf.
1399  */
1400 static struct cpuidle_state snr_cstates[] __initdata = {
1401 	{
1402 		.name = "C1",
1403 		.desc = "MWAIT 0x00",
1404 		.flags = MWAIT2flg(0x00),
1405 		.exit_latency = 2,
1406 		.target_residency = 2,
1407 		.enter = intel_idle,
1408 		.enter_s2idle = intel_idle_s2idle, },
1409 	{
1410 		.name = "C1E",
1411 		.desc = "MWAIT 0x01",
1412 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1413 		.exit_latency = 15,
1414 		.target_residency = 25,
1415 		.enter = intel_idle,
1416 		.enter_s2idle = intel_idle_s2idle, },
1417 	{
1418 		.name = "C6",
1419 		.desc = "MWAIT 0x20",
1420 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1421 		.exit_latency = 130,
1422 		.target_residency = 500,
1423 		.enter = intel_idle,
1424 		.enter_s2idle = intel_idle_s2idle, },
1425 	{
1426 		.enter = NULL }
1427 };
1428 
1429 static struct cpuidle_state grr_cstates[] __initdata = {
1430 	{
1431 		.name = "C1",
1432 		.desc = "MWAIT 0x00",
1433 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1434 		.exit_latency = 1,
1435 		.target_residency = 1,
1436 		.enter = intel_idle,
1437 		.enter_s2idle = intel_idle_s2idle, },
1438 	{
1439 		.name = "C1E",
1440 		.desc = "MWAIT 0x01",
1441 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1442 		.exit_latency = 2,
1443 		.target_residency = 10,
1444 		.enter = intel_idle,
1445 		.enter_s2idle = intel_idle_s2idle, },
1446 	{
1447 		.name = "C6S",
1448 		.desc = "MWAIT 0x22",
1449 		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED,
1450 		.exit_latency = 140,
1451 		.target_residency = 500,
1452 		.enter = intel_idle,
1453 		.enter_s2idle = intel_idle_s2idle, },
1454 	{
1455 		.enter = NULL }
1456 };
1457 
1458 static struct cpuidle_state srf_cstates[] __initdata = {
1459 	{
1460 		.name = "C1",
1461 		.desc = "MWAIT 0x00",
1462 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1463 		.exit_latency = 1,
1464 		.target_residency = 1,
1465 		.enter = intel_idle,
1466 		.enter_s2idle = intel_idle_s2idle, },
1467 	{
1468 		.name = "C1E",
1469 		.desc = "MWAIT 0x01",
1470 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1471 		.exit_latency = 2,
1472 		.target_residency = 10,
1473 		.enter = intel_idle,
1474 		.enter_s2idle = intel_idle_s2idle, },
1475 	{
1476 		.name = "C6S",
1477 		.desc = "MWAIT 0x22",
1478 		.flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED |
1479 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1480 		.exit_latency = 270,
1481 		.target_residency = 700,
1482 		.enter = intel_idle,
1483 		.enter_s2idle = intel_idle_s2idle, },
1484 	{
1485 		.name = "C6SP",
1486 		.desc = "MWAIT 0x23",
1487 		.flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED |
1488 					   CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
1489 		.exit_latency = 310,
1490 		.target_residency = 900,
1491 		.enter = intel_idle,
1492 		.enter_s2idle = intel_idle_s2idle, },
1493 	{
1494 		.enter = NULL }
1495 };
1496 
1497 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1498 	.state_table = nehalem_cstates,
1499 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1500 	.disable_promotion_to_c1e = true,
1501 };
1502 
1503 static const struct idle_cpu idle_cpu_nhx __initconst = {
1504 	.state_table = nehalem_cstates,
1505 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1506 	.disable_promotion_to_c1e = true,
1507 	.use_acpi = true,
1508 };
1509 
1510 static const struct idle_cpu idle_cpu_atom __initconst = {
1511 	.state_table = atom_cstates,
1512 };
1513 
1514 static const struct idle_cpu idle_cpu_tangier __initconst = {
1515 	.state_table = tangier_cstates,
1516 };
1517 
1518 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1519 	.state_table = atom_cstates,
1520 	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1521 };
1522 
1523 static const struct idle_cpu idle_cpu_snb __initconst = {
1524 	.state_table = snb_cstates,
1525 	.disable_promotion_to_c1e = true,
1526 };
1527 
1528 static const struct idle_cpu idle_cpu_snx __initconst = {
1529 	.state_table = snb_cstates,
1530 	.disable_promotion_to_c1e = true,
1531 	.use_acpi = true,
1532 };
1533 
1534 static const struct idle_cpu idle_cpu_byt __initconst = {
1535 	.state_table = byt_cstates,
1536 	.disable_promotion_to_c1e = true,
1537 };
1538 
1539 static const struct idle_cpu idle_cpu_cht __initconst = {
1540 	.state_table = cht_cstates,
1541 	.disable_promotion_to_c1e = true,
1542 };
1543 
1544 static const struct idle_cpu idle_cpu_ivb __initconst = {
1545 	.state_table = ivb_cstates,
1546 	.disable_promotion_to_c1e = true,
1547 };
1548 
1549 static const struct idle_cpu idle_cpu_ivt __initconst = {
1550 	.state_table = ivt_cstates,
1551 	.disable_promotion_to_c1e = true,
1552 	.use_acpi = true,
1553 };
1554 
1555 static const struct idle_cpu idle_cpu_hsw __initconst = {
1556 	.state_table = hsw_cstates,
1557 	.disable_promotion_to_c1e = true,
1558 };
1559 
1560 static const struct idle_cpu idle_cpu_hsx __initconst = {
1561 	.state_table = hsw_cstates,
1562 	.disable_promotion_to_c1e = true,
1563 	.use_acpi = true,
1564 };
1565 
1566 static const struct idle_cpu idle_cpu_bdw __initconst = {
1567 	.state_table = bdw_cstates,
1568 	.disable_promotion_to_c1e = true,
1569 };
1570 
1571 static const struct idle_cpu idle_cpu_bdx __initconst = {
1572 	.state_table = bdw_cstates,
1573 	.disable_promotion_to_c1e = true,
1574 	.use_acpi = true,
1575 };
1576 
1577 static const struct idle_cpu idle_cpu_skl __initconst = {
1578 	.state_table = skl_cstates,
1579 	.disable_promotion_to_c1e = true,
1580 };
1581 
1582 static const struct idle_cpu idle_cpu_skx __initconst = {
1583 	.state_table = skx_cstates,
1584 	.disable_promotion_to_c1e = true,
1585 	.use_acpi = true,
1586 };
1587 
1588 static const struct idle_cpu idle_cpu_icx __initconst = {
1589 	.state_table = icx_cstates,
1590 	.disable_promotion_to_c1e = true,
1591 	.use_acpi = true,
1592 };
1593 
1594 static const struct idle_cpu idle_cpu_adl __initconst = {
1595 	.state_table = adl_cstates,
1596 };
1597 
1598 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1599 	.state_table = adl_l_cstates,
1600 };
1601 
1602 static const struct idle_cpu idle_cpu_mtl_l __initconst = {
1603 	.state_table = mtl_l_cstates,
1604 };
1605 
1606 static const struct idle_cpu idle_cpu_ptl __initconst = {
1607 	.state_table = ptl_cstates,
1608 };
1609 
1610 static const struct idle_cpu idle_cpu_gmt __initconst = {
1611 	.state_table = gmt_cstates,
1612 };
1613 
1614 static const struct idle_cpu idle_cpu_spr __initconst = {
1615 	.state_table = spr_cstates,
1616 	.disable_promotion_to_c1e = true,
1617 	.c1_demotion_supported = true,
1618 	.use_acpi = true,
1619 };
1620 
1621 static const struct idle_cpu idle_cpu_gnr __initconst = {
1622 	.state_table = gnr_cstates,
1623 	.disable_promotion_to_c1e = true,
1624 	.c1_demotion_supported = true,
1625 	.use_acpi = true,
1626 };
1627 
1628 static const struct idle_cpu idle_cpu_gnrd __initconst = {
1629 	.state_table = gnrd_cstates,
1630 	.disable_promotion_to_c1e = true,
1631 	.c1_demotion_supported = true,
1632 	.use_acpi = true,
1633 };
1634 
1635 static const struct idle_cpu idle_cpu_avn __initconst = {
1636 	.state_table = avn_cstates,
1637 	.disable_promotion_to_c1e = true,
1638 	.use_acpi = true,
1639 };
1640 
1641 static const struct idle_cpu idle_cpu_knl __initconst = {
1642 	.state_table = knl_cstates,
1643 	.use_acpi = true,
1644 };
1645 
1646 static const struct idle_cpu idle_cpu_bxt __initconst = {
1647 	.state_table = bxt_cstates,
1648 	.disable_promotion_to_c1e = true,
1649 };
1650 
1651 static const struct idle_cpu idle_cpu_dnv __initconst = {
1652 	.state_table = dnv_cstates,
1653 	.disable_promotion_to_c1e = true,
1654 	.use_acpi = true,
1655 };
1656 
1657 static const struct idle_cpu idle_cpu_tmt __initconst = {
1658 	.disable_promotion_to_c1e = true,
1659 };
1660 
1661 static const struct idle_cpu idle_cpu_snr __initconst = {
1662 	.state_table = snr_cstates,
1663 	.disable_promotion_to_c1e = true,
1664 	.use_acpi = true,
1665 };
1666 
1667 static const struct idle_cpu idle_cpu_grr __initconst = {
1668 	.state_table = grr_cstates,
1669 	.disable_promotion_to_c1e = true,
1670 	.c1_demotion_supported = true,
1671 	.use_acpi = true,
1672 };
1673 
1674 static const struct idle_cpu idle_cpu_srf __initconst = {
1675 	.state_table = srf_cstates,
1676 	.disable_promotion_to_c1e = true,
1677 	.c1_demotion_supported = true,
1678 	.use_acpi = true,
1679 };
1680 
1681 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1682 	X86_MATCH_VFM(INTEL_NEHALEM_EP,		&idle_cpu_nhx),
1683 	X86_MATCH_VFM(INTEL_NEHALEM,		&idle_cpu_nehalem),
1684 	X86_MATCH_VFM(INTEL_NEHALEM_G,		&idle_cpu_nehalem),
1685 	X86_MATCH_VFM(INTEL_WESTMERE,		&idle_cpu_nehalem),
1686 	X86_MATCH_VFM(INTEL_WESTMERE_EP,	&idle_cpu_nhx),
1687 	X86_MATCH_VFM(INTEL_NEHALEM_EX,		&idle_cpu_nhx),
1688 	X86_MATCH_VFM(INTEL_ATOM_BONNELL,	&idle_cpu_atom),
1689 	X86_MATCH_VFM(INTEL_ATOM_BONNELL_MID,	&idle_cpu_lincroft),
1690 	X86_MATCH_VFM(INTEL_WESTMERE_EX,	&idle_cpu_nhx),
1691 	X86_MATCH_VFM(INTEL_SANDYBRIDGE,	&idle_cpu_snb),
1692 	X86_MATCH_VFM(INTEL_SANDYBRIDGE_X,	&idle_cpu_snx),
1693 	X86_MATCH_VFM(INTEL_ATOM_SALTWELL,	&idle_cpu_atom),
1694 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT,	&idle_cpu_byt),
1695 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1696 	X86_MATCH_VFM(INTEL_ATOM_AIRMONT,	&idle_cpu_cht),
1697 	X86_MATCH_VFM(INTEL_IVYBRIDGE,		&idle_cpu_ivb),
1698 	X86_MATCH_VFM(INTEL_IVYBRIDGE_X,	&idle_cpu_ivt),
1699 	X86_MATCH_VFM(INTEL_HASWELL,		&idle_cpu_hsw),
1700 	X86_MATCH_VFM(INTEL_HASWELL_X,		&idle_cpu_hsx),
1701 	X86_MATCH_VFM(INTEL_HASWELL_L,		&idle_cpu_hsw),
1702 	X86_MATCH_VFM(INTEL_HASWELL_G,		&idle_cpu_hsw),
1703 	X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D,	&idle_cpu_avn),
1704 	X86_MATCH_VFM(INTEL_BROADWELL,		&idle_cpu_bdw),
1705 	X86_MATCH_VFM(INTEL_BROADWELL_G,	&idle_cpu_bdw),
1706 	X86_MATCH_VFM(INTEL_BROADWELL_X,	&idle_cpu_bdx),
1707 	X86_MATCH_VFM(INTEL_BROADWELL_D,	&idle_cpu_bdx),
1708 	X86_MATCH_VFM(INTEL_SKYLAKE_L,		&idle_cpu_skl),
1709 	X86_MATCH_VFM(INTEL_SKYLAKE,		&idle_cpu_skl),
1710 	X86_MATCH_VFM(INTEL_KABYLAKE_L,		&idle_cpu_skl),
1711 	X86_MATCH_VFM(INTEL_KABYLAKE,		&idle_cpu_skl),
1712 	X86_MATCH_VFM(INTEL_SKYLAKE_X,		&idle_cpu_skx),
1713 	X86_MATCH_VFM(INTEL_ICELAKE_X,		&idle_cpu_icx),
1714 	X86_MATCH_VFM(INTEL_ICELAKE_D,		&idle_cpu_icx),
1715 	X86_MATCH_VFM(INTEL_ALDERLAKE,		&idle_cpu_adl),
1716 	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&idle_cpu_adl_l),
1717 	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&idle_cpu_mtl_l),
1718 	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,	&idle_cpu_ptl),
1719 	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&idle_cpu_gmt),
1720 	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&idle_cpu_spr),
1721 	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&idle_cpu_spr),
1722 	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X,	&idle_cpu_gnr),
1723 	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D,	&idle_cpu_gnrd),
1724 	X86_MATCH_VFM(INTEL_XEON_PHI_KNL,	&idle_cpu_knl),
1725 	X86_MATCH_VFM(INTEL_XEON_PHI_KNM,	&idle_cpu_knl),
1726 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT,	&idle_cpu_bxt),
1727 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
1728 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D,	&idle_cpu_dnv),
1729 	X86_MATCH_VFM(INTEL_ATOM_TREMONT,       &idle_cpu_tmt),
1730 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_L,     &idle_cpu_tmt),
1731 	X86_MATCH_VFM(INTEL_ATOM_TREMONT_D,	&idle_cpu_snr),
1732 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT,	&idle_cpu_grr),
1733 	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X,	&idle_cpu_srf),
1734 	X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X,	&idle_cpu_srf),
1735 	{}
1736 };
1737 
1738 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1739 	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL),
1740 	{}
1741 };
1742 
1743 static bool __init intel_idle_max_cstate_reached(int cstate)
1744 {
1745 	if (cstate + 1 > max_cstate) {
1746 		pr_info("max_cstate %d reached\n", max_cstate);
1747 		return true;
1748 	}
1749 	return false;
1750 }
1751 
1752 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1753 {
1754 	unsigned long eax = flg2MWAIT(state->flags);
1755 
1756 	if (boot_cpu_has(X86_FEATURE_ARAT))
1757 		return false;
1758 
1759 	/*
1760 	 * Switch over to one-shot tick broadcast if the target C-state
1761 	 * is deeper than C1.
1762 	 */
1763 	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1764 }
1765 
1766 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1767 #include <acpi/processor.h>
1768 
1769 static bool no_acpi __read_mostly;
1770 module_param(no_acpi, bool, 0444);
1771 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1772 
1773 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1774 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1775 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1776 
1777 static bool no_native __read_mostly; /* No effect if no_acpi is set. */
1778 module_param_named(no_native, no_native, bool, 0444);
1779 MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states");
1780 
1781 static struct acpi_processor_power acpi_state_table __initdata;
1782 
1783 /**
1784  * intel_idle_cst_usable - Check if the _CST information can be used.
1785  *
1786  * Check if all of the C-states listed by _CST in the max_cstate range are
1787  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1788  */
1789 static bool __init intel_idle_cst_usable(void)
1790 {
1791 	int cstate, limit;
1792 
1793 	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1794 		      acpi_state_table.count);
1795 
1796 	for (cstate = 1; cstate < limit; cstate++) {
1797 		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1798 
1799 		if (cx->entry_method != ACPI_CSTATE_FFH)
1800 			return false;
1801 	}
1802 
1803 	return true;
1804 }
1805 
1806 static bool __init intel_idle_acpi_cst_extract(void)
1807 {
1808 	unsigned int cpu;
1809 
1810 	if (no_acpi) {
1811 		pr_debug("Not allowed to use ACPI _CST\n");
1812 		return false;
1813 	}
1814 
1815 	for_each_possible_cpu(cpu) {
1816 		struct acpi_processor *pr = per_cpu(processors, cpu);
1817 
1818 		if (!pr)
1819 			continue;
1820 
1821 		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1822 			continue;
1823 
1824 		acpi_state_table.count++;
1825 
1826 		if (!intel_idle_cst_usable())
1827 			continue;
1828 
1829 		if (!acpi_processor_claim_cst_control())
1830 			break;
1831 
1832 		return true;
1833 	}
1834 
1835 	acpi_state_table.count = 0;
1836 	pr_debug("ACPI _CST not found or not usable\n");
1837 	return false;
1838 }
1839 
1840 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1841 {
1842 	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1843 
1844 	/*
1845 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1846 	 * the interesting states are ACPI_CSTATE_FFH.
1847 	 */
1848 	for (cstate = 1; cstate < limit; cstate++) {
1849 		struct acpi_processor_cx *cx;
1850 		struct cpuidle_state *state;
1851 
1852 		if (intel_idle_max_cstate_reached(cstate - 1))
1853 			break;
1854 
1855 		cx = &acpi_state_table.states[cstate];
1856 
1857 		state = &drv->states[drv->state_count++];
1858 
1859 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1860 		strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1861 		state->exit_latency = cx->latency;
1862 		/*
1863 		 * For C1-type C-states use the same number for both the exit
1864 		 * latency and target residency, because that is the case for
1865 		 * C1 in the majority of the static C-states tables above.
1866 		 * For the other types of C-states, however, set the target
1867 		 * residency to 3 times the exit latency which should lead to
1868 		 * a reasonable balance between energy-efficiency and
1869 		 * performance in the majority of interesting cases.
1870 		 */
1871 		state->target_residency = cx->latency;
1872 		if (cx->type > ACPI_STATE_C1)
1873 			state->target_residency *= 3;
1874 
1875 		state->flags = MWAIT2flg(cx->address);
1876 		if (cx->type > ACPI_STATE_C2)
1877 			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1878 
1879 		if (disabled_states_mask & BIT(cstate))
1880 			state->flags |= CPUIDLE_FLAG_OFF;
1881 
1882 		if (intel_idle_state_needs_timer_stop(state))
1883 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1884 
1885 		if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1886 			mark_tsc_unstable("TSC halts in idle");
1887 
1888 		state->enter = intel_idle;
1889 		state->enter_dead = intel_idle_enter_dead;
1890 		state->enter_s2idle = intel_idle_s2idle;
1891 	}
1892 }
1893 
1894 static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
1895 {
1896 	int cstate, limit;
1897 
1898 	/*
1899 	 * If there are no _CST C-states, do not disable any C-states by
1900 	 * default.
1901 	 */
1902 	if (!acpi_state_table.count)
1903 		return false;
1904 
1905 	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1906 	/*
1907 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1908 	 * the interesting states are ACPI_CSTATE_FFH.
1909 	 */
1910 	for (cstate = 1; cstate < limit; cstate++) {
1911 		u32 acpi_hint = acpi_state_table.states[cstate].address;
1912 		u32 table_hint = mwait_hint;
1913 
1914 		if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) {
1915 			acpi_hint &= ~MWAIT_SUBSTATE_MASK;
1916 			table_hint &= ~MWAIT_SUBSTATE_MASK;
1917 		}
1918 
1919 		if (acpi_hint == table_hint)
1920 			return false;
1921 	}
1922 	return true;
1923 }
1924 
1925 static inline bool ignore_native(void)
1926 {
1927 	return no_native && !no_acpi;
1928 }
1929 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1930 #define force_use_acpi	(false)
1931 
1932 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1933 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1934 static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
1935 {
1936 	return false;
1937 }
1938 static inline bool ignore_native(void) { return false; }
1939 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1940 
1941 /**
1942  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1943  *
1944  * Tune IVT multi-socket targets.
1945  * Assumption: num_sockets == (max_package_num + 1).
1946  */
1947 static void __init ivt_idle_state_table_update(void)
1948 {
1949 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1950 	int cpu, package_num, num_sockets = 1;
1951 
1952 	for_each_online_cpu(cpu) {
1953 		package_num = topology_physical_package_id(cpu);
1954 		if (package_num + 1 > num_sockets) {
1955 			num_sockets = package_num + 1;
1956 
1957 			if (num_sockets > 4) {
1958 				cpuidle_state_table = ivt_cstates_8s;
1959 				return;
1960 			}
1961 		}
1962 	}
1963 
1964 	if (num_sockets > 2)
1965 		cpuidle_state_table = ivt_cstates_4s;
1966 
1967 	/* else, 1 and 2 socket systems use default ivt_cstates */
1968 }
1969 
1970 /**
1971  * irtl_2_usec - IRTL to microseconds conversion.
1972  * @irtl: IRTL MSR value.
1973  *
1974  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1975  */
1976 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1977 {
1978 	static const unsigned int irtl_ns_units[] __initconst = {
1979 		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1980 	};
1981 	unsigned long long ns;
1982 
1983 	if (!irtl)
1984 		return 0;
1985 
1986 	ns = irtl_ns_units[(irtl >> 10) & 0x7];
1987 
1988 	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1989 }
1990 
1991 /**
1992  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1993  *
1994  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1995  * definitive maximum latency and use the same value for target_residency.
1996  */
1997 static void __init bxt_idle_state_table_update(void)
1998 {
1999 	unsigned long long msr;
2000 	unsigned int usec;
2001 
2002 	rdmsrq(MSR_PKGC6_IRTL, msr);
2003 	usec = irtl_2_usec(msr);
2004 	if (usec) {
2005 		bxt_cstates[2].exit_latency = usec;
2006 		bxt_cstates[2].target_residency = usec;
2007 	}
2008 
2009 	rdmsrq(MSR_PKGC7_IRTL, msr);
2010 	usec = irtl_2_usec(msr);
2011 	if (usec) {
2012 		bxt_cstates[3].exit_latency = usec;
2013 		bxt_cstates[3].target_residency = usec;
2014 	}
2015 
2016 	rdmsrq(MSR_PKGC8_IRTL, msr);
2017 	usec = irtl_2_usec(msr);
2018 	if (usec) {
2019 		bxt_cstates[4].exit_latency = usec;
2020 		bxt_cstates[4].target_residency = usec;
2021 	}
2022 
2023 	rdmsrq(MSR_PKGC9_IRTL, msr);
2024 	usec = irtl_2_usec(msr);
2025 	if (usec) {
2026 		bxt_cstates[5].exit_latency = usec;
2027 		bxt_cstates[5].target_residency = usec;
2028 	}
2029 
2030 	rdmsrq(MSR_PKGC10_IRTL, msr);
2031 	usec = irtl_2_usec(msr);
2032 	if (usec) {
2033 		bxt_cstates[6].exit_latency = usec;
2034 		bxt_cstates[6].target_residency = usec;
2035 	}
2036 
2037 }
2038 
2039 /**
2040  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
2041  *
2042  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
2043  */
2044 static void __init sklh_idle_state_table_update(void)
2045 {
2046 	unsigned long long msr;
2047 	unsigned int eax, ebx, ecx, edx;
2048 
2049 
2050 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
2051 	if (max_cstate <= 7)
2052 		return;
2053 
2054 	/* if PC10 not present in CPUID.MWAIT.EDX */
2055 	if ((mwait_substates & (0xF << 28)) == 0)
2056 		return;
2057 
2058 	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
2059 
2060 	/* PC10 is not enabled in PKG C-state limit */
2061 	if ((msr & 0xF) != 8)
2062 		return;
2063 
2064 	ecx = 0;
2065 	cpuid(7, &eax, &ebx, &ecx, &edx);
2066 
2067 	/* if SGX is present */
2068 	if (ebx & (1 << 2)) {
2069 
2070 		rdmsrq(MSR_IA32_FEAT_CTL, msr);
2071 
2072 		/* if SGX is enabled */
2073 		if (msr & (1 << 18))
2074 			return;
2075 	}
2076 
2077 	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
2078 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
2079 }
2080 
2081 /**
2082  * skx_is_pc6_disabled() - Check if PC6 is disabled in BIOS.
2083  *
2084  * Return: %true if PC6 is disabled, %false otherwise.
2085  */
2086 static bool __init skx_is_pc6_disabled(void)
2087 {
2088 	u64 msr;
2089 
2090 	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
2091 
2092 	/*
2093 	 * 000b: C0/C1 (no package C-state support)
2094 	 * 001b: C2
2095 	 * 010b: C6 (non-retention)
2096 	 * 011b: C6 (retention)
2097 	 * 111b: No Package C state limits.
2098 	 */
2099 	return (msr & SKX_PKG_CST_LIMIT_MASK) < SKX_PKG_CST_LIMIT_PC6;
2100 }
2101 
2102 /**
2103  * skx_idle_state_table_update - Adjust the SKX/CLX idle states table.
2104  *
2105  * Adjust Sky Lake or Cascade Lake Xeon idle states if PC6 is disabled in BIOS.
2106  * Use the CC6 + PC0 latency and 3 times of that latency for target_residency.
2107  * This is consistent with how the intel_idle driver uses _CST to set the
2108  * target_residency.
2109  */
2110 static void __init skx_idle_state_table_update(void)
2111 {
2112 	if (skx_is_pc6_disabled()) {
2113 		skx_cstates[2].exit_latency = 92;
2114 		skx_cstates[2].target_residency = 276;
2115 	}
2116 }
2117 
2118 /**
2119  * spr_idle_state_table_update - Adjust Sapphire Rapids Xeon idle states table.
2120  *
2121  * By default, the C6 state assumes the worst-case scenario of package C6.
2122  * However, if PC6 is disabled in BIOS, update the numbers to match core C6.
2123  */
2124 static void __init spr_idle_state_table_update(void)
2125 {
2126 	if (skx_is_pc6_disabled()) {
2127 		spr_cstates[2].exit_latency = 190;
2128 		spr_cstates[2].target_residency = 600;
2129 	}
2130 }
2131 
2132 /**
2133  * drop_pc6_redundant_cstates() - Drop C-states redundant when PC6 is disabled.
2134  * @states: Idle states table to modify.
2135  *
2136  * When PC6 is disabled in BIOS, C-states that exist solely to enable PC6
2137  * entry (such as C6P or C6SP) become identical to shallower C-states like
2138  * C6, and are therefore redundant. Should be called only on systems with
2139  * multiple C6 flavors.
2140  */
2141 static void __init drop_pc6_redundant_cstates(struct cpuidle_state *states)
2142 {
2143 	int count;
2144 
2145 	if (!skx_is_pc6_disabled())
2146 		/* PC6 is not disabled, nothing to do */
2147 		return;
2148 
2149 	for (count = 0; states[count].enter; count++)
2150 		continue;
2151 
2152 	if (count < 2) {
2153 		pr_debug("Too few idle states to drop PC6-redundant states\n");
2154 		return;
2155 	}
2156 
2157 	/*
2158 	 * Sanity check: At this point all platforms with multiple C6 flavors
2159 	 * use the CPUIDLE_FLAG_PARTIAL_HINT_MATCH flag. And the last state in
2160 	 * the table is the one that becomes redundant when PC6 is disabled.
2161 	 */
2162 	if (!(states[count - 1].flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH)) {
2163 		pr_debug("Can't drop PC6-redundant states: unexpected flags\n");
2164 		return;
2165 	}
2166 
2167 	/*
2168 	 * On all current platforms with multiple C6 flavors, there is only one
2169 	 * C-state that becomes redundant when PC6 is disabled. This state is
2170 	 * the last one in the table. Drop it by marking it with
2171 	 * CPUIDLE_FLAG_UNUSABLE so that cpuidle excludes it when registering
2172 	 * idle states.
2173 	 */
2174 	pr_info("Dropping idle state %s because PC6 is disabled\n",
2175 		states[count - 1].name);
2176 	states[count - 1].flags |= CPUIDLE_FLAG_UNUSABLE;
2177 }
2178 
/**
 * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion.
 *
 * Write 0 to both the CC6 and the MC6 demotion policy configuration MSRs.
 */
static void __init byt_cht_auto_demotion_disable(void)
{
	wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
	wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
2187 
2188 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
2189 {
2190 	unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
2191 					MWAIT_CSTATE_MASK;
2192 	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
2193 					MWAIT_SUBSTATE_MASK;
2194 
2195 	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
2196 	if (num_substates == 0)
2197 		return false;
2198 
2199 	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
2200 		mark_tsc_unstable("TSC halts in idle states deeper than C2");
2201 
2202 	return true;
2203 }
2204 
2205 static void state_update_enter_method(struct cpuidle_state *state, int cstate)
2206 {
2207 	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
2208 		/*
2209 		 * Combining with XSTATE with IBRS or IRQ_ENABLE flags
2210 		 * is not currently supported but this driver.
2211 		 */
2212 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
2213 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2214 		state->enter = intel_idle_xstate;
2215 		return;
2216 	}
2217 
2218 	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
2219 			((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) {
2220 		/*
2221 		 * IBRS mitigation requires that C-states are entered
2222 		 * with interrupts disabled.
2223 		 */
2224 		if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE))
2225 			state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE;
2226 		WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
2227 		state->enter = intel_idle_ibrs;
2228 		return;
2229 	}
2230 
2231 	if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
2232 		state->enter = intel_idle_irq;
2233 		return;
2234 	}
2235 
2236 	if (force_irq_on) {
2237 		pr_info("forced intel_idle_irq for state %d\n", cstate);
2238 		state->enter = intel_idle_irq;
2239 	}
2240 }
2241 
2242 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
2243 {
2244 	int cstate;
2245 
2246 	switch (boot_cpu_data.x86_vfm) {
2247 	case INTEL_IVYBRIDGE_X:
2248 		ivt_idle_state_table_update();
2249 		break;
2250 	case INTEL_ATOM_GOLDMONT:
2251 	case INTEL_ATOM_GOLDMONT_PLUS:
2252 		bxt_idle_state_table_update();
2253 		break;
2254 	case INTEL_SKYLAKE:
2255 		sklh_idle_state_table_update();
2256 		break;
2257 	case INTEL_SKYLAKE_X:
2258 		skx_idle_state_table_update();
2259 		break;
2260 	case INTEL_SAPPHIRERAPIDS_X:
2261 	case INTEL_EMERALDRAPIDS_X:
2262 		spr_idle_state_table_update();
2263 		break;
2264 	case INTEL_ATOM_SILVERMONT:
2265 	case INTEL_ATOM_AIRMONT:
2266 		byt_cht_auto_demotion_disable();
2267 		break;
2268 	case INTEL_GRANITERAPIDS_D:
2269 	case INTEL_GRANITERAPIDS_X:
2270 	case INTEL_ATOM_CRESTMONT_X:
2271 	case INTEL_ATOM_DARKMONT_X:
2272 		drop_pc6_redundant_cstates(cpuidle_state_table);
2273 		break;
2274 	}
2275 
2276 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
2277 		struct cpuidle_state *state;
2278 		unsigned int mwait_hint;
2279 
2280 		if (intel_idle_max_cstate_reached(cstate))
2281 			break;
2282 
2283 		if (!cpuidle_state_table[cstate].enter &&
2284 		    !cpuidle_state_table[cstate].enter_s2idle)
2285 			break;
2286 
2287 		if (!cpuidle_state_table[cstate].enter_dead)
2288 			cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
2289 
2290 		/* If marked as unusable, skip this state. */
2291 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
2292 			pr_debug("state %s is disabled\n",
2293 				 cpuidle_state_table[cstate].name);
2294 			continue;
2295 		}
2296 
2297 		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
2298 		if (!intel_idle_verify_cstate(mwait_hint))
2299 			continue;
2300 
2301 		/* Structure copy. */
2302 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
2303 		state = &drv->states[drv->state_count];
2304 
2305 		state_update_enter_method(state, cstate);
2306 
2307 
2308 		if ((disabled_states_mask & BIT(drv->state_count)) ||
2309 		    ((icpu->use_acpi || force_use_acpi) &&
2310 		     intel_idle_off_by_default(state->flags, mwait_hint) &&
2311 		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
2312 			state->flags |= CPUIDLE_FLAG_OFF;
2313 
2314 		if (intel_idle_state_needs_timer_stop(state))
2315 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
2316 
2317 		drv->state_count++;
2318 	}
2319 }
2320 
2321 /**
2322  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
2323  * @drv: cpuidle driver structure to initialize.
2324  */
2325 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
2326 {
2327 	cpuidle_poll_state_init(drv);
2328 
2329 	if (disabled_states_mask & BIT(0))
2330 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
2331 
2332 	drv->state_count = 1;
2333 
2334 	if (icpu && icpu->state_table)
2335 		intel_idle_init_cstates_icpu(drv);
2336 	else
2337 		intel_idle_init_cstates_acpi(drv);
2338 }
2339 
2340 static void auto_demotion_disable(void)
2341 {
2342 	unsigned long long msr_bits;
2343 
2344 	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2345 	msr_bits &= ~auto_demotion_disable_flags;
2346 	wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
2347 }
2348 
2349 static void c1e_promotion_enable(void)
2350 {
2351 	unsigned long long msr_bits;
2352 
2353 	rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2354 	msr_bits |= 0x2;
2355 	wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2356 }
2357 
2358 static void c1e_promotion_disable(void)
2359 {
2360 	unsigned long long msr_bits;
2361 
2362 	rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
2363 	msr_bits &= ~0x2;
2364 	wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
2365 }
2366 
2367 /**
2368  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
2369  * @cpu: CPU to initialize.
2370  *
2371  * Register a cpuidle device object for @cpu and update its MSRs in accordance
2372  * with the processor model flags.
2373  */
2374 static int intel_idle_cpu_init(unsigned int cpu)
2375 {
2376 	struct cpuidle_device *dev;
2377 
2378 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2379 	dev->cpu = cpu;
2380 
2381 	if (cpuidle_register_device(dev)) {
2382 		pr_debug("cpuidle_register_device %d failed!\n", cpu);
2383 		return -EIO;
2384 	}
2385 
2386 	if (auto_demotion_disable_flags)
2387 		auto_demotion_disable();
2388 
2389 	if (c1e_promotion == C1E_PROMOTION_ENABLE)
2390 		c1e_promotion_enable();
2391 	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
2392 		c1e_promotion_disable();
2393 
2394 	return 0;
2395 }
2396 
2397 static int intel_idle_cpu_online(unsigned int cpu)
2398 {
2399 	struct cpuidle_device *dev;
2400 
2401 	if (!boot_cpu_has(X86_FEATURE_ARAT))
2402 		tick_broadcast_enable();
2403 
2404 	/*
2405 	 * Some systems can hotplug a cpu at runtime after
2406 	 * the kernel has booted, we have to initialize the
2407 	 * driver in this case
2408 	 */
2409 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2410 	if (!dev->registered)
2411 		return intel_idle_cpu_init(cpu);
2412 
2413 	return 0;
2414 }
2415 
2416 /**
2417  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
2418  */
2419 static void __init intel_idle_cpuidle_devices_uninit(void)
2420 {
2421 	int i;
2422 
2423 	for_each_online_cpu(i)
2424 		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
2425 }
2426 
2427 static void intel_c1_demotion_toggle(void *enable)
2428 {
2429 	unsigned long long msr_val;
2430 
2431 	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2432 	/*
2433 	 * Enable/disable C1 undemotion along with C1 demotion, as this is the
2434 	 * most sensible configuration in general.
2435 	 */
2436 	if (enable)
2437 		msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
2438 	else
2439 		msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
2440 	wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2441 }
2442 
2443 static ssize_t intel_c1_demotion_store(struct device *dev,
2444 				       struct device_attribute *attr,
2445 				       const char *buf, size_t count)
2446 {
2447 	bool enable;
2448 	int err;
2449 
2450 	err = kstrtobool(buf, &enable);
2451 	if (err)
2452 		return err;
2453 
2454 	mutex_lock(&c1_demotion_mutex);
2455 	/* Enable/disable C1 demotion on all CPUs */
2456 	on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1);
2457 	mutex_unlock(&c1_demotion_mutex);
2458 
2459 	return count;
2460 }
2461 
2462 static ssize_t intel_c1_demotion_show(struct device *dev,
2463 				      struct device_attribute *attr, char *buf)
2464 {
2465 	unsigned long long msr_val;
2466 
2467 	/*
2468 	 * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2469 	 * configuration would be a BIOS bug.
2470 	 */
2471 	rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2472 	return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
2473 }
2474 static DEVICE_ATTR_RW(intel_c1_demotion);
2475 
2476 static int __init intel_idle_sysfs_init(void)
2477 {
2478 	int err;
2479 
2480 	if (!c1_demotion_supported)
2481 		return 0;
2482 
2483 	sysfs_root = bus_get_dev_root(&cpu_subsys);
2484 	if (!sysfs_root)
2485 		return 0;
2486 
2487 	err = sysfs_add_file_to_group(&sysfs_root->kobj,
2488 				      &dev_attr_intel_c1_demotion.attr,
2489 				      "cpuidle");
2490 	if (err) {
2491 		put_device(sysfs_root);
2492 		return err;
2493 	}
2494 
2495 	return 0;
2496 }
2497 
2498 static void __init intel_idle_sysfs_uninit(void)
2499 {
2500 	if (!sysfs_root)
2501 		return;
2502 
2503 	sysfs_remove_file_from_group(&sysfs_root->kobj,
2504 				     &dev_attr_intel_c1_demotion.attr,
2505 				     "cpuidle");
2506 	put_device(sysfs_root);
2507 }
2508 
/**
 * get_cmdline_field - Get the current field from a cmdline string.
 * @args: The cmdline string to get the current field from.
 * @field: Pointer to the current field upon return.
 * @sep: The fields separator character.
 *
 * Examples:
 *   Input: args="C1:1:1,C1E:2:10", sep=':'
 *   Output: field="C1", return "1:1,C1E:2:10"
 *   Input: args="C1:1:1,C1E:2:10", sep=','
 *   Output: field="C1:1:1", return "C1E:2:10"
 *   Input: args="::", sep=':'
 *   Output: field="", return ":"
 *
 * Return: The continuation of the cmdline string after the field or NULL.
 */
static char *get_cmdline_field(char *args, char **field, char sep)
{
	char *end = args;

	/* The field always starts where the input starts */
	*field = args;

	/* Stop at the separator, at a whitespace or at the end of the string */
	while (*end && *end != sep && !isspace(*end))
		end++;

	/* No separator: nothing follows this field */
	if (*end != sep)
		return NULL;

	/* Terminate the field in place and hand back what follows it */
	*end = '\0';
	return end + 1;
}
2542 
2543 /**
2544  * validate_cmdline_cstate - Validate a C-state from cmdline.
2545  * @state: The C-state to validate.
2546  * @prev_state: The previous C-state in the table or NULL.
2547  *
2548  * Return: 0 if the C-state is valid or -EINVAL otherwise.
2549  */
2550 static int validate_cmdline_cstate(struct cpuidle_state *state,
2551 				   struct cpuidle_state *prev_state)
2552 {
2553 	if (state->exit_latency == 0)
2554 		/* Exit latency 0 can only be used for the POLL state */
2555 		return -EINVAL;
2556 
2557 	if (state->exit_latency > MAX_CMDLINE_LATENCY_US)
2558 		return -EINVAL;
2559 
2560 	if (state->target_residency > MAX_CMDLINE_RESIDENCY_US)
2561 		return -EINVAL;
2562 
2563 	if (state->target_residency < state->exit_latency)
2564 		return -EINVAL;
2565 
2566 	if (!prev_state)
2567 		return 0;
2568 
2569 	if (state->exit_latency <= prev_state->exit_latency)
2570 		return -EINVAL;
2571 
2572 	if (state->target_residency <= prev_state->target_residency)
2573 		return -EINVAL;
2574 
2575 	return 0;
2576 }
2577 
2578 /**
2579  * cmdline_table_adjust - Adjust the C-states table with data from cmdline.
2580  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
2581  *
2582  * Adjust the C-states table with data from the 'intel_idle.table' module
2583  * parameter (if specified).
2584  */
2585 static void __init cmdline_table_adjust(struct cpuidle_driver *drv)
2586 {
2587 	char *args = cmdline_table_str;
2588 	struct cpuidle_state *state;
2589 	int i;
2590 
2591 	if (args[0] == '\0')
2592 		/* The 'intel_idle.table' module parameter was not specified */
2593 		return;
2594 
2595 	/* Create a copy of the C-states table */
2596 	for (i = 0; i < drv->state_count; i++)
2597 		cmdline_states[i] = drv->states[i];
2598 
2599 	/*
2600 	 * Adjust the C-states table copy with data from the 'intel_idle.table'
2601 	 * module parameter.
2602 	 */
2603 	while (args) {
2604 		char *fields, *name, *val;
2605 
2606 		/*
2607 		 * Get the next C-state definition, which is expected to be
2608 		 * '<name>:<latency_us>:<target_residency_us>'. Treat "empty"
2609 		 * fields as unchanged. For example,
2610 		 * '<name>::<target_residency_us>' leaves the latency unchanged.
2611 		 */
2612 		args = get_cmdline_field(args, &fields, ',');
2613 
2614 		/* name */
2615 		fields = get_cmdline_field(fields, &name, ':');
2616 		if (!fields)
2617 			goto error;
2618 
2619 		if (!strcmp(name, "POLL")) {
2620 			pr_err("Cannot adjust POLL\n");
2621 			continue;
2622 		}
2623 
2624 		/* Find the C-state by its name */
2625 		state = NULL;
2626 		for (i = 0; i < drv->state_count; i++) {
2627 			if (!strcmp(name, drv->states[i].name)) {
2628 				state = &cmdline_states[i];
2629 				break;
2630 			}
2631 		}
2632 
2633 		if (!state) {
2634 			pr_err("C-state '%s' was not found\n", name);
2635 			continue;
2636 		}
2637 
2638 		/* Latency */
2639 		fields = get_cmdline_field(fields, &val, ':');
2640 		if (!fields)
2641 			goto error;
2642 
2643 		if (*val) {
2644 			if (kstrtouint(val, 0, &state->exit_latency))
2645 				goto error;
2646 		}
2647 
2648 		/* Target residency */
2649 		fields = get_cmdline_field(fields, &val, ':');
2650 
2651 		if (*val) {
2652 			if (kstrtouint(val, 0, &state->target_residency))
2653 				goto error;
2654 		}
2655 
2656 		/*
2657 		 * Allow for 3 more fields, but ignore them. Helps to make
2658 		 * possible future extensions of the cmdline format backward
2659 		 * compatible.
2660 		 */
2661 		for (i = 0; fields && i < 3; i++) {
2662 			fields = get_cmdline_field(fields, &val, ':');
2663 			if (!fields)
2664 				break;
2665 		}
2666 
2667 		if (fields) {
2668 			pr_err("Too many fields for C-state '%s'\n", state->name);
2669 			goto error;
2670 		}
2671 
2672 		pr_info("C-state from cmdline: name=%s, latency=%u, residency=%u\n",
2673 			state->name, state->exit_latency, state->target_residency);
2674 	}
2675 
2676 	/* Validate the adjusted C-states, start with index 1 to skip POLL */
2677 	for (i = 1; i < drv->state_count; i++) {
2678 		struct cpuidle_state *prev_state;
2679 
2680 		state = &cmdline_states[i];
2681 		prev_state = &cmdline_states[i - 1];
2682 
2683 		if (validate_cmdline_cstate(state, prev_state)) {
2684 			pr_err("C-state '%s' validation failed\n", state->name);
2685 			goto error;
2686 		}
2687 	}
2688 
2689 	/* Copy the adjusted C-states table back */
2690 	for (i = 1; i < drv->state_count; i++)
2691 		drv->states[i] = cmdline_states[i];
2692 
2693 	pr_info("Adjusted C-states with data from 'intel_idle.table'\n");
2694 	return;
2695 
2696 error:
2697 	pr_info("Failed to adjust C-states with data from 'intel_idle.table'\n");
2698 }
2699 
2700 static int __init intel_idle_init(void)
2701 {
2702 	const struct x86_cpu_id *id;
2703 	unsigned int eax, ebx, ecx;
2704 	int retval;
2705 
2706 	/* Do not load intel_idle at all for now if idle= is passed */
2707 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
2708 		return -ENODEV;
2709 
2710 	if (max_cstate == 0) {
2711 		pr_debug("disabled\n");
2712 		return -EPERM;
2713 	}
2714 
2715 	id = x86_match_cpu(intel_idle_ids);
2716 	if (id) {
2717 		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2718 			pr_debug("Please enable MWAIT in BIOS SETUP\n");
2719 			return -ENODEV;
2720 		}
2721 	} else {
2722 		id = x86_match_cpu(intel_mwait_ids);
2723 		if (!id)
2724 			return -ENODEV;
2725 	}
2726 
2727 	cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates);
2728 
2729 	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2730 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2731 	    !mwait_substates)
2732 			return -ENODEV;
2733 
2734 	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2735 
2736 	icpu = (const struct idle_cpu *)id->driver_data;
2737 	if (icpu && ignore_native()) {
2738 		pr_debug("ignoring native CPU idle states\n");
2739 		icpu = NULL;
2740 	}
2741 	if (icpu) {
2742 		if (icpu->state_table)
2743 			cpuidle_state_table = icpu->state_table;
2744 		else if (!intel_idle_acpi_cst_extract())
2745 			return -ENODEV;
2746 
2747 		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2748 		if (icpu->disable_promotion_to_c1e)
2749 			c1e_promotion = C1E_PROMOTION_DISABLE;
2750 		if (icpu->c1_demotion_supported)
2751 			c1_demotion_supported = true;
2752 		if (icpu->use_acpi || force_use_acpi)
2753 			intel_idle_acpi_cst_extract();
2754 	} else if (!intel_idle_acpi_cst_extract()) {
2755 		return -ENODEV;
2756 	}
2757 
2758 	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2759 	if (!intel_idle_cpuidle_devices)
2760 		return -ENOMEM;
2761 
2762 	intel_idle_cpuidle_driver_init(&intel_idle_driver);
2763 	cmdline_table_adjust(&intel_idle_driver);
2764 
2765 	retval = intel_idle_sysfs_init();
2766 	if (retval)
2767 		pr_warn("failed to initialized sysfs");
2768 
2769 	retval = cpuidle_register_driver(&intel_idle_driver);
2770 	if (retval) {
2771 		struct cpuidle_driver *drv = cpuidle_get_driver();
2772 		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2773 		       drv ? drv->name : "none");
2774 		goto init_driver_fail;
2775 	}
2776 
2777 	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2778 				   intel_idle_cpu_online, NULL);
2779 	if (retval < 0)
2780 		goto hp_setup_fail;
2781 
2782 	pr_debug("Local APIC timer is reliable in %s\n",
2783 		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2784 
2785 	arch_cpu_rescan_dead_smt_siblings();
2786 
2787 	return 0;
2788 
2789 hp_setup_fail:
2790 	intel_idle_cpuidle_devices_uninit();
2791 	cpuidle_unregister_driver(&intel_idle_driver);
2792 init_driver_fail:
2793 	intel_idle_sysfs_uninit();
2794 	free_percpu(intel_idle_cpuidle_devices);
2795 	return retval;
2796 
2797 }
2798 subsys_initcall_sync(intel_idle_init);
2799 
/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Debugging option that forces the driver to enter all C-states with
 * interrupts enabled. Does not apply to C-states with
 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags.
 */
module_param(force_irq_on, bool, 0444);
/*
 * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on, also for
 * C-states that do not have 'CPUIDLE_FLAG_IBRS' set. For such C-states
 * 'CPUIDLE_FLAG_IRQ_ENABLE' is cleared if it was set. Does not apply to
 * C-states with the 'CPUIDLE_FLAG_INIT_XSTATE' flag.
 */
module_param(ibrs_off, bool, 0444);
MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle");

/*
 * Define the C-states table from a user input string. Expected format is
 * 'name:latency:residency', where:
 * - name: The C-state name.
 * - latency: The C-state exit latency in us.
 * - residency: The C-state target residency in us.
 *
 * Multiple C-states can be defined by separating them with commas:
 * 'name1:latency1:residency1,name2:latency2:residency2'
 *
 * Example: intel_idle.table=C1:1:1,C1E:5:10,C6:100:600
 *
 * To leave latency or residency unchanged, use an empty field, for example:
 * 'C1:1:1,C1E::10' - leaves C1E latency unchanged.
 *
 * The POLL state cannot be adjusted. Up to 3 additional ':'-separated fields
 * per C-state are accepted and ignored. If the string cannot be parsed or the
 * adjusted C-states fail validation, the table is left unchanged.
 */
module_param_string(table, cmdline_table_str, MAX_CMDLINE_TABLE_LEN, 0444);
MODULE_PARM_DESC(table, "Build the C-states table from a user input string");
2845