// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver. The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/sched/smt.h> 51 #include <linux/notifier.h> 52 #include <linux/cpu.h> 53 #include <linux/moduleparam.h> 54 #include <asm/cpu_device_id.h> 55 #include <asm/intel-family.h> 56 #include <asm/nospec-branch.h> 57 #include <asm/mwait.h> 58 #include <asm/msr.h> 59 #include <asm/fpu/api.h> 60 61 #define INTEL_IDLE_VERSION "0.5.1" 62 63 static struct cpuidle_driver intel_idle_driver = { 64 .name = "intel_idle", 65 .owner = THIS_MODULE, 66 }; 67 /* intel_idle.max_cstate=0 disables driver */ 68 static int max_cstate = CPUIDLE_STATE_MAX - 1; 69 static unsigned int disabled_states_mask; 70 static unsigned int preferred_states_mask; 71 72 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 73 74 static unsigned long auto_demotion_disable_flags; 75 76 static enum { 77 C1E_PROMOTION_PRESERVE, 78 C1E_PROMOTION_ENABLE, 79 C1E_PROMOTION_DISABLE 80 } c1e_promotion = C1E_PROMOTION_PRESERVE; 81 82 struct idle_cpu { 83 struct cpuidle_state *state_table; 84 85 /* 86 * Hardware C-state auto-demotion may not always be optimal. 87 * Indicate which enable bits to clear here. 88 */ 89 unsigned long auto_demotion_disable_flags; 90 bool byt_auto_demotion_disable_flag; 91 bool disable_promotion_to_c1e; 92 bool use_acpi; 93 }; 94 95 static const struct idle_cpu *icpu __initdata; 96 static struct cpuidle_state *cpuidle_state_table __initdata; 97 98 static unsigned int mwait_substates __initdata; 99 100 /* 101 * Enable interrupts before entering the C-state. On some platforms and for 102 * some C-states, this may measurably decrease interrupt latency. 
103 */ 104 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 105 106 /* 107 * Enable this state by default even if the ACPI _CST does not list it. 108 */ 109 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 110 111 /* 112 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 113 * above. 114 */ 115 #define CPUIDLE_FLAG_IBRS BIT(16) 116 117 /* 118 * Initialize large xstate for the C6-state entrance. 119 */ 120 #define CPUIDLE_FLAG_INIT_XSTATE BIT(17) 121 122 /* 123 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 124 * the C-state (top nibble) and sub-state (bottom nibble) 125 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 126 * 127 * We store the hint at the top of our "flags" for each state. 128 */ 129 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 130 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 131 132 static __always_inline int __intel_idle(struct cpuidle_device *dev, 133 struct cpuidle_driver *drv, int index) 134 { 135 struct cpuidle_state *state = &drv->states[index]; 136 unsigned long eax = flg2MWAIT(state->flags); 137 unsigned long ecx = 1; /* break on interrupt flag */ 138 139 mwait_idle_with_hints(eax, ecx); 140 141 return index; 142 } 143 144 /** 145 * intel_idle - Ask the processor to enter the given idle state. 146 * @dev: cpuidle device of the target CPU. 147 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 148 * @index: Target idle state index. 149 * 150 * Use the MWAIT instruction to notify the processor that the CPU represented by 151 * @dev is idle and it can try to enter the idle state corresponding to @index. 152 * 153 * If the local APIC timer is not known to be reliable in the target idle state, 154 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 155 * 156 * Must be called under local_irq_disable(). 
157 */ 158 static __cpuidle int intel_idle(struct cpuidle_device *dev, 159 struct cpuidle_driver *drv, int index) 160 { 161 return __intel_idle(dev, drv, index); 162 } 163 164 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 165 struct cpuidle_driver *drv, int index) 166 { 167 int ret; 168 169 raw_local_irq_enable(); 170 ret = __intel_idle(dev, drv, index); 171 172 /* 173 * The lockdep hardirqs state may be changed to 'on' with timer 174 * tick interrupt followed by __do_softirq(). Use local_irq_disable() 175 * to keep the hardirqs state correct. 176 */ 177 local_irq_disable(); 178 179 return ret; 180 } 181 182 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 183 struct cpuidle_driver *drv, int index) 184 { 185 bool smt_active = sched_smt_active(); 186 u64 spec_ctrl = spec_ctrl_current(); 187 int ret; 188 189 if (smt_active) 190 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 191 192 ret = __intel_idle(dev, drv, index); 193 194 if (smt_active) 195 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 196 197 return ret; 198 } 199 200 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, 201 struct cpuidle_driver *drv, int index) 202 { 203 fpu_idle_fpregs(); 204 return __intel_idle(dev, drv, index); 205 } 206 207 /** 208 * intel_idle_s2idle - Ask the processor to enter the given idle state. 209 * @dev: cpuidle device of the target CPU. 210 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 211 * @index: Target idle state index. 212 * 213 * Use the MWAIT instruction to notify the processor that the CPU represented by 214 * @dev is idle and it can try to enter the idle state corresponding to @index. 215 * 216 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 217 * scheduler tick and suspended scheduler clock on the target CPU. 
218 */ 219 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 220 struct cpuidle_driver *drv, int index) 221 { 222 unsigned long ecx = 1; /* break on interrupt flag */ 223 struct cpuidle_state *state = &drv->states[index]; 224 unsigned long eax = flg2MWAIT(state->flags); 225 226 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) 227 fpu_idle_fpregs(); 228 229 mwait_idle_with_hints(eax, ecx); 230 231 return 0; 232 } 233 234 /* 235 * States are indexed by the cstate number, 236 * which is also the index into the MWAIT hint array. 237 * Thus C0 is a dummy. 238 */ 239 static struct cpuidle_state nehalem_cstates[] __initdata = { 240 { 241 .name = "C1", 242 .desc = "MWAIT 0x00", 243 .flags = MWAIT2flg(0x00), 244 .exit_latency = 3, 245 .target_residency = 6, 246 .enter = &intel_idle, 247 .enter_s2idle = intel_idle_s2idle, }, 248 { 249 .name = "C1E", 250 .desc = "MWAIT 0x01", 251 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 252 .exit_latency = 10, 253 .target_residency = 20, 254 .enter = &intel_idle, 255 .enter_s2idle = intel_idle_s2idle, }, 256 { 257 .name = "C3", 258 .desc = "MWAIT 0x10", 259 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 260 .exit_latency = 20, 261 .target_residency = 80, 262 .enter = &intel_idle, 263 .enter_s2idle = intel_idle_s2idle, }, 264 { 265 .name = "C6", 266 .desc = "MWAIT 0x20", 267 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 268 .exit_latency = 200, 269 .target_residency = 800, 270 .enter = &intel_idle, 271 .enter_s2idle = intel_idle_s2idle, }, 272 { 273 .enter = NULL } 274 }; 275 276 static struct cpuidle_state snb_cstates[] __initdata = { 277 { 278 .name = "C1", 279 .desc = "MWAIT 0x00", 280 .flags = MWAIT2flg(0x00), 281 .exit_latency = 2, 282 .target_residency = 2, 283 .enter = &intel_idle, 284 .enter_s2idle = intel_idle_s2idle, }, 285 { 286 .name = "C1E", 287 .desc = "MWAIT 0x01", 288 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 289 .exit_latency = 10, 290 .target_residency = 20, 291 .enter = 
&intel_idle, 292 .enter_s2idle = intel_idle_s2idle, }, 293 { 294 .name = "C3", 295 .desc = "MWAIT 0x10", 296 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 297 .exit_latency = 80, 298 .target_residency = 211, 299 .enter = &intel_idle, 300 .enter_s2idle = intel_idle_s2idle, }, 301 { 302 .name = "C6", 303 .desc = "MWAIT 0x20", 304 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 305 .exit_latency = 104, 306 .target_residency = 345, 307 .enter = &intel_idle, 308 .enter_s2idle = intel_idle_s2idle, }, 309 { 310 .name = "C7", 311 .desc = "MWAIT 0x30", 312 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 313 .exit_latency = 109, 314 .target_residency = 345, 315 .enter = &intel_idle, 316 .enter_s2idle = intel_idle_s2idle, }, 317 { 318 .enter = NULL } 319 }; 320 321 static struct cpuidle_state byt_cstates[] __initdata = { 322 { 323 .name = "C1", 324 .desc = "MWAIT 0x00", 325 .flags = MWAIT2flg(0x00), 326 .exit_latency = 1, 327 .target_residency = 1, 328 .enter = &intel_idle, 329 .enter_s2idle = intel_idle_s2idle, }, 330 { 331 .name = "C6N", 332 .desc = "MWAIT 0x58", 333 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 334 .exit_latency = 300, 335 .target_residency = 275, 336 .enter = &intel_idle, 337 .enter_s2idle = intel_idle_s2idle, }, 338 { 339 .name = "C6S", 340 .desc = "MWAIT 0x52", 341 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 342 .exit_latency = 500, 343 .target_residency = 560, 344 .enter = &intel_idle, 345 .enter_s2idle = intel_idle_s2idle, }, 346 { 347 .name = "C7", 348 .desc = "MWAIT 0x60", 349 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 350 .exit_latency = 1200, 351 .target_residency = 4000, 352 .enter = &intel_idle, 353 .enter_s2idle = intel_idle_s2idle, }, 354 { 355 .name = "C7S", 356 .desc = "MWAIT 0x64", 357 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 358 .exit_latency = 10000, 359 .target_residency = 20000, 360 .enter = &intel_idle, 361 .enter_s2idle = intel_idle_s2idle, }, 362 { 363 .enter = NULL } 364 }; 365 
366 static struct cpuidle_state cht_cstates[] __initdata = { 367 { 368 .name = "C1", 369 .desc = "MWAIT 0x00", 370 .flags = MWAIT2flg(0x00), 371 .exit_latency = 1, 372 .target_residency = 1, 373 .enter = &intel_idle, 374 .enter_s2idle = intel_idle_s2idle, }, 375 { 376 .name = "C6N", 377 .desc = "MWAIT 0x58", 378 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 379 .exit_latency = 80, 380 .target_residency = 275, 381 .enter = &intel_idle, 382 .enter_s2idle = intel_idle_s2idle, }, 383 { 384 .name = "C6S", 385 .desc = "MWAIT 0x52", 386 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 387 .exit_latency = 200, 388 .target_residency = 560, 389 .enter = &intel_idle, 390 .enter_s2idle = intel_idle_s2idle, }, 391 { 392 .name = "C7", 393 .desc = "MWAIT 0x60", 394 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 395 .exit_latency = 1200, 396 .target_residency = 4000, 397 .enter = &intel_idle, 398 .enter_s2idle = intel_idle_s2idle, }, 399 { 400 .name = "C7S", 401 .desc = "MWAIT 0x64", 402 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 403 .exit_latency = 10000, 404 .target_residency = 20000, 405 .enter = &intel_idle, 406 .enter_s2idle = intel_idle_s2idle, }, 407 { 408 .enter = NULL } 409 }; 410 411 static struct cpuidle_state ivb_cstates[] __initdata = { 412 { 413 .name = "C1", 414 .desc = "MWAIT 0x00", 415 .flags = MWAIT2flg(0x00), 416 .exit_latency = 1, 417 .target_residency = 1, 418 .enter = &intel_idle, 419 .enter_s2idle = intel_idle_s2idle, }, 420 { 421 .name = "C1E", 422 .desc = "MWAIT 0x01", 423 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 424 .exit_latency = 10, 425 .target_residency = 20, 426 .enter = &intel_idle, 427 .enter_s2idle = intel_idle_s2idle, }, 428 { 429 .name = "C3", 430 .desc = "MWAIT 0x10", 431 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 432 .exit_latency = 59, 433 .target_residency = 156, 434 .enter = &intel_idle, 435 .enter_s2idle = intel_idle_s2idle, }, 436 { 437 .name = "C6", 438 .desc = "MWAIT 0x20", 439 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 440 .exit_latency = 80, 441 .target_residency = 300, 442 .enter = &intel_idle, 443 .enter_s2idle = intel_idle_s2idle, }, 444 { 445 .name = "C7", 446 .desc = "MWAIT 0x30", 447 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 448 .exit_latency = 87, 449 .target_residency = 300, 450 .enter = &intel_idle, 451 .enter_s2idle = intel_idle_s2idle, }, 452 { 453 .enter = NULL } 454 }; 455 456 static struct cpuidle_state ivt_cstates[] __initdata = { 457 { 458 .name = "C1", 459 .desc = "MWAIT 0x00", 460 .flags = MWAIT2flg(0x00), 461 .exit_latency = 1, 462 .target_residency = 1, 463 .enter = &intel_idle, 464 .enter_s2idle = intel_idle_s2idle, }, 465 { 466 .name = "C1E", 467 .desc = "MWAIT 0x01", 468 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 469 .exit_latency = 10, 470 .target_residency = 80, 471 .enter = &intel_idle, 472 .enter_s2idle = intel_idle_s2idle, }, 473 { 474 .name = "C3", 475 .desc = "MWAIT 0x10", 476 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 477 .exit_latency = 59, 478 .target_residency = 156, 479 .enter = &intel_idle, 480 .enter_s2idle = intel_idle_s2idle, }, 481 { 482 .name = "C6", 483 .desc = "MWAIT 0x20", 484 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 485 .exit_latency = 82, 486 .target_residency = 300, 487 .enter = &intel_idle, 488 .enter_s2idle = intel_idle_s2idle, }, 489 { 490 .enter = NULL } 491 }; 492 493 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 494 { 495 .name = "C1", 496 .desc = "MWAIT 0x00", 497 .flags = MWAIT2flg(0x00), 498 .exit_latency = 1, 499 .target_residency = 1, 500 .enter = &intel_idle, 501 .enter_s2idle = intel_idle_s2idle, }, 502 { 503 .name = "C1E", 504 .desc = "MWAIT 0x01", 505 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 506 .exit_latency = 10, 507 .target_residency = 250, 508 .enter = &intel_idle, 509 .enter_s2idle = intel_idle_s2idle, }, 510 { 511 .name = "C3", 512 .desc = "MWAIT 0x10", 513 .flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TLB_FLUSHED, 514 .exit_latency = 59, 515 .target_residency = 300, 516 .enter = &intel_idle, 517 .enter_s2idle = intel_idle_s2idle, }, 518 { 519 .name = "C6", 520 .desc = "MWAIT 0x20", 521 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 522 .exit_latency = 84, 523 .target_residency = 400, 524 .enter = &intel_idle, 525 .enter_s2idle = intel_idle_s2idle, }, 526 { 527 .enter = NULL } 528 }; 529 530 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 531 { 532 .name = "C1", 533 .desc = "MWAIT 0x00", 534 .flags = MWAIT2flg(0x00), 535 .exit_latency = 1, 536 .target_residency = 1, 537 .enter = &intel_idle, 538 .enter_s2idle = intel_idle_s2idle, }, 539 { 540 .name = "C1E", 541 .desc = "MWAIT 0x01", 542 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 543 .exit_latency = 10, 544 .target_residency = 500, 545 .enter = &intel_idle, 546 .enter_s2idle = intel_idle_s2idle, }, 547 { 548 .name = "C3", 549 .desc = "MWAIT 0x10", 550 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 551 .exit_latency = 59, 552 .target_residency = 600, 553 .enter = &intel_idle, 554 .enter_s2idle = intel_idle_s2idle, }, 555 { 556 .name = "C6", 557 .desc = "MWAIT 0x20", 558 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 559 .exit_latency = 88, 560 .target_residency = 700, 561 .enter = &intel_idle, 562 .enter_s2idle = intel_idle_s2idle, }, 563 { 564 .enter = NULL } 565 }; 566 567 static struct cpuidle_state hsw_cstates[] __initdata = { 568 { 569 .name = "C1", 570 .desc = "MWAIT 0x00", 571 .flags = MWAIT2flg(0x00), 572 .exit_latency = 2, 573 .target_residency = 2, 574 .enter = &intel_idle, 575 .enter_s2idle = intel_idle_s2idle, }, 576 { 577 .name = "C1E", 578 .desc = "MWAIT 0x01", 579 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 580 .exit_latency = 10, 581 .target_residency = 20, 582 .enter = &intel_idle, 583 .enter_s2idle = intel_idle_s2idle, }, 584 { 585 .name = "C3", 586 .desc = "MWAIT 0x10", 587 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 588 
.exit_latency = 33, 589 .target_residency = 100, 590 .enter = &intel_idle, 591 .enter_s2idle = intel_idle_s2idle, }, 592 { 593 .name = "C6", 594 .desc = "MWAIT 0x20", 595 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 596 .exit_latency = 133, 597 .target_residency = 400, 598 .enter = &intel_idle, 599 .enter_s2idle = intel_idle_s2idle, }, 600 { 601 .name = "C7s", 602 .desc = "MWAIT 0x32", 603 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 604 .exit_latency = 166, 605 .target_residency = 500, 606 .enter = &intel_idle, 607 .enter_s2idle = intel_idle_s2idle, }, 608 { 609 .name = "C8", 610 .desc = "MWAIT 0x40", 611 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 612 .exit_latency = 300, 613 .target_residency = 900, 614 .enter = &intel_idle, 615 .enter_s2idle = intel_idle_s2idle, }, 616 { 617 .name = "C9", 618 .desc = "MWAIT 0x50", 619 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 620 .exit_latency = 600, 621 .target_residency = 1800, 622 .enter = &intel_idle, 623 .enter_s2idle = intel_idle_s2idle, }, 624 { 625 .name = "C10", 626 .desc = "MWAIT 0x60", 627 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 628 .exit_latency = 2600, 629 .target_residency = 7700, 630 .enter = &intel_idle, 631 .enter_s2idle = intel_idle_s2idle, }, 632 { 633 .enter = NULL } 634 }; 635 static struct cpuidle_state bdw_cstates[] __initdata = { 636 { 637 .name = "C1", 638 .desc = "MWAIT 0x00", 639 .flags = MWAIT2flg(0x00), 640 .exit_latency = 2, 641 .target_residency = 2, 642 .enter = &intel_idle, 643 .enter_s2idle = intel_idle_s2idle, }, 644 { 645 .name = "C1E", 646 .desc = "MWAIT 0x01", 647 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 648 .exit_latency = 10, 649 .target_residency = 20, 650 .enter = &intel_idle, 651 .enter_s2idle = intel_idle_s2idle, }, 652 { 653 .name = "C3", 654 .desc = "MWAIT 0x10", 655 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 656 .exit_latency = 40, 657 .target_residency = 100, 658 .enter = &intel_idle, 659 .enter_s2idle = 
intel_idle_s2idle, }, 660 { 661 .name = "C6", 662 .desc = "MWAIT 0x20", 663 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 664 .exit_latency = 133, 665 .target_residency = 400, 666 .enter = &intel_idle, 667 .enter_s2idle = intel_idle_s2idle, }, 668 { 669 .name = "C7s", 670 .desc = "MWAIT 0x32", 671 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 672 .exit_latency = 166, 673 .target_residency = 500, 674 .enter = &intel_idle, 675 .enter_s2idle = intel_idle_s2idle, }, 676 { 677 .name = "C8", 678 .desc = "MWAIT 0x40", 679 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 680 .exit_latency = 300, 681 .target_residency = 900, 682 .enter = &intel_idle, 683 .enter_s2idle = intel_idle_s2idle, }, 684 { 685 .name = "C9", 686 .desc = "MWAIT 0x50", 687 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 688 .exit_latency = 600, 689 .target_residency = 1800, 690 .enter = &intel_idle, 691 .enter_s2idle = intel_idle_s2idle, }, 692 { 693 .name = "C10", 694 .desc = "MWAIT 0x60", 695 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 696 .exit_latency = 2600, 697 .target_residency = 7700, 698 .enter = &intel_idle, 699 .enter_s2idle = intel_idle_s2idle, }, 700 { 701 .enter = NULL } 702 }; 703 704 static struct cpuidle_state skl_cstates[] __initdata = { 705 { 706 .name = "C1", 707 .desc = "MWAIT 0x00", 708 .flags = MWAIT2flg(0x00), 709 .exit_latency = 2, 710 .target_residency = 2, 711 .enter = &intel_idle, 712 .enter_s2idle = intel_idle_s2idle, }, 713 { 714 .name = "C1E", 715 .desc = "MWAIT 0x01", 716 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 717 .exit_latency = 10, 718 .target_residency = 20, 719 .enter = &intel_idle, 720 .enter_s2idle = intel_idle_s2idle, }, 721 { 722 .name = "C3", 723 .desc = "MWAIT 0x10", 724 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 725 .exit_latency = 70, 726 .target_residency = 100, 727 .enter = &intel_idle, 728 .enter_s2idle = intel_idle_s2idle, }, 729 { 730 .name = "C6", 731 .desc = "MWAIT 0x20", 732 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 733 .exit_latency = 85, 734 .target_residency = 200, 735 .enter = &intel_idle, 736 .enter_s2idle = intel_idle_s2idle, }, 737 { 738 .name = "C7s", 739 .desc = "MWAIT 0x33", 740 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 741 .exit_latency = 124, 742 .target_residency = 800, 743 .enter = &intel_idle, 744 .enter_s2idle = intel_idle_s2idle, }, 745 { 746 .name = "C8", 747 .desc = "MWAIT 0x40", 748 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 749 .exit_latency = 200, 750 .target_residency = 800, 751 .enter = &intel_idle, 752 .enter_s2idle = intel_idle_s2idle, }, 753 { 754 .name = "C9", 755 .desc = "MWAIT 0x50", 756 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 757 .exit_latency = 480, 758 .target_residency = 5000, 759 .enter = &intel_idle, 760 .enter_s2idle = intel_idle_s2idle, }, 761 { 762 .name = "C10", 763 .desc = "MWAIT 0x60", 764 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 765 .exit_latency = 890, 766 .target_residency = 5000, 767 .enter = &intel_idle, 768 .enter_s2idle = intel_idle_s2idle, }, 769 { 770 .enter = NULL } 771 }; 772 773 static struct cpuidle_state skx_cstates[] __initdata = { 774 { 775 .name = "C1", 776 .desc = "MWAIT 0x00", 777 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 778 .exit_latency = 2, 779 .target_residency = 2, 780 .enter = &intel_idle, 781 .enter_s2idle = intel_idle_s2idle, }, 782 { 783 .name = "C1E", 784 .desc = "MWAIT 0x01", 785 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 786 .exit_latency = 10, 787 .target_residency = 20, 788 .enter = &intel_idle, 789 .enter_s2idle = intel_idle_s2idle, }, 790 { 791 .name = "C6", 792 .desc = "MWAIT 0x20", 793 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 794 .exit_latency = 133, 795 .target_residency = 600, 796 .enter = &intel_idle, 797 .enter_s2idle = intel_idle_s2idle, }, 798 { 799 
.enter = NULL } 800 }; 801 802 static struct cpuidle_state icx_cstates[] __initdata = { 803 { 804 .name = "C1", 805 .desc = "MWAIT 0x00", 806 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 807 .exit_latency = 1, 808 .target_residency = 1, 809 .enter = &intel_idle, 810 .enter_s2idle = intel_idle_s2idle, }, 811 { 812 .name = "C1E", 813 .desc = "MWAIT 0x01", 814 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 815 .exit_latency = 4, 816 .target_residency = 4, 817 .enter = &intel_idle, 818 .enter_s2idle = intel_idle_s2idle, }, 819 { 820 .name = "C6", 821 .desc = "MWAIT 0x20", 822 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 823 .exit_latency = 170, 824 .target_residency = 600, 825 .enter = &intel_idle, 826 .enter_s2idle = intel_idle_s2idle, }, 827 { 828 .enter = NULL } 829 }; 830 831 /* 832 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 833 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 834 * But in this case there is effectively no C1, because C1 requests are 835 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 836 * and C1E requests end up with C1, so there is effectively no C1E. 837 * 838 * By default we enable C1E and disable C1 by marking it with 839 * 'CPUIDLE_FLAG_UNUSABLE'. 
840 */ 841 static struct cpuidle_state adl_cstates[] __initdata = { 842 { 843 .name = "C1", 844 .desc = "MWAIT 0x00", 845 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 846 .exit_latency = 1, 847 .target_residency = 1, 848 .enter = &intel_idle, 849 .enter_s2idle = intel_idle_s2idle, }, 850 { 851 .name = "C1E", 852 .desc = "MWAIT 0x01", 853 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 854 .exit_latency = 2, 855 .target_residency = 4, 856 .enter = &intel_idle, 857 .enter_s2idle = intel_idle_s2idle, }, 858 { 859 .name = "C6", 860 .desc = "MWAIT 0x20", 861 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 862 .exit_latency = 220, 863 .target_residency = 600, 864 .enter = &intel_idle, 865 .enter_s2idle = intel_idle_s2idle, }, 866 { 867 .name = "C8", 868 .desc = "MWAIT 0x40", 869 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 870 .exit_latency = 280, 871 .target_residency = 800, 872 .enter = &intel_idle, 873 .enter_s2idle = intel_idle_s2idle, }, 874 { 875 .name = "C10", 876 .desc = "MWAIT 0x60", 877 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 878 .exit_latency = 680, 879 .target_residency = 2000, 880 .enter = &intel_idle, 881 .enter_s2idle = intel_idle_s2idle, }, 882 { 883 .enter = NULL } 884 }; 885 886 static struct cpuidle_state adl_l_cstates[] __initdata = { 887 { 888 .name = "C1", 889 .desc = "MWAIT 0x00", 890 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 891 .exit_latency = 1, 892 .target_residency = 1, 893 .enter = &intel_idle, 894 .enter_s2idle = intel_idle_s2idle, }, 895 { 896 .name = "C1E", 897 .desc = "MWAIT 0x01", 898 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 899 .exit_latency = 2, 900 .target_residency = 4, 901 .enter = &intel_idle, 902 .enter_s2idle = intel_idle_s2idle, }, 903 { 904 .name = "C6", 905 .desc = "MWAIT 0x20", 906 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 907 .exit_latency = 170, 908 .target_residency = 500, 909 .enter = &intel_idle, 910 .enter_s2idle = intel_idle_s2idle, }, 911 { 912 
.name = "C8", 913 .desc = "MWAIT 0x40", 914 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 915 .exit_latency = 200, 916 .target_residency = 600, 917 .enter = &intel_idle, 918 .enter_s2idle = intel_idle_s2idle, }, 919 { 920 .name = "C10", 921 .desc = "MWAIT 0x60", 922 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 923 .exit_latency = 230, 924 .target_residency = 700, 925 .enter = &intel_idle, 926 .enter_s2idle = intel_idle_s2idle, }, 927 { 928 .enter = NULL } 929 }; 930 931 static struct cpuidle_state adl_n_cstates[] __initdata = { 932 { 933 .name = "C1", 934 .desc = "MWAIT 0x00", 935 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 936 .exit_latency = 1, 937 .target_residency = 1, 938 .enter = &intel_idle, 939 .enter_s2idle = intel_idle_s2idle, }, 940 { 941 .name = "C1E", 942 .desc = "MWAIT 0x01", 943 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 944 .exit_latency = 2, 945 .target_residency = 4, 946 .enter = &intel_idle, 947 .enter_s2idle = intel_idle_s2idle, }, 948 { 949 .name = "C6", 950 .desc = "MWAIT 0x20", 951 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 952 .exit_latency = 195, 953 .target_residency = 585, 954 .enter = &intel_idle, 955 .enter_s2idle = intel_idle_s2idle, }, 956 { 957 .name = "C8", 958 .desc = "MWAIT 0x40", 959 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 960 .exit_latency = 260, 961 .target_residency = 1040, 962 .enter = &intel_idle, 963 .enter_s2idle = intel_idle_s2idle, }, 964 { 965 .name = "C10", 966 .desc = "MWAIT 0x60", 967 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 968 .exit_latency = 660, 969 .target_residency = 1980, 970 .enter = &intel_idle, 971 .enter_s2idle = intel_idle_s2idle, }, 972 { 973 .enter = NULL } 974 }; 975 976 static struct cpuidle_state spr_cstates[] __initdata = { 977 { 978 .name = "C1", 979 .desc = "MWAIT 0x00", 980 .flags = MWAIT2flg(0x00), 981 .exit_latency = 1, 982 .target_residency = 1, 983 .enter = &intel_idle, 984 .enter_s2idle = intel_idle_s2idle, }, 985 { 986 
.name = "C1E", 987 .desc = "MWAIT 0x01", 988 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 989 .exit_latency = 2, 990 .target_residency = 4, 991 .enter = &intel_idle, 992 .enter_s2idle = intel_idle_s2idle, }, 993 { 994 .name = "C6", 995 .desc = "MWAIT 0x20", 996 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 997 CPUIDLE_FLAG_INIT_XSTATE, 998 .exit_latency = 290, 999 .target_residency = 800, 1000 .enter = &intel_idle, 1001 .enter_s2idle = intel_idle_s2idle, }, 1002 { 1003 .enter = NULL } 1004 }; 1005 1006 static struct cpuidle_state atom_cstates[] __initdata = { 1007 { 1008 .name = "C1E", 1009 .desc = "MWAIT 0x00", 1010 .flags = MWAIT2flg(0x00), 1011 .exit_latency = 10, 1012 .target_residency = 20, 1013 .enter = &intel_idle, 1014 .enter_s2idle = intel_idle_s2idle, }, 1015 { 1016 .name = "C2", 1017 .desc = "MWAIT 0x10", 1018 .flags = MWAIT2flg(0x10), 1019 .exit_latency = 20, 1020 .target_residency = 80, 1021 .enter = &intel_idle, 1022 .enter_s2idle = intel_idle_s2idle, }, 1023 { 1024 .name = "C4", 1025 .desc = "MWAIT 0x30", 1026 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1027 .exit_latency = 100, 1028 .target_residency = 400, 1029 .enter = &intel_idle, 1030 .enter_s2idle = intel_idle_s2idle, }, 1031 { 1032 .name = "C6", 1033 .desc = "MWAIT 0x52", 1034 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1035 .exit_latency = 140, 1036 .target_residency = 560, 1037 .enter = &intel_idle, 1038 .enter_s2idle = intel_idle_s2idle, }, 1039 { 1040 .enter = NULL } 1041 }; 1042 static struct cpuidle_state tangier_cstates[] __initdata = { 1043 { 1044 .name = "C1", 1045 .desc = "MWAIT 0x00", 1046 .flags = MWAIT2flg(0x00), 1047 .exit_latency = 1, 1048 .target_residency = 4, 1049 .enter = &intel_idle, 1050 .enter_s2idle = intel_idle_s2idle, }, 1051 { 1052 .name = "C4", 1053 .desc = "MWAIT 0x30", 1054 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1055 .exit_latency = 100, 1056 .target_residency = 400, 1057 .enter = &intel_idle, 1058 .enter_s2idle = 
intel_idle_s2idle, }, 1059 { 1060 .name = "C6", 1061 .desc = "MWAIT 0x52", 1062 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1063 .exit_latency = 140, 1064 .target_residency = 560, 1065 .enter = &intel_idle, 1066 .enter_s2idle = intel_idle_s2idle, }, 1067 { 1068 .name = "C7", 1069 .desc = "MWAIT 0x60", 1070 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1071 .exit_latency = 1200, 1072 .target_residency = 4000, 1073 .enter = &intel_idle, 1074 .enter_s2idle = intel_idle_s2idle, }, 1075 { 1076 .name = "C9", 1077 .desc = "MWAIT 0x64", 1078 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1079 .exit_latency = 10000, 1080 .target_residency = 20000, 1081 .enter = &intel_idle, 1082 .enter_s2idle = intel_idle_s2idle, }, 1083 { 1084 .enter = NULL } 1085 }; 1086 static struct cpuidle_state avn_cstates[] __initdata = { 1087 { 1088 .name = "C1", 1089 .desc = "MWAIT 0x00", 1090 .flags = MWAIT2flg(0x00), 1091 .exit_latency = 2, 1092 .target_residency = 2, 1093 .enter = &intel_idle, 1094 .enter_s2idle = intel_idle_s2idle, }, 1095 { 1096 .name = "C6", 1097 .desc = "MWAIT 0x51", 1098 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1099 .exit_latency = 15, 1100 .target_residency = 45, 1101 .enter = &intel_idle, 1102 .enter_s2idle = intel_idle_s2idle, }, 1103 { 1104 .enter = NULL } 1105 }; 1106 static struct cpuidle_state knl_cstates[] __initdata = { 1107 { 1108 .name = "C1", 1109 .desc = "MWAIT 0x00", 1110 .flags = MWAIT2flg(0x00), 1111 .exit_latency = 1, 1112 .target_residency = 2, 1113 .enter = &intel_idle, 1114 .enter_s2idle = intel_idle_s2idle }, 1115 { 1116 .name = "C6", 1117 .desc = "MWAIT 0x10", 1118 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1119 .exit_latency = 120, 1120 .target_residency = 500, 1121 .enter = &intel_idle, 1122 .enter_s2idle = intel_idle_s2idle }, 1123 { 1124 .enter = NULL } 1125 }; 1126 1127 static struct cpuidle_state bxt_cstates[] __initdata = { 1128 { 1129 .name = "C1", 1130 .desc = "MWAIT 0x00", 1131 .flags = 
MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/* Terminator: an entry without callbacks ends the table. */
		.enter = NULL }
};

/*
 * Idle states table referenced by idle_cpu_dnv, which intel_idle_ids[]
 * associates with the ATOM_GOLDMONT_D model.
 */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/* Terminator: an entry without callbacks ends the table. */
		.enter = NULL }
};

/*
 * Idle states table referenced by idle_cpu_snr (ATOM_TREMONT_D in
 * intel_idle_ids[]).
 *
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/* Terminator: an entry without callbacks ends the table. */
		.enter = NULL }
};

/*
 * Per-model driver data.
 *
 * Each struct idle_cpu below names the static idle states table for a group
 * of CPU models and carries the knobs consumed at init time:
 * intel_idle_init() copies .auto_demotion_disable_flags, turns
 * .disable_promotion_to_c1e into C1E_PROMOTION_DISABLE and extracts ACPI _CST
 * when .use_acpi is set; intel_idle_init_cstates_icpu() zeroes the CC6/MC6
 * demotion policy MSRs when .byt_auto_demotion_disable_flag is set.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

/*
 * Default Ivy Town table; ivt_idle_state_table_update() may replace it
 * depending on the number of sockets found.
 */
static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * The Alder Lake entries leave C1E promotion unset here; it is chosen at
 * init time by adl_idle_state_table_update().
 */
static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_adl_n __initconst = {
	.state_table = adl_n_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * CPU model to driver data map.  intel_idle_init() passes this table to
 * x86_match_cpu() and casts the matching entry's driver_data to
 * struct idle_cpu.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &idle_cpu_adl_n),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
	{}
};

/*
 * Fallback match used by intel_idle_init() when the model is not listed in
 * intel_idle_ids[]: any family 6 Intel CPU with the MWAIT feature.  The
 * driver data is NULL, so the idle states then have to come from ACPI _CST.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/**
 * intel_idle_max_cstate_reached - Check a state index against max_cstate.
 * @cstate: Zero-based index of the candidate state.
 *
 * Return: 'true' (after logging the limit) when @cstate + 1 exceeds
 * max_cstate, 'false' otherwise.
 */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/**
 * intel_idle_state_needs_timer_stop - Check if a state needs tick broadcast.
 * @state: Idle state whose flags carry the MWAIT hint.
 *
 * Return: 'false' when the CPU has X86_FEATURE_ARAT; otherwise 'true' when
 * the C-state field of the MWAIT hint is non-zero.
 */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	unsigned long eax = flg2MWAIT(state->flags);

	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

/* Read-only (0444) parameter: when set, ACPI _CST is never consulted. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/* Read-only (0444) parameter exposed under the name "use_acpi". */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* Filled in by intel_idle_acpi_cst_extract(); count is 0 when unusable. */
static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
1485 * 1486 * Check if all of the C-states listed by _CST in the max_cstate range are 1487 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1488 */ 1489 static bool __init intel_idle_cst_usable(void) 1490 { 1491 int cstate, limit; 1492 1493 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1494 acpi_state_table.count); 1495 1496 for (cstate = 1; cstate < limit; cstate++) { 1497 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1498 1499 if (cx->entry_method != ACPI_CSTATE_FFH) 1500 return false; 1501 } 1502 1503 return true; 1504 } 1505 1506 static bool __init intel_idle_acpi_cst_extract(void) 1507 { 1508 unsigned int cpu; 1509 1510 if (no_acpi) { 1511 pr_debug("Not allowed to use ACPI _CST\n"); 1512 return false; 1513 } 1514 1515 for_each_possible_cpu(cpu) { 1516 struct acpi_processor *pr = per_cpu(processors, cpu); 1517 1518 if (!pr) 1519 continue; 1520 1521 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1522 continue; 1523 1524 acpi_state_table.count++; 1525 1526 if (!intel_idle_cst_usable()) 1527 continue; 1528 1529 if (!acpi_processor_claim_cst_control()) 1530 break; 1531 1532 return true; 1533 } 1534 1535 acpi_state_table.count = 0; 1536 pr_debug("ACPI _CST not found or not usable\n"); 1537 return false; 1538 } 1539 1540 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1541 { 1542 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1543 1544 /* 1545 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1546 * the interesting states are ACPI_CSTATE_FFH. 
1547 */ 1548 for (cstate = 1; cstate < limit; cstate++) { 1549 struct acpi_processor_cx *cx; 1550 struct cpuidle_state *state; 1551 1552 if (intel_idle_max_cstate_reached(cstate - 1)) 1553 break; 1554 1555 cx = &acpi_state_table.states[cstate]; 1556 1557 state = &drv->states[drv->state_count++]; 1558 1559 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1560 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1561 state->exit_latency = cx->latency; 1562 /* 1563 * For C1-type C-states use the same number for both the exit 1564 * latency and target residency, because that is the case for 1565 * C1 in the majority of the static C-states tables above. 1566 * For the other types of C-states, however, set the target 1567 * residency to 3 times the exit latency which should lead to 1568 * a reasonable balance between energy-efficiency and 1569 * performance in the majority of interesting cases. 1570 */ 1571 state->target_residency = cx->latency; 1572 if (cx->type > ACPI_STATE_C1) 1573 state->target_residency *= 3; 1574 1575 state->flags = MWAIT2flg(cx->address); 1576 if (cx->type > ACPI_STATE_C2) 1577 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1578 1579 if (disabled_states_mask & BIT(cstate)) 1580 state->flags |= CPUIDLE_FLAG_OFF; 1581 1582 if (intel_idle_state_needs_timer_stop(state)) 1583 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1584 1585 state->enter = intel_idle; 1586 state->enter_s2idle = intel_idle_s2idle; 1587 } 1588 } 1589 1590 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1591 { 1592 int cstate, limit; 1593 1594 /* 1595 * If there are no _CST C-states, do not disable any C-states by 1596 * default. 1597 */ 1598 if (!acpi_state_table.count) 1599 return false; 1600 1601 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1602 /* 1603 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1604 * the interesting states are ACPI_CSTATE_FFH. 
1605 */ 1606 for (cstate = 1; cstate < limit; cstate++) { 1607 if (acpi_state_table.states[cstate].address == mwait_hint) 1608 return false; 1609 } 1610 return true; 1611 } 1612 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1613 #define force_use_acpi (false) 1614 1615 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1616 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1617 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1618 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1619 1620 /** 1621 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1622 * 1623 * Tune IVT multi-socket targets. 1624 * Assumption: num_sockets == (max_package_num + 1). 1625 */ 1626 static void __init ivt_idle_state_table_update(void) 1627 { 1628 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1629 int cpu, package_num, num_sockets = 1; 1630 1631 for_each_online_cpu(cpu) { 1632 package_num = topology_physical_package_id(cpu); 1633 if (package_num + 1 > num_sockets) { 1634 num_sockets = package_num + 1; 1635 1636 if (num_sockets > 4) { 1637 cpuidle_state_table = ivt_cstates_8s; 1638 return; 1639 } 1640 } 1641 } 1642 1643 if (num_sockets > 2) 1644 cpuidle_state_table = ivt_cstates_4s; 1645 1646 /* else, 1 and 2 socket systems use default ivt_cstates */ 1647 } 1648 1649 /** 1650 * irtl_2_usec - IRTL to microseconds conversion. 1651 * @irtl: IRTL MSR value. 1652 * 1653 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1654 */ 1655 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1656 { 1657 static const unsigned int irtl_ns_units[] __initconst = { 1658 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1659 }; 1660 unsigned long long ns; 1661 1662 if (!irtl) 1663 return 0; 1664 1665 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1666 1667 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1668 } 1669 1670 /** 1671 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1672 * 1673 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1674 * definitive maximum latency and use the same value for target_residency. 1675 */ 1676 static void __init bxt_idle_state_table_update(void) 1677 { 1678 unsigned long long msr; 1679 unsigned int usec; 1680 1681 rdmsrl(MSR_PKGC6_IRTL, msr); 1682 usec = irtl_2_usec(msr); 1683 if (usec) { 1684 bxt_cstates[2].exit_latency = usec; 1685 bxt_cstates[2].target_residency = usec; 1686 } 1687 1688 rdmsrl(MSR_PKGC7_IRTL, msr); 1689 usec = irtl_2_usec(msr); 1690 if (usec) { 1691 bxt_cstates[3].exit_latency = usec; 1692 bxt_cstates[3].target_residency = usec; 1693 } 1694 1695 rdmsrl(MSR_PKGC8_IRTL, msr); 1696 usec = irtl_2_usec(msr); 1697 if (usec) { 1698 bxt_cstates[4].exit_latency = usec; 1699 bxt_cstates[4].target_residency = usec; 1700 } 1701 1702 rdmsrl(MSR_PKGC9_IRTL, msr); 1703 usec = irtl_2_usec(msr); 1704 if (usec) { 1705 bxt_cstates[5].exit_latency = usec; 1706 bxt_cstates[5].target_residency = usec; 1707 } 1708 1709 rdmsrl(MSR_PKGC10_IRTL, msr); 1710 usec = irtl_2_usec(msr); 1711 if (usec) { 1712 bxt_cstates[6].exit_latency = usec; 1713 bxt_cstates[6].target_residency = usec; 1714 } 1715 1716 } 1717 1718 /** 1719 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1720 * 1721 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1722 */ 1723 static void __init sklh_idle_state_table_update(void) 1724 { 1725 unsigned long long msr; 1726 unsigned int eax, ebx, ecx, edx; 1727 1728 1729 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1730 if (max_cstate <= 7) 1731 return; 1732 1733 /* if PC10 not present in CPUID.MWAIT.EDX */ 1734 if ((mwait_substates & (0xF << 28)) == 0) 1735 return; 1736 1737 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1738 1739 /* PC10 is not enabled in PKG C-state limit */ 1740 if ((msr & 0xF) != 8) 1741 return; 1742 1743 ecx = 0; 1744 cpuid(7, &eax, &ebx, &ecx, &edx); 1745 1746 /* if SGX is present */ 1747 if (ebx & (1 << 2)) { 1748 1749 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1750 1751 /* if SGX is enabled */ 1752 if (msr & (1 << 18)) 1753 return; 1754 } 1755 1756 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1757 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1758 } 1759 1760 /** 1761 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1762 * idle states table. 1763 */ 1764 static void __init skx_idle_state_table_update(void) 1765 { 1766 unsigned long long msr; 1767 1768 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1769 1770 /* 1771 * 000b: C0/C1 (no package C-state support) 1772 * 001b: C2 1773 * 010b: C6 (non-retention) 1774 * 011b: C6 (retention) 1775 * 111b: No Package C state limits. 1776 */ 1777 if ((msr & 0x7) < 2) { 1778 /* 1779 * Uses the CC6 + PC0 latency and 3 times of 1780 * latency for target_residency if the PC6 1781 * is disabled in BIOS. This is consistent 1782 * with how intel_idle driver uses _CST 1783 * to set the target_residency. 1784 */ 1785 skx_cstates[2].exit_latency = 92; 1786 skx_cstates[2].target_residency = 276; 1787 } 1788 } 1789 1790 /** 1791 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1792 */ 1793 static void __init adl_idle_state_table_update(void) 1794 { 1795 /* Check if user prefers C1 over C1E. 
*/ 1796 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1797 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1798 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1799 1800 /* Disable C1E by clearing the "C1E promotion" bit. */ 1801 c1e_promotion = C1E_PROMOTION_DISABLE; 1802 return; 1803 } 1804 1805 /* Make sure C1E is enabled by default */ 1806 c1e_promotion = C1E_PROMOTION_ENABLE; 1807 } 1808 1809 /** 1810 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1811 */ 1812 static void __init spr_idle_state_table_update(void) 1813 { 1814 unsigned long long msr; 1815 1816 /* 1817 * By default, the C6 state assumes the worst-case scenario of package 1818 * C6. However, if PC6 is disabled, we update the numbers to match 1819 * core C6. 1820 */ 1821 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1822 1823 /* Limit value 2 and above allow for PC6. */ 1824 if ((msr & 0x7) < 2) { 1825 spr_cstates[2].exit_latency = 190; 1826 spr_cstates[2].target_residency = 600; 1827 } 1828 } 1829 1830 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1831 { 1832 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1833 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1834 MWAIT_SUBSTATE_MASK; 1835 1836 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1837 if (num_substates == 0) 1838 return false; 1839 1840 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1841 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1842 1843 return true; 1844 } 1845 1846 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1847 { 1848 int cstate; 1849 1850 switch (boot_cpu_data.x86_model) { 1851 case INTEL_FAM6_IVYBRIDGE_X: 1852 ivt_idle_state_table_update(); 1853 break; 1854 case INTEL_FAM6_ATOM_GOLDMONT: 1855 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1856 bxt_idle_state_table_update(); 1857 break; 1858 case INTEL_FAM6_SKYLAKE: 1859 sklh_idle_state_table_update(); 1860 break; 1861 case INTEL_FAM6_SKYLAKE_X: 1862 skx_idle_state_table_update(); 1863 break; 1864 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1865 spr_idle_state_table_update(); 1866 break; 1867 case INTEL_FAM6_ALDERLAKE: 1868 case INTEL_FAM6_ALDERLAKE_L: 1869 case INTEL_FAM6_ALDERLAKE_N: 1870 adl_idle_state_table_update(); 1871 break; 1872 } 1873 1874 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1875 unsigned int mwait_hint; 1876 1877 if (intel_idle_max_cstate_reached(cstate)) 1878 break; 1879 1880 if (!cpuidle_state_table[cstate].enter && 1881 !cpuidle_state_table[cstate].enter_s2idle) 1882 break; 1883 1884 /* If marked as unusable, skip this state. */ 1885 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1886 pr_debug("state %s is disabled\n", 1887 cpuidle_state_table[cstate].name); 1888 continue; 1889 } 1890 1891 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1892 if (!intel_idle_verify_cstate(mwait_hint)) 1893 continue; 1894 1895 /* Structure copy. 
*/ 1896 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1897 1898 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) 1899 drv->states[drv->state_count].enter = intel_idle_irq; 1900 1901 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1902 cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { 1903 WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); 1904 drv->states[drv->state_count].enter = intel_idle_ibrs; 1905 } 1906 1907 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE) 1908 drv->states[drv->state_count].enter = intel_idle_xstate; 1909 1910 if ((disabled_states_mask & BIT(drv->state_count)) || 1911 ((icpu->use_acpi || force_use_acpi) && 1912 intel_idle_off_by_default(mwait_hint) && 1913 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1914 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1915 1916 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1917 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1918 1919 drv->state_count++; 1920 } 1921 1922 if (icpu->byt_auto_demotion_disable_flag) { 1923 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1924 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1925 } 1926 } 1927 1928 /** 1929 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1930 * @drv: cpuidle driver structure to initialize. 
1931 */ 1932 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1933 { 1934 cpuidle_poll_state_init(drv); 1935 1936 if (disabled_states_mask & BIT(0)) 1937 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1938 1939 drv->state_count = 1; 1940 1941 if (icpu) 1942 intel_idle_init_cstates_icpu(drv); 1943 else 1944 intel_idle_init_cstates_acpi(drv); 1945 } 1946 1947 static void auto_demotion_disable(void) 1948 { 1949 unsigned long long msr_bits; 1950 1951 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1952 msr_bits &= ~auto_demotion_disable_flags; 1953 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1954 } 1955 1956 static void c1e_promotion_enable(void) 1957 { 1958 unsigned long long msr_bits; 1959 1960 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1961 msr_bits |= 0x2; 1962 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1963 } 1964 1965 static void c1e_promotion_disable(void) 1966 { 1967 unsigned long long msr_bits; 1968 1969 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1970 msr_bits &= ~0x2; 1971 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1972 } 1973 1974 /** 1975 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1976 * @cpu: CPU to initialize. 1977 * 1978 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1979 * with the processor model flags. 
1980 */ 1981 static int intel_idle_cpu_init(unsigned int cpu) 1982 { 1983 struct cpuidle_device *dev; 1984 1985 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1986 dev->cpu = cpu; 1987 1988 if (cpuidle_register_device(dev)) { 1989 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1990 return -EIO; 1991 } 1992 1993 if (auto_demotion_disable_flags) 1994 auto_demotion_disable(); 1995 1996 if (c1e_promotion == C1E_PROMOTION_ENABLE) 1997 c1e_promotion_enable(); 1998 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 1999 c1e_promotion_disable(); 2000 2001 return 0; 2002 } 2003 2004 static int intel_idle_cpu_online(unsigned int cpu) 2005 { 2006 struct cpuidle_device *dev; 2007 2008 if (!boot_cpu_has(X86_FEATURE_ARAT)) 2009 tick_broadcast_enable(); 2010 2011 /* 2012 * Some systems can hotplug a cpu at runtime after 2013 * the kernel has booted, we have to initialize the 2014 * driver in this case 2015 */ 2016 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2017 if (!dev->registered) 2018 return intel_idle_cpu_init(cpu); 2019 2020 return 0; 2021 } 2022 2023 /** 2024 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
2025 */ 2026 static void __init intel_idle_cpuidle_devices_uninit(void) 2027 { 2028 int i; 2029 2030 for_each_online_cpu(i) 2031 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2032 } 2033 2034 static int __init intel_idle_init(void) 2035 { 2036 const struct x86_cpu_id *id; 2037 unsigned int eax, ebx, ecx; 2038 int retval; 2039 2040 /* Do not load intel_idle at all for now if idle= is passed */ 2041 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2042 return -ENODEV; 2043 2044 if (max_cstate == 0) { 2045 pr_debug("disabled\n"); 2046 return -EPERM; 2047 } 2048 2049 id = x86_match_cpu(intel_idle_ids); 2050 if (id) { 2051 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2052 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2053 return -ENODEV; 2054 } 2055 } else { 2056 id = x86_match_cpu(intel_mwait_ids); 2057 if (!id) 2058 return -ENODEV; 2059 } 2060 2061 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 2062 return -ENODEV; 2063 2064 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 2065 2066 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2067 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2068 !mwait_substates) 2069 return -ENODEV; 2070 2071 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2072 2073 icpu = (const struct idle_cpu *)id->driver_data; 2074 if (icpu) { 2075 cpuidle_state_table = icpu->state_table; 2076 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 2077 if (icpu->disable_promotion_to_c1e) 2078 c1e_promotion = C1E_PROMOTION_DISABLE; 2079 if (icpu->use_acpi || force_use_acpi) 2080 intel_idle_acpi_cst_extract(); 2081 } else if (!intel_idle_acpi_cst_extract()) { 2082 return -ENODEV; 2083 } 2084 2085 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 2086 boot_cpu_data.x86_model); 2087 2088 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2089 if (!intel_idle_cpuidle_devices) 2090 return -ENOMEM; 2091 2092 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2093 2094 retval = 
cpuidle_register_driver(&intel_idle_driver); 2095 if (retval) { 2096 struct cpuidle_driver *drv = cpuidle_get_driver(); 2097 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2098 drv ? drv->name : "none"); 2099 goto init_driver_fail; 2100 } 2101 2102 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2103 intel_idle_cpu_online, NULL); 2104 if (retval < 0) 2105 goto hp_setup_fail; 2106 2107 pr_debug("Local APIC timer is reliable in %s\n", 2108 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 2109 2110 return 0; 2111 2112 hp_setup_fail: 2113 intel_idle_cpuidle_devices_uninit(); 2114 cpuidle_unregister_driver(&intel_idle_driver); 2115 init_driver_fail: 2116 free_percpu(intel_idle_cpuidle_devices); 2117 return retval; 2118 2119 } 2120 device_initcall(intel_idle_init); 2121 2122 /* 2123 * We are not really modular, but we used to support that. Meaning we also 2124 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2125 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2126 * is the easiest way (currently) to continue doing that. 2127 */ 2128 module_param(max_cstate, int, 0444); 2129 /* 2130 * The positions of the bits that are set in this number are the indices of the 2131 * idle states to be disabled by default (as reflected by the names of the 2132 * corresponding idle state directories in sysfs, "state0", "state1" ... 2133 * "state<i>" ..., where <i> is the index of the given state). 2134 */ 2135 module_param_named(states_off, disabled_states_mask, uint, 0444); 2136 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2137 /* 2138 * Some platforms come with mutually exclusive C-states, so that if one is 2139 * enabled, the other C-states must not be used. Example: C1 and C1E on 2140 * Sapphire Rapids platform. 
 * This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 *
 * NOTE(review): in the code visible in this part of the file the mask is only
 * read by adl_idle_state_table_update(), which tests BIT(1) and BIT(2); the
 * platform named in the example may be stale - confirm.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");