// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */

/* Prefix every pr_*() message emitted by this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
#include <asm/fpu/api.h>

#define INTEL_IDLE_VERSION "0.5.1"

/*
 * The cpuidle driver object.  Only the name and owner are set statically
 * here; the remaining fields are left zero-initialized at this point.
 */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * NOTE(review): the three variables below look like tunables (bit masks of
 * state indices and an IRQ-on override); their consumers are not visible in
 * this part of the file - confirm before relying on that reading.
 */
static unsigned int disabled_states_mask __read_mostly;
static unsigned int preferred_states_mask __read_mostly;
static bool force_irq_on __read_mostly;

/* Per-CPU array of cpuidle devices (note the __percpu annotation). */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * File-scope counterpart of idle_cpu::auto_demotion_disable_flags.
 * NOTE(review): presumably copied from the matched idle_cpu at init - confirm.
 */
static unsigned long auto_demotion_disable_flags;

/*
 * What to do with the hardware "C1E promotion" setting.  The default is to
 * preserve whatever is currently configured.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;

/*
 * Description of one supported processor flavour: the C-state table to use
 * plus a handful of quirk switches (see the idle_cpu_* instances in this
 * file for how the fields are populated).
 */
struct idle_cpu {
	/* C-state table; the tables in this file end with a .enter == NULL entry. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably "consult ACPI _CST for this model" - confirm. */
	bool use_acpi;
};

/*
 * Init-time only (__initdata): the selected idle_cpu description, its state
 * table and the MWAIT sub-state information.
 * NOTE(review): where these get assigned is outside the visible chunk.
 */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

static unsigned int mwait_substates __initdata;

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
104 */ 105 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 106 107 /* 108 * Enable this state by default even if the ACPI _CST does not list it. 109 */ 110 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 111 112 /* 113 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 114 * above. 115 */ 116 #define CPUIDLE_FLAG_IBRS BIT(16) 117 118 /* 119 * Initialize large xstate for the C6-state entrance. 120 */ 121 #define CPUIDLE_FLAG_INIT_XSTATE BIT(17) 122 123 /* 124 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 125 * the C-state (top nibble) and sub-state (bottom nibble) 126 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 127 * 128 * We store the hint at the top of our "flags" for each state. 129 */ 130 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 131 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 132 133 static __always_inline int __intel_idle(struct cpuidle_device *dev, 134 struct cpuidle_driver *drv, int index) 135 { 136 struct cpuidle_state *state = &drv->states[index]; 137 unsigned long eax = flg2MWAIT(state->flags); 138 unsigned long ecx = 1; /* break on interrupt flag */ 139 140 mwait_idle_with_hints(eax, ecx); 141 142 return index; 143 } 144 145 /** 146 * intel_idle - Ask the processor to enter the given idle state. 147 * @dev: cpuidle device of the target CPU. 148 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 149 * @index: Target idle state index. 150 * 151 * Use the MWAIT instruction to notify the processor that the CPU represented by 152 * @dev is idle and it can try to enter the idle state corresponding to @index. 153 * 154 * If the local APIC timer is not known to be reliable in the target idle state, 155 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 156 * 157 * Must be called under local_irq_disable(). 
158 */ 159 static __cpuidle int intel_idle(struct cpuidle_device *dev, 160 struct cpuidle_driver *drv, int index) 161 { 162 return __intel_idle(dev, drv, index); 163 } 164 165 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 166 struct cpuidle_driver *drv, int index) 167 { 168 int ret; 169 170 raw_local_irq_enable(); 171 ret = __intel_idle(dev, drv, index); 172 raw_local_irq_disable(); 173 174 return ret; 175 } 176 177 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 178 struct cpuidle_driver *drv, int index) 179 { 180 bool smt_active = sched_smt_active(); 181 u64 spec_ctrl = spec_ctrl_current(); 182 int ret; 183 184 if (smt_active) 185 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); 186 187 ret = __intel_idle(dev, drv, index); 188 189 if (smt_active) 190 native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 191 192 return ret; 193 } 194 195 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, 196 struct cpuidle_driver *drv, int index) 197 { 198 fpu_idle_fpregs(); 199 return __intel_idle(dev, drv, index); 200 } 201 202 /** 203 * intel_idle_s2idle - Ask the processor to enter the given idle state. 204 * @dev: cpuidle device of the target CPU. 205 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 206 * @index: Target idle state index. 207 * 208 * Use the MWAIT instruction to notify the processor that the CPU represented by 209 * @dev is idle and it can try to enter the idle state corresponding to @index. 210 * 211 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 212 * scheduler tick and suspended scheduler clock on the target CPU. 
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	/* The MWAIT hint lives in the top byte of the state's flags. */
	unsigned long eax = flg2MWAIT(state->flags);

	/* States flagged INIT_XSTATE get fpu_idle_fpregs() before MWAIT. */
	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
		fpu_idle_fpregs();

	mwait_idle_with_hints(eax, ecx);

	/* Unlike the ->enter() callbacks, this always returns 0, not @index. */
	return 0;
}

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 *
 * Every table below ends with an entry whose .enter is NULL.
 * NOTE(review): exit_latency and target_residency are presumably in
 * microseconds - confirm against struct cpuidle_state.
 */
/* nehalem_cstates: C1, C1E, C3, C6 (used by idle_cpu_nehalem and idle_cpu_nhx). */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* snb_cstates: C1, C1E, C3, C6, C7. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * byt_cstates: C1, C6N, C6S, C7, C7S.
 * Note that C6N lists an exit latency (300) above its target residency (275).
 */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * cht_cstates: C1, C6N, C6S, C7, C7S.  Same hints as byt_cstates, with
 * lower exit latencies for C6N and C6S.  Ends at the .enter == NULL entry.
 */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* ivb_cstates: C1, C1E, C3, C6, C7. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * ivt_cstates: C1, C1E, C3, C6.  The _4s and _8s variants below list the
 * same states with larger target residencies.
 */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* ivt_cstates_4s: C1, C1E, C3, C6. */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* ivt_cstates_8s: C1, C1E, C3, C6. */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* hsw_cstates: C1, C1E, C3, C6, C7s, C8, C9, C10. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* bdw_cstates: same states as hsw_cstates; only the C3 exit latency differs. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * skl_cstates: C1, C1E, C3, C6, C7s, C8, C9, C10.
 * C6 and deeper carry CPUIDLE_FLAG_IBRS.
 */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * skx_cstates: C1 (IRQ_ENABLE), C1E, C6 (IBRS).
 * NOTE(review): C1 is flagged IRQ_ENABLE but still lists intel_idle as
 * .enter; presumably the callback is swapped at init time - confirm.
 */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* icx_cstates: C1 (IRQ_ENABLE), C1E, C6. */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
 * But in this case there is effectively no C1, because C1 requests are
 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
 * and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1E and disable C1 by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
/* adl_cstates: C1 (unusable by default), C1E, C6, C8, C10. */
static struct cpuidle_state adl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 220,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 280,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 680,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* adl_l_cstates: same states as adl_cstates with different C6/C8/C10 timings. */
static struct cpuidle_state adl_l_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 230,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* gmt_cstates: C1 (unusable by default), C1E, C6, C8, C10. */
static struct cpuidle_state gmt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 195,
		.target_residency = 585,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 260,
		.target_residency = 1040,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 660,
		.target_residency = 1980,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* spr_cstates: C1, C1E, C6; C6 additionally carries CPUIDLE_FLAG_INIT_XSTATE. */
static struct cpuidle_state spr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE,
		.exit_latency = 290,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * atom_cstates: C1E, C2, C4, C6 (used by idle_cpu_atom).
 * The first state is named "C1E" although it uses MWAIT hint 0x00.
 */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* tangier_cstates: C1, C4, C6, C7, C9. */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* avn_cstates: C1, C6. */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* knl_cstates: C1, C6; here the state named "C6" uses MWAIT hint 0x10. */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

/*
 * bxt_cstates: C1, C1E, C6, C7s, C8, C9, C10.
 * From C6 onwards the target residency equals the exit latency.
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* dnv_cstates: C1, C1E, C6. */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Nehalem description: nehalem_cstates, with the C1 and C3 auto-demotion
 * bits listed for clearing and C1E promotion disabled.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_nehalem, but with use_acpi set as well. */
static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/* Atom description: only the state table is set; all quirk fields stay zero. */
static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
1263 .state_table = tangier_cstates, 1264 }; 1265 1266 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1267 .state_table = atom_cstates, 1268 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1269 }; 1270 1271 static const struct idle_cpu idle_cpu_snb __initconst = { 1272 .state_table = snb_cstates, 1273 .disable_promotion_to_c1e = true, 1274 }; 1275 1276 static const struct idle_cpu idle_cpu_snx __initconst = { 1277 .state_table = snb_cstates, 1278 .disable_promotion_to_c1e = true, 1279 .use_acpi = true, 1280 }; 1281 1282 static const struct idle_cpu idle_cpu_byt __initconst = { 1283 .state_table = byt_cstates, 1284 .disable_promotion_to_c1e = true, 1285 .byt_auto_demotion_disable_flag = true, 1286 }; 1287 1288 static const struct idle_cpu idle_cpu_cht __initconst = { 1289 .state_table = cht_cstates, 1290 .disable_promotion_to_c1e = true, 1291 .byt_auto_demotion_disable_flag = true, 1292 }; 1293 1294 static const struct idle_cpu idle_cpu_ivb __initconst = { 1295 .state_table = ivb_cstates, 1296 .disable_promotion_to_c1e = true, 1297 }; 1298 1299 static const struct idle_cpu idle_cpu_ivt __initconst = { 1300 .state_table = ivt_cstates, 1301 .disable_promotion_to_c1e = true, 1302 .use_acpi = true, 1303 }; 1304 1305 static const struct idle_cpu idle_cpu_hsw __initconst = { 1306 .state_table = hsw_cstates, 1307 .disable_promotion_to_c1e = true, 1308 }; 1309 1310 static const struct idle_cpu idle_cpu_hsx __initconst = { 1311 .state_table = hsw_cstates, 1312 .disable_promotion_to_c1e = true, 1313 .use_acpi = true, 1314 }; 1315 1316 static const struct idle_cpu idle_cpu_bdw __initconst = { 1317 .state_table = bdw_cstates, 1318 .disable_promotion_to_c1e = true, 1319 }; 1320 1321 static const struct idle_cpu idle_cpu_bdx __initconst = { 1322 .state_table = bdw_cstates, 1323 .disable_promotion_to_c1e = true, 1324 .use_acpi = true, 1325 }; 1326 1327 static const struct idle_cpu idle_cpu_skl __initconst = { 1328 .state_table = skl_cstates, 1329 
.disable_promotion_to_c1e = true, 1330 }; 1331 1332 static const struct idle_cpu idle_cpu_skx __initconst = { 1333 .state_table = skx_cstates, 1334 .disable_promotion_to_c1e = true, 1335 .use_acpi = true, 1336 }; 1337 1338 static const struct idle_cpu idle_cpu_icx __initconst = { 1339 .state_table = icx_cstates, 1340 .disable_promotion_to_c1e = true, 1341 .use_acpi = true, 1342 }; 1343 1344 static const struct idle_cpu idle_cpu_adl __initconst = { 1345 .state_table = adl_cstates, 1346 }; 1347 1348 static const struct idle_cpu idle_cpu_adl_l __initconst = { 1349 .state_table = adl_l_cstates, 1350 }; 1351 1352 static const struct idle_cpu idle_cpu_gmt __initconst = { 1353 .state_table = gmt_cstates, 1354 }; 1355 1356 static const struct idle_cpu idle_cpu_spr __initconst = { 1357 .state_table = spr_cstates, 1358 .disable_promotion_to_c1e = true, 1359 .use_acpi = true, 1360 }; 1361 1362 static const struct idle_cpu idle_cpu_avn __initconst = { 1363 .state_table = avn_cstates, 1364 .disable_promotion_to_c1e = true, 1365 .use_acpi = true, 1366 }; 1367 1368 static const struct idle_cpu idle_cpu_knl __initconst = { 1369 .state_table = knl_cstates, 1370 .use_acpi = true, 1371 }; 1372 1373 static const struct idle_cpu idle_cpu_bxt __initconst = { 1374 .state_table = bxt_cstates, 1375 .disable_promotion_to_c1e = true, 1376 }; 1377 1378 static const struct idle_cpu idle_cpu_dnv __initconst = { 1379 .state_table = dnv_cstates, 1380 .disable_promotion_to_c1e = true, 1381 .use_acpi = true, 1382 }; 1383 1384 static const struct idle_cpu idle_cpu_snr __initconst = { 1385 .state_table = snr_cstates, 1386 .disable_promotion_to_c1e = true, 1387 .use_acpi = true, 1388 }; 1389 1390 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1391 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1392 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1393 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1394 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 
1395 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1396 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1397 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1398 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1399 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1400 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1401 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1402 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1403 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1404 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1405 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1406 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1407 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1408 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1409 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1410 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1411 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1412 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1413 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1414 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1415 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1416 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1417 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1418 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1419 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1420 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1421 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1422 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1423 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1424 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), 1425 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), 1426 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_gmt), 1427 
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), 1428 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), 1429 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1430 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1431 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1432 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1433 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1434 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1435 {} 1436 }; 1437 1438 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1439 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1440 {} 1441 }; 1442 1443 static bool __init intel_idle_max_cstate_reached(int cstate) 1444 { 1445 if (cstate + 1 > max_cstate) { 1446 pr_info("max_cstate %d reached\n", max_cstate); 1447 return true; 1448 } 1449 return false; 1450 } 1451 1452 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1453 { 1454 unsigned long eax = flg2MWAIT(state->flags); 1455 1456 if (boot_cpu_has(X86_FEATURE_ARAT)) 1457 return false; 1458 1459 /* 1460 * Switch over to one-shot tick broadcast if the target C-state 1461 * is deeper than C1. 1462 */ 1463 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1464 } 1465 1466 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1467 #include <acpi/processor.h> 1468 1469 static bool no_acpi __read_mostly; 1470 module_param(no_acpi, bool, 0444); 1471 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1472 1473 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1474 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1475 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1476 1477 static struct acpi_processor_power acpi_state_table __initdata; 1478 1479 /** 1480 * intel_idle_cst_usable - Check if the _CST information can be used. 
1481 * 1482 * Check if all of the C-states listed by _CST in the max_cstate range are 1483 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1484 */ 1485 static bool __init intel_idle_cst_usable(void) 1486 { 1487 int cstate, limit; 1488 1489 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1490 acpi_state_table.count); 1491 1492 for (cstate = 1; cstate < limit; cstate++) { 1493 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1494 1495 if (cx->entry_method != ACPI_CSTATE_FFH) 1496 return false; 1497 } 1498 1499 return true; 1500 } 1501 1502 static bool __init intel_idle_acpi_cst_extract(void) 1503 { 1504 unsigned int cpu; 1505 1506 if (no_acpi) { 1507 pr_debug("Not allowed to use ACPI _CST\n"); 1508 return false; 1509 } 1510 1511 for_each_possible_cpu(cpu) { 1512 struct acpi_processor *pr = per_cpu(processors, cpu); 1513 1514 if (!pr) 1515 continue; 1516 1517 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1518 continue; 1519 1520 acpi_state_table.count++; 1521 1522 if (!intel_idle_cst_usable()) 1523 continue; 1524 1525 if (!acpi_processor_claim_cst_control()) 1526 break; 1527 1528 return true; 1529 } 1530 1531 acpi_state_table.count = 0; 1532 pr_debug("ACPI _CST not found or not usable\n"); 1533 return false; 1534 } 1535 1536 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1537 { 1538 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1539 1540 /* 1541 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1542 * the interesting states are ACPI_CSTATE_FFH. 
1543 */ 1544 for (cstate = 1; cstate < limit; cstate++) { 1545 struct acpi_processor_cx *cx; 1546 struct cpuidle_state *state; 1547 1548 if (intel_idle_max_cstate_reached(cstate - 1)) 1549 break; 1550 1551 cx = &acpi_state_table.states[cstate]; 1552 1553 state = &drv->states[drv->state_count++]; 1554 1555 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1556 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1557 state->exit_latency = cx->latency; 1558 /* 1559 * For C1-type C-states use the same number for both the exit 1560 * latency and target residency, because that is the case for 1561 * C1 in the majority of the static C-states tables above. 1562 * For the other types of C-states, however, set the target 1563 * residency to 3 times the exit latency which should lead to 1564 * a reasonable balance between energy-efficiency and 1565 * performance in the majority of interesting cases. 1566 */ 1567 state->target_residency = cx->latency; 1568 if (cx->type > ACPI_STATE_C1) 1569 state->target_residency *= 3; 1570 1571 state->flags = MWAIT2flg(cx->address); 1572 if (cx->type > ACPI_STATE_C2) 1573 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1574 1575 if (disabled_states_mask & BIT(cstate)) 1576 state->flags |= CPUIDLE_FLAG_OFF; 1577 1578 if (intel_idle_state_needs_timer_stop(state)) 1579 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1580 1581 state->enter = intel_idle; 1582 state->enter_s2idle = intel_idle_s2idle; 1583 } 1584 } 1585 1586 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1587 { 1588 int cstate, limit; 1589 1590 /* 1591 * If there are no _CST C-states, do not disable any C-states by 1592 * default. 1593 */ 1594 if (!acpi_state_table.count) 1595 return false; 1596 1597 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1598 /* 1599 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1600 * the interesting states are ACPI_CSTATE_FFH. 
1601 */ 1602 for (cstate = 1; cstate < limit; cstate++) { 1603 if (acpi_state_table.states[cstate].address == mwait_hint) 1604 return false; 1605 } 1606 return true; 1607 } 1608 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1609 #define force_use_acpi (false) 1610 1611 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1612 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1613 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1614 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1615 1616 /** 1617 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1618 * 1619 * Tune IVT multi-socket targets. 1620 * Assumption: num_sockets == (max_package_num + 1). 1621 */ 1622 static void __init ivt_idle_state_table_update(void) 1623 { 1624 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1625 int cpu, package_num, num_sockets = 1; 1626 1627 for_each_online_cpu(cpu) { 1628 package_num = topology_physical_package_id(cpu); 1629 if (package_num + 1 > num_sockets) { 1630 num_sockets = package_num + 1; 1631 1632 if (num_sockets > 4) { 1633 cpuidle_state_table = ivt_cstates_8s; 1634 return; 1635 } 1636 } 1637 } 1638 1639 if (num_sockets > 2) 1640 cpuidle_state_table = ivt_cstates_4s; 1641 1642 /* else, 1 and 2 socket systems use default ivt_cstates */ 1643 } 1644 1645 /** 1646 * irtl_2_usec - IRTL to microseconds conversion. 1647 * @irtl: IRTL MSR value. 1648 * 1649 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1650 */ 1651 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1652 { 1653 static const unsigned int irtl_ns_units[] __initconst = { 1654 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1655 }; 1656 unsigned long long ns; 1657 1658 if (!irtl) 1659 return 0; 1660 1661 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1662 1663 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1664 } 1665 1666 /** 1667 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1668 * 1669 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1670 * definitive maximum latency and use the same value for target_residency. 1671 */ 1672 static void __init bxt_idle_state_table_update(void) 1673 { 1674 unsigned long long msr; 1675 unsigned int usec; 1676 1677 rdmsrl(MSR_PKGC6_IRTL, msr); 1678 usec = irtl_2_usec(msr); 1679 if (usec) { 1680 bxt_cstates[2].exit_latency = usec; 1681 bxt_cstates[2].target_residency = usec; 1682 } 1683 1684 rdmsrl(MSR_PKGC7_IRTL, msr); 1685 usec = irtl_2_usec(msr); 1686 if (usec) { 1687 bxt_cstates[3].exit_latency = usec; 1688 bxt_cstates[3].target_residency = usec; 1689 } 1690 1691 rdmsrl(MSR_PKGC8_IRTL, msr); 1692 usec = irtl_2_usec(msr); 1693 if (usec) { 1694 bxt_cstates[4].exit_latency = usec; 1695 bxt_cstates[4].target_residency = usec; 1696 } 1697 1698 rdmsrl(MSR_PKGC9_IRTL, msr); 1699 usec = irtl_2_usec(msr); 1700 if (usec) { 1701 bxt_cstates[5].exit_latency = usec; 1702 bxt_cstates[5].target_residency = usec; 1703 } 1704 1705 rdmsrl(MSR_PKGC10_IRTL, msr); 1706 usec = irtl_2_usec(msr); 1707 if (usec) { 1708 bxt_cstates[6].exit_latency = usec; 1709 bxt_cstates[6].target_residency = usec; 1710 } 1711 1712 } 1713 1714 /** 1715 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1716 * 1717 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1718 */ 1719 static void __init sklh_idle_state_table_update(void) 1720 { 1721 unsigned long long msr; 1722 unsigned int eax, ebx, ecx, edx; 1723 1724 1725 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1726 if (max_cstate <= 7) 1727 return; 1728 1729 /* if PC10 not present in CPUID.MWAIT.EDX */ 1730 if ((mwait_substates & (0xF << 28)) == 0) 1731 return; 1732 1733 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1734 1735 /* PC10 is not enabled in PKG C-state limit */ 1736 if ((msr & 0xF) != 8) 1737 return; 1738 1739 ecx = 0; 1740 cpuid(7, &eax, &ebx, &ecx, &edx); 1741 1742 /* if SGX is present */ 1743 if (ebx & (1 << 2)) { 1744 1745 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1746 1747 /* if SGX is enabled */ 1748 if (msr & (1 << 18)) 1749 return; 1750 } 1751 1752 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1753 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1754 } 1755 1756 /** 1757 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1758 * idle states table. 1759 */ 1760 static void __init skx_idle_state_table_update(void) 1761 { 1762 unsigned long long msr; 1763 1764 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1765 1766 /* 1767 * 000b: C0/C1 (no package C-state support) 1768 * 001b: C2 1769 * 010b: C6 (non-retention) 1770 * 011b: C6 (retention) 1771 * 111b: No Package C state limits. 1772 */ 1773 if ((msr & 0x7) < 2) { 1774 /* 1775 * Uses the CC6 + PC0 latency and 3 times of 1776 * latency for target_residency if the PC6 1777 * is disabled in BIOS. This is consistent 1778 * with how intel_idle driver uses _CST 1779 * to set the target_residency. 1780 */ 1781 skx_cstates[2].exit_latency = 92; 1782 skx_cstates[2].target_residency = 276; 1783 } 1784 } 1785 1786 /** 1787 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1788 */ 1789 static void __init adl_idle_state_table_update(void) 1790 { 1791 /* Check if user prefers C1 over C1E. 
*/ 1792 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1793 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1794 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1795 1796 /* Disable C1E by clearing the "C1E promotion" bit. */ 1797 c1e_promotion = C1E_PROMOTION_DISABLE; 1798 return; 1799 } 1800 1801 /* Make sure C1E is enabled by default */ 1802 c1e_promotion = C1E_PROMOTION_ENABLE; 1803 } 1804 1805 /** 1806 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1807 */ 1808 static void __init spr_idle_state_table_update(void) 1809 { 1810 unsigned long long msr; 1811 1812 /* 1813 * By default, the C6 state assumes the worst-case scenario of package 1814 * C6. However, if PC6 is disabled, we update the numbers to match 1815 * core C6. 1816 */ 1817 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1818 1819 /* Limit value 2 and above allow for PC6. */ 1820 if ((msr & 0x7) < 2) { 1821 spr_cstates[2].exit_latency = 190; 1822 spr_cstates[2].target_residency = 600; 1823 } 1824 } 1825 1826 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1827 { 1828 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1829 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1830 MWAIT_SUBSTATE_MASK; 1831 1832 /* Ignore the C-state if there are NO sub-states in CPUID for it. */ 1833 if (num_substates == 0) 1834 return false; 1835 1836 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1837 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1838 1839 return true; 1840 } 1841 1842 static void state_update_enter_method(struct cpuidle_state *state, int cstate) 1843 { 1844 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { 1845 /* 1846 * Combining with XSTATE with IBRS or IRQ_ENABLE flags 1847 * is not currently supported but this driver. 
1848 */ 1849 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 1850 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1851 state->enter = intel_idle_xstate; 1852 return; 1853 } 1854 1855 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1856 state->flags & CPUIDLE_FLAG_IBRS) { 1857 /* 1858 * IBRS mitigation requires that C-states are entered 1859 * with interrupts disabled. 1860 */ 1861 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1862 state->enter = intel_idle_ibrs; 1863 return; 1864 } 1865 1866 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 1867 state->enter = intel_idle_irq; 1868 return; 1869 } 1870 1871 if (force_irq_on) { 1872 pr_info("forced intel_idle_irq for state %d\n", cstate); 1873 state->enter = intel_idle_irq; 1874 } 1875 } 1876 1877 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1878 { 1879 int cstate; 1880 1881 switch (boot_cpu_data.x86_model) { 1882 case INTEL_FAM6_IVYBRIDGE_X: 1883 ivt_idle_state_table_update(); 1884 break; 1885 case INTEL_FAM6_ATOM_GOLDMONT: 1886 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1887 bxt_idle_state_table_update(); 1888 break; 1889 case INTEL_FAM6_SKYLAKE: 1890 sklh_idle_state_table_update(); 1891 break; 1892 case INTEL_FAM6_SKYLAKE_X: 1893 skx_idle_state_table_update(); 1894 break; 1895 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1896 case INTEL_FAM6_EMERALDRAPIDS_X: 1897 spr_idle_state_table_update(); 1898 break; 1899 case INTEL_FAM6_ALDERLAKE: 1900 case INTEL_FAM6_ALDERLAKE_L: 1901 case INTEL_FAM6_ATOM_GRACEMONT: 1902 adl_idle_state_table_update(); 1903 break; 1904 } 1905 1906 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1907 struct cpuidle_state *state; 1908 unsigned int mwait_hint; 1909 1910 if (intel_idle_max_cstate_reached(cstate)) 1911 break; 1912 1913 if (!cpuidle_state_table[cstate].enter && 1914 !cpuidle_state_table[cstate].enter_s2idle) 1915 break; 1916 1917 /* If marked as unusable, skip this state. 
*/ 1918 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1919 pr_debug("state %s is disabled\n", 1920 cpuidle_state_table[cstate].name); 1921 continue; 1922 } 1923 1924 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1925 if (!intel_idle_verify_cstate(mwait_hint)) 1926 continue; 1927 1928 /* Structure copy. */ 1929 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1930 state = &drv->states[drv->state_count]; 1931 1932 state_update_enter_method(state, cstate); 1933 1934 1935 if ((disabled_states_mask & BIT(drv->state_count)) || 1936 ((icpu->use_acpi || force_use_acpi) && 1937 intel_idle_off_by_default(mwait_hint) && 1938 !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1939 state->flags |= CPUIDLE_FLAG_OFF; 1940 1941 if (intel_idle_state_needs_timer_stop(state)) 1942 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1943 1944 drv->state_count++; 1945 } 1946 1947 if (icpu->byt_auto_demotion_disable_flag) { 1948 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1949 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1950 } 1951 } 1952 1953 /** 1954 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1955 * @drv: cpuidle driver structure to initialize. 
1956 */ 1957 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1958 { 1959 cpuidle_poll_state_init(drv); 1960 1961 if (disabled_states_mask & BIT(0)) 1962 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1963 1964 drv->state_count = 1; 1965 1966 if (icpu) 1967 intel_idle_init_cstates_icpu(drv); 1968 else 1969 intel_idle_init_cstates_acpi(drv); 1970 } 1971 1972 static void auto_demotion_disable(void) 1973 { 1974 unsigned long long msr_bits; 1975 1976 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1977 msr_bits &= ~auto_demotion_disable_flags; 1978 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1979 } 1980 1981 static void c1e_promotion_enable(void) 1982 { 1983 unsigned long long msr_bits; 1984 1985 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1986 msr_bits |= 0x2; 1987 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1988 } 1989 1990 static void c1e_promotion_disable(void) 1991 { 1992 unsigned long long msr_bits; 1993 1994 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1995 msr_bits &= ~0x2; 1996 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1997 } 1998 1999 /** 2000 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 2001 * @cpu: CPU to initialize. 2002 * 2003 * Register a cpuidle device object for @cpu and update its MSRs in accordance 2004 * with the processor model flags. 
2005 */ 2006 static int intel_idle_cpu_init(unsigned int cpu) 2007 { 2008 struct cpuidle_device *dev; 2009 2010 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2011 dev->cpu = cpu; 2012 2013 if (cpuidle_register_device(dev)) { 2014 pr_debug("cpuidle_register_device %d failed!\n", cpu); 2015 return -EIO; 2016 } 2017 2018 if (auto_demotion_disable_flags) 2019 auto_demotion_disable(); 2020 2021 if (c1e_promotion == C1E_PROMOTION_ENABLE) 2022 c1e_promotion_enable(); 2023 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 2024 c1e_promotion_disable(); 2025 2026 return 0; 2027 } 2028 2029 static int intel_idle_cpu_online(unsigned int cpu) 2030 { 2031 struct cpuidle_device *dev; 2032 2033 if (!boot_cpu_has(X86_FEATURE_ARAT)) 2034 tick_broadcast_enable(); 2035 2036 /* 2037 * Some systems can hotplug a cpu at runtime after 2038 * the kernel has booted, we have to initialize the 2039 * driver in this case 2040 */ 2041 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2042 if (!dev->registered) 2043 return intel_idle_cpu_init(cpu); 2044 2045 return 0; 2046 } 2047 2048 /** 2049 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
2050 */ 2051 static void __init intel_idle_cpuidle_devices_uninit(void) 2052 { 2053 int i; 2054 2055 for_each_online_cpu(i) 2056 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2057 } 2058 2059 static int __init intel_idle_init(void) 2060 { 2061 const struct x86_cpu_id *id; 2062 unsigned int eax, ebx, ecx; 2063 int retval; 2064 2065 /* Do not load intel_idle at all for now if idle= is passed */ 2066 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2067 return -ENODEV; 2068 2069 if (max_cstate == 0) { 2070 pr_debug("disabled\n"); 2071 return -EPERM; 2072 } 2073 2074 id = x86_match_cpu(intel_idle_ids); 2075 if (id) { 2076 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2077 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2078 return -ENODEV; 2079 } 2080 } else { 2081 id = x86_match_cpu(intel_mwait_ids); 2082 if (!id) 2083 return -ENODEV; 2084 } 2085 2086 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 2087 return -ENODEV; 2088 2089 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 2090 2091 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2092 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2093 !mwait_substates) 2094 return -ENODEV; 2095 2096 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2097 2098 icpu = (const struct idle_cpu *)id->driver_data; 2099 if (icpu) { 2100 cpuidle_state_table = icpu->state_table; 2101 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 2102 if (icpu->disable_promotion_to_c1e) 2103 c1e_promotion = C1E_PROMOTION_DISABLE; 2104 if (icpu->use_acpi || force_use_acpi) 2105 intel_idle_acpi_cst_extract(); 2106 } else if (!intel_idle_acpi_cst_extract()) { 2107 return -ENODEV; 2108 } 2109 2110 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 2111 boot_cpu_data.x86_model); 2112 2113 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2114 if (!intel_idle_cpuidle_devices) 2115 return -ENOMEM; 2116 2117 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2118 2119 retval = 
cpuidle_register_driver(&intel_idle_driver); 2120 if (retval) { 2121 struct cpuidle_driver *drv = cpuidle_get_driver(); 2122 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2123 drv ? drv->name : "none"); 2124 goto init_driver_fail; 2125 } 2126 2127 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2128 intel_idle_cpu_online, NULL); 2129 if (retval < 0) 2130 goto hp_setup_fail; 2131 2132 pr_debug("Local APIC timer is reliable in %s\n", 2133 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 2134 2135 return 0; 2136 2137 hp_setup_fail: 2138 intel_idle_cpuidle_devices_uninit(); 2139 cpuidle_unregister_driver(&intel_idle_driver); 2140 init_driver_fail: 2141 free_percpu(intel_idle_cpuidle_devices); 2142 return retval; 2143 2144 } 2145 device_initcall(intel_idle_init); 2146 2147 /* 2148 * We are not really modular, but we used to support that. Meaning we also 2149 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2150 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2151 * is the easiest way (currently) to continue doing that. 2152 */ 2153 module_param(max_cstate, int, 0444); 2154 /* 2155 * The positions of the bits that are set in this number are the indices of the 2156 * idle states to be disabled by default (as reflected by the names of the 2157 * corresponding idle state directories in sysfs, "state0", "state1" ... 2158 * "state<i>" ..., where <i> is the index of the given state). 2159 */ 2160 module_param_named(states_off, disabled_states_mask, uint, 0444); 2161 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2162 /* 2163 * Some platforms come with mutually exclusive C-states, so that if one is 2164 * enabled, the other C-states must not be used. Example: C1 and C1E on 2165 * Sapphire Rapids platform. 
This parameter allows for selecting the 2166 * preferred C-states among the groups of mutually exclusive C-states - the 2167 * selected C-states will be registered, the other C-states from the mutually 2168 * exclusive group won't be registered. If the platform has no mutually 2169 * exclusive C-states, this parameter has no effect. 2170 */ 2171 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); 2172 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); 2173 /* 2174 * Debugging option that forces the driver to enter all C-states with 2175 * interrupts enabled. Does not apply to C-states with 2176 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags. 2177 */ 2178 module_param(force_irq_on, bool, 0444); 2179