// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver. The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 * for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 * (in contrast to entering ACPI C3, in which case the WBINVD
 * instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/sched/smt.h> 51 #include <linux/notifier.h> 52 #include <linux/cpu.h> 53 #include <linux/moduleparam.h> 54 #include <asm/cpu_device_id.h> 55 #include <asm/intel-family.h> 56 #include <asm/nospec-branch.h> 57 #include <asm/mwait.h> 58 #include <asm/msr.h> 59 60 #define INTEL_IDLE_VERSION "0.5.1" 61 62 static struct cpuidle_driver intel_idle_driver = { 63 .name = "intel_idle", 64 .owner = THIS_MODULE, 65 }; 66 /* intel_idle.max_cstate=0 disables driver */ 67 static int max_cstate = CPUIDLE_STATE_MAX - 1; 68 static unsigned int disabled_states_mask; 69 static unsigned int preferred_states_mask; 70 71 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 72 73 static unsigned long auto_demotion_disable_flags; 74 75 static enum { 76 C1E_PROMOTION_PRESERVE, 77 C1E_PROMOTION_ENABLE, 78 C1E_PROMOTION_DISABLE 79 } c1e_promotion = C1E_PROMOTION_PRESERVE; 80 81 struct idle_cpu { 82 struct cpuidle_state *state_table; 83 84 /* 85 * Hardware C-state auto-demotion may not always be optimal. 86 * Indicate which enable bits to clear here. 87 */ 88 unsigned long auto_demotion_disable_flags; 89 bool byt_auto_demotion_disable_flag; 90 bool disable_promotion_to_c1e; 91 bool use_acpi; 92 }; 93 94 static const struct idle_cpu *icpu __initdata; 95 static struct cpuidle_state *cpuidle_state_table __initdata; 96 97 static unsigned int mwait_substates __initdata; 98 99 /* 100 * Enable interrupts before entering the C-state. On some platforms and for 101 * some C-states, this may measurably decrease interrupt latency. 
102 */ 103 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 104 105 /* 106 * Enable this state by default even if the ACPI _CST does not list it. 107 */ 108 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 109 110 /* 111 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 112 * above. 113 */ 114 #define CPUIDLE_FLAG_IBRS BIT(16) 115 116 /* 117 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 118 * the C-state (top nibble) and sub-state (bottom nibble) 119 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 120 * 121 * We store the hint at the top of our "flags" for each state. 122 */ 123 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 124 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 125 126 static __always_inline int __intel_idle(struct cpuidle_device *dev, 127 struct cpuidle_driver *drv, int index) 128 { 129 struct cpuidle_state *state = &drv->states[index]; 130 unsigned long eax = flg2MWAIT(state->flags); 131 unsigned long ecx = 1; /* break on interrupt flag */ 132 133 mwait_idle_with_hints(eax, ecx); 134 135 return index; 136 } 137 138 /** 139 * intel_idle - Ask the processor to enter the given idle state. 140 * @dev: cpuidle device of the target CPU. 141 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 142 * @index: Target idle state index. 143 * 144 * Use the MWAIT instruction to notify the processor that the CPU represented by 145 * @dev is idle and it can try to enter the idle state corresponding to @index. 146 * 147 * If the local APIC timer is not known to be reliable in the target idle state, 148 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 149 * 150 * Must be called under local_irq_disable(). 
151 */ 152 static __cpuidle int intel_idle(struct cpuidle_device *dev, 153 struct cpuidle_driver *drv, int index) 154 { 155 return __intel_idle(dev, drv, index); 156 } 157 158 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 159 struct cpuidle_driver *drv, int index) 160 { 161 int ret; 162 163 raw_local_irq_enable(); 164 ret = __intel_idle(dev, drv, index); 165 166 /* 167 * The lockdep hardirqs state may be changed to 'on' with timer 168 * tick interrupt followed by __do_softirq(). Use local_irq_disable() 169 * to keep the hardirqs state correct. 170 */ 171 local_irq_disable(); 172 173 return ret; 174 } 175 176 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 177 struct cpuidle_driver *drv, int index) 178 { 179 bool smt_active = sched_smt_active(); 180 u64 spec_ctrl = spec_ctrl_current(); 181 int ret; 182 183 if (smt_active) 184 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 185 186 ret = __intel_idle(dev, drv, index); 187 188 if (smt_active) 189 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 190 191 return ret; 192 } 193 194 /** 195 * intel_idle_s2idle - Ask the processor to enter the given idle state. 196 * @dev: cpuidle device of the target CPU. 197 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 198 * @index: Target idle state index. 199 * 200 * Use the MWAIT instruction to notify the processor that the CPU represented by 201 * @dev is idle and it can try to enter the idle state corresponding to @index. 202 * 203 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 204 * scheduler tick and suspended scheduler clock on the target CPU. 
205 */ 206 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 207 struct cpuidle_driver *drv, int index) 208 { 209 unsigned long eax = flg2MWAIT(drv->states[index].flags); 210 unsigned long ecx = 1; /* break on interrupt flag */ 211 212 mwait_idle_with_hints(eax, ecx); 213 214 return 0; 215 } 216 217 /* 218 * States are indexed by the cstate number, 219 * which is also the index into the MWAIT hint array. 220 * Thus C0 is a dummy. 221 */ 222 static struct cpuidle_state nehalem_cstates[] __initdata = { 223 { 224 .name = "C1", 225 .desc = "MWAIT 0x00", 226 .flags = MWAIT2flg(0x00), 227 .exit_latency = 3, 228 .target_residency = 6, 229 .enter = &intel_idle, 230 .enter_s2idle = intel_idle_s2idle, }, 231 { 232 .name = "C1E", 233 .desc = "MWAIT 0x01", 234 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 235 .exit_latency = 10, 236 .target_residency = 20, 237 .enter = &intel_idle, 238 .enter_s2idle = intel_idle_s2idle, }, 239 { 240 .name = "C3", 241 .desc = "MWAIT 0x10", 242 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 243 .exit_latency = 20, 244 .target_residency = 80, 245 .enter = &intel_idle, 246 .enter_s2idle = intel_idle_s2idle, }, 247 { 248 .name = "C6", 249 .desc = "MWAIT 0x20", 250 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 251 .exit_latency = 200, 252 .target_residency = 800, 253 .enter = &intel_idle, 254 .enter_s2idle = intel_idle_s2idle, }, 255 { 256 .enter = NULL } 257 }; 258 259 static struct cpuidle_state snb_cstates[] __initdata = { 260 { 261 .name = "C1", 262 .desc = "MWAIT 0x00", 263 .flags = MWAIT2flg(0x00), 264 .exit_latency = 2, 265 .target_residency = 2, 266 .enter = &intel_idle, 267 .enter_s2idle = intel_idle_s2idle, }, 268 { 269 .name = "C1E", 270 .desc = "MWAIT 0x01", 271 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 272 .exit_latency = 10, 273 .target_residency = 20, 274 .enter = &intel_idle, 275 .enter_s2idle = intel_idle_s2idle, }, 276 { 277 .name = "C3", 278 .desc = "MWAIT 0x10", 279 .flags = 
MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 280 .exit_latency = 80, 281 .target_residency = 211, 282 .enter = &intel_idle, 283 .enter_s2idle = intel_idle_s2idle, }, 284 { 285 .name = "C6", 286 .desc = "MWAIT 0x20", 287 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 288 .exit_latency = 104, 289 .target_residency = 345, 290 .enter = &intel_idle, 291 .enter_s2idle = intel_idle_s2idle, }, 292 { 293 .name = "C7", 294 .desc = "MWAIT 0x30", 295 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 296 .exit_latency = 109, 297 .target_residency = 345, 298 .enter = &intel_idle, 299 .enter_s2idle = intel_idle_s2idle, }, 300 { 301 .enter = NULL } 302 }; 303 304 static struct cpuidle_state byt_cstates[] __initdata = { 305 { 306 .name = "C1", 307 .desc = "MWAIT 0x00", 308 .flags = MWAIT2flg(0x00), 309 .exit_latency = 1, 310 .target_residency = 1, 311 .enter = &intel_idle, 312 .enter_s2idle = intel_idle_s2idle, }, 313 { 314 .name = "C6N", 315 .desc = "MWAIT 0x58", 316 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 317 .exit_latency = 300, 318 .target_residency = 275, 319 .enter = &intel_idle, 320 .enter_s2idle = intel_idle_s2idle, }, 321 { 322 .name = "C6S", 323 .desc = "MWAIT 0x52", 324 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 325 .exit_latency = 500, 326 .target_residency = 560, 327 .enter = &intel_idle, 328 .enter_s2idle = intel_idle_s2idle, }, 329 { 330 .name = "C7", 331 .desc = "MWAIT 0x60", 332 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 333 .exit_latency = 1200, 334 .target_residency = 4000, 335 .enter = &intel_idle, 336 .enter_s2idle = intel_idle_s2idle, }, 337 { 338 .name = "C7S", 339 .desc = "MWAIT 0x64", 340 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 341 .exit_latency = 10000, 342 .target_residency = 20000, 343 .enter = &intel_idle, 344 .enter_s2idle = intel_idle_s2idle, }, 345 { 346 .enter = NULL } 347 }; 348 349 static struct cpuidle_state cht_cstates[] __initdata = { 350 { 351 .name = "C1", 352 .desc = "MWAIT 0x00", 353 
.flags = MWAIT2flg(0x00), 354 .exit_latency = 1, 355 .target_residency = 1, 356 .enter = &intel_idle, 357 .enter_s2idle = intel_idle_s2idle, }, 358 { 359 .name = "C6N", 360 .desc = "MWAIT 0x58", 361 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 362 .exit_latency = 80, 363 .target_residency = 275, 364 .enter = &intel_idle, 365 .enter_s2idle = intel_idle_s2idle, }, 366 { 367 .name = "C6S", 368 .desc = "MWAIT 0x52", 369 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 370 .exit_latency = 200, 371 .target_residency = 560, 372 .enter = &intel_idle, 373 .enter_s2idle = intel_idle_s2idle, }, 374 { 375 .name = "C7", 376 .desc = "MWAIT 0x60", 377 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 378 .exit_latency = 1200, 379 .target_residency = 4000, 380 .enter = &intel_idle, 381 .enter_s2idle = intel_idle_s2idle, }, 382 { 383 .name = "C7S", 384 .desc = "MWAIT 0x64", 385 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 386 .exit_latency = 10000, 387 .target_residency = 20000, 388 .enter = &intel_idle, 389 .enter_s2idle = intel_idle_s2idle, }, 390 { 391 .enter = NULL } 392 }; 393 394 static struct cpuidle_state ivb_cstates[] __initdata = { 395 { 396 .name = "C1", 397 .desc = "MWAIT 0x00", 398 .flags = MWAIT2flg(0x00), 399 .exit_latency = 1, 400 .target_residency = 1, 401 .enter = &intel_idle, 402 .enter_s2idle = intel_idle_s2idle, }, 403 { 404 .name = "C1E", 405 .desc = "MWAIT 0x01", 406 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 407 .exit_latency = 10, 408 .target_residency = 20, 409 .enter = &intel_idle, 410 .enter_s2idle = intel_idle_s2idle, }, 411 { 412 .name = "C3", 413 .desc = "MWAIT 0x10", 414 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 415 .exit_latency = 59, 416 .target_residency = 156, 417 .enter = &intel_idle, 418 .enter_s2idle = intel_idle_s2idle, }, 419 { 420 .name = "C6", 421 .desc = "MWAIT 0x20", 422 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 423 .exit_latency = 80, 424 .target_residency = 300, 425 .enter = 
&intel_idle, 426 .enter_s2idle = intel_idle_s2idle, }, 427 { 428 .name = "C7", 429 .desc = "MWAIT 0x30", 430 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 431 .exit_latency = 87, 432 .target_residency = 300, 433 .enter = &intel_idle, 434 .enter_s2idle = intel_idle_s2idle, }, 435 { 436 .enter = NULL } 437 }; 438 439 static struct cpuidle_state ivt_cstates[] __initdata = { 440 { 441 .name = "C1", 442 .desc = "MWAIT 0x00", 443 .flags = MWAIT2flg(0x00), 444 .exit_latency = 1, 445 .target_residency = 1, 446 .enter = &intel_idle, 447 .enter_s2idle = intel_idle_s2idle, }, 448 { 449 .name = "C1E", 450 .desc = "MWAIT 0x01", 451 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 452 .exit_latency = 10, 453 .target_residency = 80, 454 .enter = &intel_idle, 455 .enter_s2idle = intel_idle_s2idle, }, 456 { 457 .name = "C3", 458 .desc = "MWAIT 0x10", 459 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 460 .exit_latency = 59, 461 .target_residency = 156, 462 .enter = &intel_idle, 463 .enter_s2idle = intel_idle_s2idle, }, 464 { 465 .name = "C6", 466 .desc = "MWAIT 0x20", 467 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 468 .exit_latency = 82, 469 .target_residency = 300, 470 .enter = &intel_idle, 471 .enter_s2idle = intel_idle_s2idle, }, 472 { 473 .enter = NULL } 474 }; 475 476 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 477 { 478 .name = "C1", 479 .desc = "MWAIT 0x00", 480 .flags = MWAIT2flg(0x00), 481 .exit_latency = 1, 482 .target_residency = 1, 483 .enter = &intel_idle, 484 .enter_s2idle = intel_idle_s2idle, }, 485 { 486 .name = "C1E", 487 .desc = "MWAIT 0x01", 488 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 489 .exit_latency = 10, 490 .target_residency = 250, 491 .enter = &intel_idle, 492 .enter_s2idle = intel_idle_s2idle, }, 493 { 494 .name = "C3", 495 .desc = "MWAIT 0x10", 496 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 497 .exit_latency = 59, 498 .target_residency = 300, 499 .enter = &intel_idle, 500 .enter_s2idle 
= intel_idle_s2idle, }, 501 { 502 .name = "C6", 503 .desc = "MWAIT 0x20", 504 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 505 .exit_latency = 84, 506 .target_residency = 400, 507 .enter = &intel_idle, 508 .enter_s2idle = intel_idle_s2idle, }, 509 { 510 .enter = NULL } 511 }; 512 513 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 514 { 515 .name = "C1", 516 .desc = "MWAIT 0x00", 517 .flags = MWAIT2flg(0x00), 518 .exit_latency = 1, 519 .target_residency = 1, 520 .enter = &intel_idle, 521 .enter_s2idle = intel_idle_s2idle, }, 522 { 523 .name = "C1E", 524 .desc = "MWAIT 0x01", 525 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 526 .exit_latency = 10, 527 .target_residency = 500, 528 .enter = &intel_idle, 529 .enter_s2idle = intel_idle_s2idle, }, 530 { 531 .name = "C3", 532 .desc = "MWAIT 0x10", 533 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 534 .exit_latency = 59, 535 .target_residency = 600, 536 .enter = &intel_idle, 537 .enter_s2idle = intel_idle_s2idle, }, 538 { 539 .name = "C6", 540 .desc = "MWAIT 0x20", 541 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 542 .exit_latency = 88, 543 .target_residency = 700, 544 .enter = &intel_idle, 545 .enter_s2idle = intel_idle_s2idle, }, 546 { 547 .enter = NULL } 548 }; 549 550 static struct cpuidle_state hsw_cstates[] __initdata = { 551 { 552 .name = "C1", 553 .desc = "MWAIT 0x00", 554 .flags = MWAIT2flg(0x00), 555 .exit_latency = 2, 556 .target_residency = 2, 557 .enter = &intel_idle, 558 .enter_s2idle = intel_idle_s2idle, }, 559 { 560 .name = "C1E", 561 .desc = "MWAIT 0x01", 562 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 563 .exit_latency = 10, 564 .target_residency = 20, 565 .enter = &intel_idle, 566 .enter_s2idle = intel_idle_s2idle, }, 567 { 568 .name = "C3", 569 .desc = "MWAIT 0x10", 570 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 571 .exit_latency = 33, 572 .target_residency = 100, 573 .enter = &intel_idle, 574 .enter_s2idle = intel_idle_s2idle, }, 575 { 
576 .name = "C6", 577 .desc = "MWAIT 0x20", 578 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 579 .exit_latency = 133, 580 .target_residency = 400, 581 .enter = &intel_idle, 582 .enter_s2idle = intel_idle_s2idle, }, 583 { 584 .name = "C7s", 585 .desc = "MWAIT 0x32", 586 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 587 .exit_latency = 166, 588 .target_residency = 500, 589 .enter = &intel_idle, 590 .enter_s2idle = intel_idle_s2idle, }, 591 { 592 .name = "C8", 593 .desc = "MWAIT 0x40", 594 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 595 .exit_latency = 300, 596 .target_residency = 900, 597 .enter = &intel_idle, 598 .enter_s2idle = intel_idle_s2idle, }, 599 { 600 .name = "C9", 601 .desc = "MWAIT 0x50", 602 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 603 .exit_latency = 600, 604 .target_residency = 1800, 605 .enter = &intel_idle, 606 .enter_s2idle = intel_idle_s2idle, }, 607 { 608 .name = "C10", 609 .desc = "MWAIT 0x60", 610 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 611 .exit_latency = 2600, 612 .target_residency = 7700, 613 .enter = &intel_idle, 614 .enter_s2idle = intel_idle_s2idle, }, 615 { 616 .enter = NULL } 617 }; 618 static struct cpuidle_state bdw_cstates[] __initdata = { 619 { 620 .name = "C1", 621 .desc = "MWAIT 0x00", 622 .flags = MWAIT2flg(0x00), 623 .exit_latency = 2, 624 .target_residency = 2, 625 .enter = &intel_idle, 626 .enter_s2idle = intel_idle_s2idle, }, 627 { 628 .name = "C1E", 629 .desc = "MWAIT 0x01", 630 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 631 .exit_latency = 10, 632 .target_residency = 20, 633 .enter = &intel_idle, 634 .enter_s2idle = intel_idle_s2idle, }, 635 { 636 .name = "C3", 637 .desc = "MWAIT 0x10", 638 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 639 .exit_latency = 40, 640 .target_residency = 100, 641 .enter = &intel_idle, 642 .enter_s2idle = intel_idle_s2idle, }, 643 { 644 .name = "C6", 645 .desc = "MWAIT 0x20", 646 .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, 647 .exit_latency = 133, 648 .target_residency = 400, 649 .enter = &intel_idle, 650 .enter_s2idle = intel_idle_s2idle, }, 651 { 652 .name = "C7s", 653 .desc = "MWAIT 0x32", 654 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 655 .exit_latency = 166, 656 .target_residency = 500, 657 .enter = &intel_idle, 658 .enter_s2idle = intel_idle_s2idle, }, 659 { 660 .name = "C8", 661 .desc = "MWAIT 0x40", 662 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 663 .exit_latency = 300, 664 .target_residency = 900, 665 .enter = &intel_idle, 666 .enter_s2idle = intel_idle_s2idle, }, 667 { 668 .name = "C9", 669 .desc = "MWAIT 0x50", 670 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 671 .exit_latency = 600, 672 .target_residency = 1800, 673 .enter = &intel_idle, 674 .enter_s2idle = intel_idle_s2idle, }, 675 { 676 .name = "C10", 677 .desc = "MWAIT 0x60", 678 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 679 .exit_latency = 2600, 680 .target_residency = 7700, 681 .enter = &intel_idle, 682 .enter_s2idle = intel_idle_s2idle, }, 683 { 684 .enter = NULL } 685 }; 686 687 static struct cpuidle_state skl_cstates[] __initdata = { 688 { 689 .name = "C1", 690 .desc = "MWAIT 0x00", 691 .flags = MWAIT2flg(0x00), 692 .exit_latency = 2, 693 .target_residency = 2, 694 .enter = &intel_idle, 695 .enter_s2idle = intel_idle_s2idle, }, 696 { 697 .name = "C1E", 698 .desc = "MWAIT 0x01", 699 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 700 .exit_latency = 10, 701 .target_residency = 20, 702 .enter = &intel_idle, 703 .enter_s2idle = intel_idle_s2idle, }, 704 { 705 .name = "C3", 706 .desc = "MWAIT 0x10", 707 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 708 .exit_latency = 70, 709 .target_residency = 100, 710 .enter = &intel_idle, 711 .enter_s2idle = intel_idle_s2idle, }, 712 { 713 .name = "C6", 714 .desc = "MWAIT 0x20", 715 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 716 .exit_latency = 85, 717 .target_residency = 
200, 718 .enter = &intel_idle, 719 .enter_s2idle = intel_idle_s2idle, }, 720 { 721 .name = "C7s", 722 .desc = "MWAIT 0x33", 723 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 724 .exit_latency = 124, 725 .target_residency = 800, 726 .enter = &intel_idle, 727 .enter_s2idle = intel_idle_s2idle, }, 728 { 729 .name = "C8", 730 .desc = "MWAIT 0x40", 731 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 732 .exit_latency = 200, 733 .target_residency = 800, 734 .enter = &intel_idle, 735 .enter_s2idle = intel_idle_s2idle, }, 736 { 737 .name = "C9", 738 .desc = "MWAIT 0x50", 739 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 740 .exit_latency = 480, 741 .target_residency = 5000, 742 .enter = &intel_idle, 743 .enter_s2idle = intel_idle_s2idle, }, 744 { 745 .name = "C10", 746 .desc = "MWAIT 0x60", 747 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 748 .exit_latency = 890, 749 .target_residency = 5000, 750 .enter = &intel_idle, 751 .enter_s2idle = intel_idle_s2idle, }, 752 { 753 .enter = NULL } 754 }; 755 756 static struct cpuidle_state skx_cstates[] __initdata = { 757 { 758 .name = "C1", 759 .desc = "MWAIT 0x00", 760 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 761 .exit_latency = 2, 762 .target_residency = 2, 763 .enter = &intel_idle, 764 .enter_s2idle = intel_idle_s2idle, }, 765 { 766 .name = "C1E", 767 .desc = "MWAIT 0x01", 768 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 769 .exit_latency = 10, 770 .target_residency = 20, 771 .enter = &intel_idle, 772 .enter_s2idle = intel_idle_s2idle, }, 773 { 774 .name = "C6", 775 .desc = "MWAIT 0x20", 776 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 777 .exit_latency = 133, 778 .target_residency = 600, 779 .enter = &intel_idle, 780 .enter_s2idle = intel_idle_s2idle, }, 781 { 782 .enter = NULL } 783 }; 784 785 static struct cpuidle_state icx_cstates[] __initdata = { 786 { 787 .name = "C1", 788 
.desc = "MWAIT 0x00", 789 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 790 .exit_latency = 1, 791 .target_residency = 1, 792 .enter = &intel_idle, 793 .enter_s2idle = intel_idle_s2idle, }, 794 { 795 .name = "C1E", 796 .desc = "MWAIT 0x01", 797 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 798 .exit_latency = 4, 799 .target_residency = 4, 800 .enter = &intel_idle, 801 .enter_s2idle = intel_idle_s2idle, }, 802 { 803 .name = "C6", 804 .desc = "MWAIT 0x20", 805 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 806 .exit_latency = 170, 807 .target_residency = 600, 808 .enter = &intel_idle, 809 .enter_s2idle = intel_idle_s2idle, }, 810 { 811 .enter = NULL } 812 }; 813 814 /* 815 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 816 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 817 * But in this case there is effectively no C1, because C1 requests are 818 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 819 * and C1E requests end up with C1, so there is effectively no C1E. 820 * 821 * By default we enable C1E and disable C1 by marking it with 822 * 'CPUIDLE_FLAG_UNUSABLE'. 
823 */ 824 static struct cpuidle_state adl_cstates[] __initdata = { 825 { 826 .name = "C1", 827 .desc = "MWAIT 0x00", 828 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 829 .exit_latency = 1, 830 .target_residency = 1, 831 .enter = &intel_idle, 832 .enter_s2idle = intel_idle_s2idle, }, 833 { 834 .name = "C1E", 835 .desc = "MWAIT 0x01", 836 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 837 .exit_latency = 2, 838 .target_residency = 4, 839 .enter = &intel_idle, 840 .enter_s2idle = intel_idle_s2idle, }, 841 { 842 .name = "C6", 843 .desc = "MWAIT 0x20", 844 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 845 .exit_latency = 220, 846 .target_residency = 600, 847 .enter = &intel_idle, 848 .enter_s2idle = intel_idle_s2idle, }, 849 { 850 .name = "C8", 851 .desc = "MWAIT 0x40", 852 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 853 .exit_latency = 280, 854 .target_residency = 800, 855 .enter = &intel_idle, 856 .enter_s2idle = intel_idle_s2idle, }, 857 { 858 .name = "C10", 859 .desc = "MWAIT 0x60", 860 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 861 .exit_latency = 680, 862 .target_residency = 2000, 863 .enter = &intel_idle, 864 .enter_s2idle = intel_idle_s2idle, }, 865 { 866 .enter = NULL } 867 }; 868 869 static struct cpuidle_state adl_l_cstates[] __initdata = { 870 { 871 .name = "C1", 872 .desc = "MWAIT 0x00", 873 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 874 .exit_latency = 1, 875 .target_residency = 1, 876 .enter = &intel_idle, 877 .enter_s2idle = intel_idle_s2idle, }, 878 { 879 .name = "C1E", 880 .desc = "MWAIT 0x01", 881 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 882 .exit_latency = 2, 883 .target_residency = 4, 884 .enter = &intel_idle, 885 .enter_s2idle = intel_idle_s2idle, }, 886 { 887 .name = "C6", 888 .desc = "MWAIT 0x20", 889 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 890 .exit_latency = 170, 891 .target_residency = 500, 892 .enter = &intel_idle, 893 .enter_s2idle = intel_idle_s2idle, }, 894 { 895 
.name = "C8", 896 .desc = "MWAIT 0x40", 897 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 898 .exit_latency = 200, 899 .target_residency = 600, 900 .enter = &intel_idle, 901 .enter_s2idle = intel_idle_s2idle, }, 902 { 903 .name = "C10", 904 .desc = "MWAIT 0x60", 905 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 906 .exit_latency = 230, 907 .target_residency = 700, 908 .enter = &intel_idle, 909 .enter_s2idle = intel_idle_s2idle, }, 910 { 911 .enter = NULL } 912 }; 913 914 /* 915 * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice 916 * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in 917 * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 918 * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then 919 * both C1 and C1E requests end up with C1, so there is effectively no C1E. 920 * 921 * By default we enable C1 and disable C1E by marking it with 922 * 'CPUIDLE_FLAG_UNUSABLE'. 923 */ 924 static struct cpuidle_state spr_cstates[] __initdata = { 925 { 926 .name = "C1", 927 .desc = "MWAIT 0x00", 928 .flags = MWAIT2flg(0x00), 929 .exit_latency = 1, 930 .target_residency = 1, 931 .enter = &intel_idle, 932 .enter_s2idle = intel_idle_s2idle, }, 933 { 934 .name = "C1E", 935 .desc = "MWAIT 0x01", 936 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | 937 CPUIDLE_FLAG_UNUSABLE, 938 .exit_latency = 2, 939 .target_residency = 4, 940 .enter = &intel_idle, 941 .enter_s2idle = intel_idle_s2idle, }, 942 { 943 .name = "C6", 944 .desc = "MWAIT 0x20", 945 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 946 .exit_latency = 290, 947 .target_residency = 800, 948 .enter = &intel_idle, 949 .enter_s2idle = intel_idle_s2idle, }, 950 { 951 .enter = NULL } 952 }; 953 954 static struct cpuidle_state atom_cstates[] __initdata = { 955 { 956 .name = "C1E", 957 .desc = "MWAIT 0x00", 958 .flags = MWAIT2flg(0x00), 959 .exit_latency = 10, 960 .target_residency = 20, 961 .enter = &intel_idle, 962 
.enter_s2idle = intel_idle_s2idle, }, 963 { 964 .name = "C2", 965 .desc = "MWAIT 0x10", 966 .flags = MWAIT2flg(0x10), 967 .exit_latency = 20, 968 .target_residency = 80, 969 .enter = &intel_idle, 970 .enter_s2idle = intel_idle_s2idle, }, 971 { 972 .name = "C4", 973 .desc = "MWAIT 0x30", 974 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 975 .exit_latency = 100, 976 .target_residency = 400, 977 .enter = &intel_idle, 978 .enter_s2idle = intel_idle_s2idle, }, 979 { 980 .name = "C6", 981 .desc = "MWAIT 0x52", 982 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 983 .exit_latency = 140, 984 .target_residency = 560, 985 .enter = &intel_idle, 986 .enter_s2idle = intel_idle_s2idle, }, 987 { 988 .enter = NULL } 989 }; 990 static struct cpuidle_state tangier_cstates[] __initdata = { 991 { 992 .name = "C1", 993 .desc = "MWAIT 0x00", 994 .flags = MWAIT2flg(0x00), 995 .exit_latency = 1, 996 .target_residency = 4, 997 .enter = &intel_idle, 998 .enter_s2idle = intel_idle_s2idle, }, 999 { 1000 .name = "C4", 1001 .desc = "MWAIT 0x30", 1002 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1003 .exit_latency = 100, 1004 .target_residency = 400, 1005 .enter = &intel_idle, 1006 .enter_s2idle = intel_idle_s2idle, }, 1007 { 1008 .name = "C6", 1009 .desc = "MWAIT 0x52", 1010 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1011 .exit_latency = 140, 1012 .target_residency = 560, 1013 .enter = &intel_idle, 1014 .enter_s2idle = intel_idle_s2idle, }, 1015 { 1016 .name = "C7", 1017 .desc = "MWAIT 0x60", 1018 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1019 .exit_latency = 1200, 1020 .target_residency = 4000, 1021 .enter = &intel_idle, 1022 .enter_s2idle = intel_idle_s2idle, }, 1023 { 1024 .name = "C9", 1025 .desc = "MWAIT 0x64", 1026 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1027 .exit_latency = 10000, 1028 .target_residency = 20000, 1029 .enter = &intel_idle, 1030 .enter_s2idle = intel_idle_s2idle, }, 1031 { 1032 .enter = NULL } 1033 }; 1034 static struct 
cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/* Note the non-standard MWAIT hint 0x51 for C6 on this SoC. */
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Xeon Phi (KNL/KNM, see intel_idle_ids below): C1 and C6 only. */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

/*
 * Broxton/Goldmont. The exit latencies and target residencies of the
 * package C-states below are placeholders; they are overridden from the
 * IRTL MSRs by bxt_idle_state_table_update() when non-zero.
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Per-model descriptors: bind a C-state table to the model-specific MSR
 * quirks (auto-demotion, C1E promotion) and to the ACPI _CST preference.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	/* Also clears the CC6/MC6 demotion policy MSRs, see init_cstates_icpu(). */
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/* Map each supported CPU model to its idle_cpu descriptor. */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};

/* Fallback match: any family-6 Intel CPU with MWAIT (ACPI _CST path). */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/* Return true (and log once per boot message) if @cstate is beyond max_cstate. */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/* Does entering @state require switching to the one-shot tick broadcast? */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	unsigned long eax = flg2MWAIT(state->flags);

	/* ARAT: the local APIC timer keeps running in deep C-states. */
	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
 *
 * Check if all of the C-states listed by _CST in the max_cstate range are
 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1431 */ 1432 static bool __init intel_idle_cst_usable(void) 1433 { 1434 int cstate, limit; 1435 1436 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1437 acpi_state_table.count); 1438 1439 for (cstate = 1; cstate < limit; cstate++) { 1440 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1441 1442 if (cx->entry_method != ACPI_CSTATE_FFH) 1443 return false; 1444 } 1445 1446 return true; 1447 } 1448 1449 static bool __init intel_idle_acpi_cst_extract(void) 1450 { 1451 unsigned int cpu; 1452 1453 if (no_acpi) { 1454 pr_debug("Not allowed to use ACPI _CST\n"); 1455 return false; 1456 } 1457 1458 for_each_possible_cpu(cpu) { 1459 struct acpi_processor *pr = per_cpu(processors, cpu); 1460 1461 if (!pr) 1462 continue; 1463 1464 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1465 continue; 1466 1467 acpi_state_table.count++; 1468 1469 if (!intel_idle_cst_usable()) 1470 continue; 1471 1472 if (!acpi_processor_claim_cst_control()) 1473 break; 1474 1475 return true; 1476 } 1477 1478 acpi_state_table.count = 0; 1479 pr_debug("ACPI _CST not found or not usable\n"); 1480 return false; 1481 } 1482 1483 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1484 { 1485 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1486 1487 /* 1488 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1489 * the interesting states are ACPI_CSTATE_FFH. 
1490 */ 1491 for (cstate = 1; cstate < limit; cstate++) { 1492 struct acpi_processor_cx *cx; 1493 struct cpuidle_state *state; 1494 1495 if (intel_idle_max_cstate_reached(cstate - 1)) 1496 break; 1497 1498 cx = &acpi_state_table.states[cstate]; 1499 1500 state = &drv->states[drv->state_count++]; 1501 1502 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1503 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1504 state->exit_latency = cx->latency; 1505 /* 1506 * For C1-type C-states use the same number for both the exit 1507 * latency and target residency, because that is the case for 1508 * C1 in the majority of the static C-states tables above. 1509 * For the other types of C-states, however, set the target 1510 * residency to 3 times the exit latency which should lead to 1511 * a reasonable balance between energy-efficiency and 1512 * performance in the majority of interesting cases. 1513 */ 1514 state->target_residency = cx->latency; 1515 if (cx->type > ACPI_STATE_C1) 1516 state->target_residency *= 3; 1517 1518 state->flags = MWAIT2flg(cx->address); 1519 if (cx->type > ACPI_STATE_C2) 1520 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1521 1522 if (disabled_states_mask & BIT(cstate)) 1523 state->flags |= CPUIDLE_FLAG_OFF; 1524 1525 if (intel_idle_state_needs_timer_stop(state)) 1526 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1527 1528 state->enter = intel_idle; 1529 state->enter_s2idle = intel_idle_s2idle; 1530 } 1531 } 1532 1533 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1534 { 1535 int cstate, limit; 1536 1537 /* 1538 * If there are no _CST C-states, do not disable any C-states by 1539 * default. 1540 */ 1541 if (!acpi_state_table.count) 1542 return false; 1543 1544 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1545 /* 1546 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1547 * the interesting states are ACPI_CSTATE_FFH. 
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		if (acpi_state_table.states[cstate].address == mwait_hint)
			return false;
	}
	return true;
}
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */

/**
 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
 *
 * Tune IVT multi-socket targets.
 * Assumption: num_sockets == (max_package_num + 1).
 */
static void __init ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			/* More than 4 sockets: switch tables and stop scanning. */
			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/**
 * irtl_2_usec - IRTL to microseconds conversion.
 * @irtl: IRTL MSR value.
 *
 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
 */
static unsigned long long __init irtl_2_usec(unsigned long long irtl)
{
	/* Time-unit multipliers (ns) selected by IRTL bits 12:10. */
	static const unsigned int irtl_ns_units[] __initconst = {
		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
	};
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	/* Bits 9:0 hold the interval count in the selected time unit. */
	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
}

/**
 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
 *
 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
 * definitive maximum latency and use the same value for target_residency.
 */
static void __init bxt_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int usec;

	/* Indices 2..6 of bxt_cstates are C6, C7s, C8, C9 and C10. */
	rdmsrl(MSR_PKGC6_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[2].exit_latency = usec;
		bxt_cstates[2].target_residency = usec;
	}

	rdmsrl(MSR_PKGC7_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[3].exit_latency = usec;
		bxt_cstates[3].target_residency = usec;
	}

	rdmsrl(MSR_PKGC8_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[4].exit_latency = usec;
		bxt_cstates[4].target_residency = usec;
	}

	rdmsrl(MSR_PKGC9_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[5].exit_latency = usec;
		bxt_cstates[5].target_residency = usec;
	}

	rdmsrl(MSR_PKGC10_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[6].exit_latency = usec;
		bxt_cstates[6].target_residency = usec;
	}

}

/**
 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
 *
 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;

	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}

/**
 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
 * idle states table.
 */
static void __init skx_idle_state_table_update(void)
{
	unsigned long long msr;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/*
	 * 000b: C0/C1 (no package C-state support)
	 * 001b: C2
	 * 010b: C6 (non-retention)
	 * 011b: C6 (retention)
	 * 111b: No Package C state limits.
	 */
	if ((msr & 0x7) < 2) {
		/*
		 * Uses the CC6 + PC0 latency and 3 times of
		 * latency for target_residency if the PC6
		 * is disabled in BIOS. This is consistent
		 * with how intel_idle driver uses _CST
		 * to set the target_residency.
		 */
		skx_cstates[2].exit_latency = 92;
		skx_cstates[2].target_residency = 276;
	}
}

/**
 * adl_idle_state_table_update - Adjust AlderLake idle states table.
 */
static void __init adl_idle_state_table_update(void)
{
	/* Check if user prefers C1 over C1E. */
	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
		/* Table index 0 is C1, index 1 is C1E. */
		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;

		/* Disable C1E by clearing the "C1E promotion" bit. */
		c1e_promotion = C1E_PROMOTION_DISABLE;
		return;
	}

	/* Make sure C1E is enabled by default */
	c1e_promotion = C1E_PROMOTION_ENABLE;
}

/**
 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
 */
static void __init spr_idle_state_table_update(void)
{
	unsigned long long msr;

	/* Check if user prefers C1E over C1. */
	if ((preferred_states_mask & BIT(2)) &&
	    !(preferred_states_mask & BIT(1))) {
		/* Disable C1 and enable C1E. */
		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;

		/* Enable C1E using the "C1E promotion" bit. */
		c1e_promotion = C1E_PROMOTION_ENABLE;
	}

	/*
	 * By default, the C6 state assumes the worst-case scenario of package
	 * C6. However, if PC6 is disabled, we update the numbers to match
	 * core C6.
	 */
	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* Limit value 2 and above allow for PC6. */
	if ((msr & 0x7) < 2) {
		spr_cstates[2].exit_latency = 190;
		spr_cstates[2].target_residency = 600;
	}
}

/* Validate a C-state's MWAIT hint against CPUID.MWAIT sub-state counts. */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
					MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/**
 * intel_idle_init_cstates_icpu - Populate @drv from the static model table.
 * @drv: cpuidle driver whose states table is being built.
 *
 * Applies model-specific table fixups, then copies each usable entry of
 * cpuidle_state_table into the driver.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_X:
		skx_idle_state_table_update();
		break;
	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		spr_idle_state_table_update();
		break;
	case INTEL_FAM6_ALDERLAKE:
	case INTEL_FAM6_ALDERLAKE_L:
		adl_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* Both enter callbacks NULL marks the end of the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy.
 */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/* Select the appropriate enter callback variant for this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
			drv->states[drv->state_count].enter = intel_idle_irq;

		if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
		    cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
			/* IBRS and IRQ_ENABLE variants are mutually exclusive. */
			WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
			drv->states[drv->state_count].enter = intel_idle_ibrs;
		}

		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
 */
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
	cpuidle_poll_state_init(drv);

	if (disabled_states_mask & BIT(0))
		drv->states[0].flags |= CPUIDLE_FLAG_OFF;

	/* State 0 is the polling state; real C-states start at index 1. */
	drv->state_count = 1;

	if (icpu)
		intel_idle_init_cstates_icpu(drv);
	else
		intel_idle_init_cstates_acpi(drv);
}

/* Clear the auto-demotion enable bits selected for this CPU model. */
static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~auto_demotion_disable_flags;
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

/* Set the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_enable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits |= 0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/* Clear the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (c1e_promotion == C1E_PROMOTION_ENABLE)
		c1e_promotion_enable();
	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
		c1e_promotion_disable();

	return 0;
}

/* CPU hotplug "online" callback: runs on each CPU as it comes up. */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!boot_cpu_has(X86_FEATURE_ARAT))
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

/**
 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/*
 * Driver entry point: validate the CPU, pick the state table (static model
 * table or ACPI _CST), register the cpuidle driver and hook CPU onlining.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: fall back to any family-6 CPU with MWAIT. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* EDX of the MWAIT leaf holds the per-C-state sub-state counts. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
			return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		if (icpu->disable_promotion_to_c1e)
			c1e_promotion = C1E_PROMOTION_DISABLE;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that. Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");