// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/notifier.h> 51 #include <linux/cpu.h> 52 #include <linux/moduleparam.h> 53 #include <asm/cpu_device_id.h> 54 #include <asm/intel-family.h> 55 #include <asm/mwait.h> 56 #include <asm/msr.h> 57 58 #define INTEL_IDLE_VERSION "0.5.1" 59 60 static struct cpuidle_driver intel_idle_driver = { 61 .name = "intel_idle", 62 .owner = THIS_MODULE, 63 }; 64 /* intel_idle.max_cstate=0 disables driver */ 65 static int max_cstate = CPUIDLE_STATE_MAX - 1; 66 static unsigned int disabled_states_mask; 67 static unsigned int preferred_states_mask; 68 69 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 70 71 static unsigned long auto_demotion_disable_flags; 72 73 static enum { 74 C1E_PROMOTION_PRESERVE, 75 C1E_PROMOTION_ENABLE, 76 C1E_PROMOTION_DISABLE 77 } c1e_promotion = C1E_PROMOTION_PRESERVE; 78 79 struct idle_cpu { 80 struct cpuidle_state *state_table; 81 82 /* 83 * Hardware C-state auto-demotion may not always be optimal. 84 * Indicate which enable bits to clear here. 85 */ 86 unsigned long auto_demotion_disable_flags; 87 bool byt_auto_demotion_disable_flag; 88 bool disable_promotion_to_c1e; 89 bool use_acpi; 90 }; 91 92 static const struct idle_cpu *icpu __initdata; 93 static struct cpuidle_state *cpuidle_state_table __initdata; 94 95 static unsigned int mwait_substates __initdata; 96 97 /* 98 * Enable interrupts before entering the C-state. On some platforms and for 99 * some C-states, this may measurably decrease interrupt latency. 100 */ 101 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 102 103 /* 104 * Enable this state by default even if the ACPI _CST does not list it. 
105 */ 106 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 107 108 /* 109 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 110 * the C-state (top nibble) and sub-state (bottom nibble) 111 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 112 * 113 * We store the hint at the top of our "flags" for each state. 114 */ 115 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 116 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 117 118 static __always_inline int __intel_idle(struct cpuidle_device *dev, 119 struct cpuidle_driver *drv, int index) 120 { 121 struct cpuidle_state *state = &drv->states[index]; 122 unsigned long eax = flg2MWAIT(state->flags); 123 unsigned long ecx = 1; /* break on interrupt flag */ 124 125 mwait_idle_with_hints(eax, ecx); 126 127 return index; 128 } 129 130 /** 131 * intel_idle - Ask the processor to enter the given idle state. 132 * @dev: cpuidle device of the target CPU. 133 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 134 * @index: Target idle state index. 135 * 136 * Use the MWAIT instruction to notify the processor that the CPU represented by 137 * @dev is idle and it can try to enter the idle state corresponding to @index. 138 * 139 * If the local APIC timer is not known to be reliable in the target idle state, 140 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 141 * 142 * Must be called under local_irq_disable(). 143 */ 144 static __cpuidle int intel_idle(struct cpuidle_device *dev, 145 struct cpuidle_driver *drv, int index) 146 { 147 return __intel_idle(dev, drv, index); 148 } 149 150 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 151 struct cpuidle_driver *drv, int index) 152 { 153 int ret; 154 155 raw_local_irq_enable(); 156 ret = __intel_idle(dev, drv, index); 157 raw_local_irq_disable(); 158 159 return ret; 160 } 161 162 /** 163 * intel_idle_s2idle - Ask the processor to enter the given idle state. 164 * @dev: cpuidle device of the target CPU. 
165 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 166 * @index: Target idle state index. 167 * 168 * Use the MWAIT instruction to notify the processor that the CPU represented by 169 * @dev is idle and it can try to enter the idle state corresponding to @index. 170 * 171 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 172 * scheduler tick and suspended scheduler clock on the target CPU. 173 */ 174 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 175 struct cpuidle_driver *drv, int index) 176 { 177 unsigned long eax = flg2MWAIT(drv->states[index].flags); 178 unsigned long ecx = 1; /* break on interrupt flag */ 179 180 mwait_idle_with_hints(eax, ecx); 181 182 return 0; 183 } 184 185 /* 186 * States are indexed by the cstate number, 187 * which is also the index into the MWAIT hint array. 188 * Thus C0 is a dummy. 189 */ 190 static struct cpuidle_state nehalem_cstates[] __initdata = { 191 { 192 .name = "C1", 193 .desc = "MWAIT 0x00", 194 .flags = MWAIT2flg(0x00), 195 .exit_latency = 3, 196 .target_residency = 6, 197 .enter = &intel_idle, 198 .enter_s2idle = intel_idle_s2idle, }, 199 { 200 .name = "C1E", 201 .desc = "MWAIT 0x01", 202 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 203 .exit_latency = 10, 204 .target_residency = 20, 205 .enter = &intel_idle, 206 .enter_s2idle = intel_idle_s2idle, }, 207 { 208 .name = "C3", 209 .desc = "MWAIT 0x10", 210 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 211 .exit_latency = 20, 212 .target_residency = 80, 213 .enter = &intel_idle, 214 .enter_s2idle = intel_idle_s2idle, }, 215 { 216 .name = "C6", 217 .desc = "MWAIT 0x20", 218 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 219 .exit_latency = 200, 220 .target_residency = 800, 221 .enter = &intel_idle, 222 .enter_s2idle = intel_idle_s2idle, }, 223 { 224 .enter = NULL } 225 }; 226 227 static struct cpuidle_state snb_cstates[] __initdata = { 228 { 229 .name = "C1", 230 .desc = "MWAIT 0x00", 231 
.flags = MWAIT2flg(0x00), 232 .exit_latency = 2, 233 .target_residency = 2, 234 .enter = &intel_idle, 235 .enter_s2idle = intel_idle_s2idle, }, 236 { 237 .name = "C1E", 238 .desc = "MWAIT 0x01", 239 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 240 .exit_latency = 10, 241 .target_residency = 20, 242 .enter = &intel_idle, 243 .enter_s2idle = intel_idle_s2idle, }, 244 { 245 .name = "C3", 246 .desc = "MWAIT 0x10", 247 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 248 .exit_latency = 80, 249 .target_residency = 211, 250 .enter = &intel_idle, 251 .enter_s2idle = intel_idle_s2idle, }, 252 { 253 .name = "C6", 254 .desc = "MWAIT 0x20", 255 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 256 .exit_latency = 104, 257 .target_residency = 345, 258 .enter = &intel_idle, 259 .enter_s2idle = intel_idle_s2idle, }, 260 { 261 .name = "C7", 262 .desc = "MWAIT 0x30", 263 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 264 .exit_latency = 109, 265 .target_residency = 345, 266 .enter = &intel_idle, 267 .enter_s2idle = intel_idle_s2idle, }, 268 { 269 .enter = NULL } 270 }; 271 272 static struct cpuidle_state byt_cstates[] __initdata = { 273 { 274 .name = "C1", 275 .desc = "MWAIT 0x00", 276 .flags = MWAIT2flg(0x00), 277 .exit_latency = 1, 278 .target_residency = 1, 279 .enter = &intel_idle, 280 .enter_s2idle = intel_idle_s2idle, }, 281 { 282 .name = "C6N", 283 .desc = "MWAIT 0x58", 284 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 285 .exit_latency = 300, 286 .target_residency = 275, 287 .enter = &intel_idle, 288 .enter_s2idle = intel_idle_s2idle, }, 289 { 290 .name = "C6S", 291 .desc = "MWAIT 0x52", 292 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 293 .exit_latency = 500, 294 .target_residency = 560, 295 .enter = &intel_idle, 296 .enter_s2idle = intel_idle_s2idle, }, 297 { 298 .name = "C7", 299 .desc = "MWAIT 0x60", 300 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 301 .exit_latency = 1200, 302 .target_residency = 4000, 303 .enter = 
&intel_idle, 304 .enter_s2idle = intel_idle_s2idle, }, 305 { 306 .name = "C7S", 307 .desc = "MWAIT 0x64", 308 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 309 .exit_latency = 10000, 310 .target_residency = 20000, 311 .enter = &intel_idle, 312 .enter_s2idle = intel_idle_s2idle, }, 313 { 314 .enter = NULL } 315 }; 316 317 static struct cpuidle_state cht_cstates[] __initdata = { 318 { 319 .name = "C1", 320 .desc = "MWAIT 0x00", 321 .flags = MWAIT2flg(0x00), 322 .exit_latency = 1, 323 .target_residency = 1, 324 .enter = &intel_idle, 325 .enter_s2idle = intel_idle_s2idle, }, 326 { 327 .name = "C6N", 328 .desc = "MWAIT 0x58", 329 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 330 .exit_latency = 80, 331 .target_residency = 275, 332 .enter = &intel_idle, 333 .enter_s2idle = intel_idle_s2idle, }, 334 { 335 .name = "C6S", 336 .desc = "MWAIT 0x52", 337 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 338 .exit_latency = 200, 339 .target_residency = 560, 340 .enter = &intel_idle, 341 .enter_s2idle = intel_idle_s2idle, }, 342 { 343 .name = "C7", 344 .desc = "MWAIT 0x60", 345 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 346 .exit_latency = 1200, 347 .target_residency = 4000, 348 .enter = &intel_idle, 349 .enter_s2idle = intel_idle_s2idle, }, 350 { 351 .name = "C7S", 352 .desc = "MWAIT 0x64", 353 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 354 .exit_latency = 10000, 355 .target_residency = 20000, 356 .enter = &intel_idle, 357 .enter_s2idle = intel_idle_s2idle, }, 358 { 359 .enter = NULL } 360 }; 361 362 static struct cpuidle_state ivb_cstates[] __initdata = { 363 { 364 .name = "C1", 365 .desc = "MWAIT 0x00", 366 .flags = MWAIT2flg(0x00), 367 .exit_latency = 1, 368 .target_residency = 1, 369 .enter = &intel_idle, 370 .enter_s2idle = intel_idle_s2idle, }, 371 { 372 .name = "C1E", 373 .desc = "MWAIT 0x01", 374 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 375 .exit_latency = 10, 376 .target_residency = 20, 377 .enter = &intel_idle, 378 
.enter_s2idle = intel_idle_s2idle, }, 379 { 380 .name = "C3", 381 .desc = "MWAIT 0x10", 382 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 383 .exit_latency = 59, 384 .target_residency = 156, 385 .enter = &intel_idle, 386 .enter_s2idle = intel_idle_s2idle, }, 387 { 388 .name = "C6", 389 .desc = "MWAIT 0x20", 390 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 391 .exit_latency = 80, 392 .target_residency = 300, 393 .enter = &intel_idle, 394 .enter_s2idle = intel_idle_s2idle, }, 395 { 396 .name = "C7", 397 .desc = "MWAIT 0x30", 398 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 399 .exit_latency = 87, 400 .target_residency = 300, 401 .enter = &intel_idle, 402 .enter_s2idle = intel_idle_s2idle, }, 403 { 404 .enter = NULL } 405 }; 406 407 static struct cpuidle_state ivt_cstates[] __initdata = { 408 { 409 .name = "C1", 410 .desc = "MWAIT 0x00", 411 .flags = MWAIT2flg(0x00), 412 .exit_latency = 1, 413 .target_residency = 1, 414 .enter = &intel_idle, 415 .enter_s2idle = intel_idle_s2idle, }, 416 { 417 .name = "C1E", 418 .desc = "MWAIT 0x01", 419 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 420 .exit_latency = 10, 421 .target_residency = 80, 422 .enter = &intel_idle, 423 .enter_s2idle = intel_idle_s2idle, }, 424 { 425 .name = "C3", 426 .desc = "MWAIT 0x10", 427 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 428 .exit_latency = 59, 429 .target_residency = 156, 430 .enter = &intel_idle, 431 .enter_s2idle = intel_idle_s2idle, }, 432 { 433 .name = "C6", 434 .desc = "MWAIT 0x20", 435 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 436 .exit_latency = 82, 437 .target_residency = 300, 438 .enter = &intel_idle, 439 .enter_s2idle = intel_idle_s2idle, }, 440 { 441 .enter = NULL } 442 }; 443 444 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 445 { 446 .name = "C1", 447 .desc = "MWAIT 0x00", 448 .flags = MWAIT2flg(0x00), 449 .exit_latency = 1, 450 .target_residency = 1, 451 .enter = &intel_idle, 452 .enter_s2idle = intel_idle_s2idle, 
}, 453 { 454 .name = "C1E", 455 .desc = "MWAIT 0x01", 456 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 457 .exit_latency = 10, 458 .target_residency = 250, 459 .enter = &intel_idle, 460 .enter_s2idle = intel_idle_s2idle, }, 461 { 462 .name = "C3", 463 .desc = "MWAIT 0x10", 464 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 465 .exit_latency = 59, 466 .target_residency = 300, 467 .enter = &intel_idle, 468 .enter_s2idle = intel_idle_s2idle, }, 469 { 470 .name = "C6", 471 .desc = "MWAIT 0x20", 472 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 473 .exit_latency = 84, 474 .target_residency = 400, 475 .enter = &intel_idle, 476 .enter_s2idle = intel_idle_s2idle, }, 477 { 478 .enter = NULL } 479 }; 480 481 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 482 { 483 .name = "C1", 484 .desc = "MWAIT 0x00", 485 .flags = MWAIT2flg(0x00), 486 .exit_latency = 1, 487 .target_residency = 1, 488 .enter = &intel_idle, 489 .enter_s2idle = intel_idle_s2idle, }, 490 { 491 .name = "C1E", 492 .desc = "MWAIT 0x01", 493 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 494 .exit_latency = 10, 495 .target_residency = 500, 496 .enter = &intel_idle, 497 .enter_s2idle = intel_idle_s2idle, }, 498 { 499 .name = "C3", 500 .desc = "MWAIT 0x10", 501 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 502 .exit_latency = 59, 503 .target_residency = 600, 504 .enter = &intel_idle, 505 .enter_s2idle = intel_idle_s2idle, }, 506 { 507 .name = "C6", 508 .desc = "MWAIT 0x20", 509 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 510 .exit_latency = 88, 511 .target_residency = 700, 512 .enter = &intel_idle, 513 .enter_s2idle = intel_idle_s2idle, }, 514 { 515 .enter = NULL } 516 }; 517 518 static struct cpuidle_state hsw_cstates[] __initdata = { 519 { 520 .name = "C1", 521 .desc = "MWAIT 0x00", 522 .flags = MWAIT2flg(0x00), 523 .exit_latency = 2, 524 .target_residency = 2, 525 .enter = &intel_idle, 526 .enter_s2idle = intel_idle_s2idle, }, 527 { 528 .name = "C1E", 
529 .desc = "MWAIT 0x01", 530 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 531 .exit_latency = 10, 532 .target_residency = 20, 533 .enter = &intel_idle, 534 .enter_s2idle = intel_idle_s2idle, }, 535 { 536 .name = "C3", 537 .desc = "MWAIT 0x10", 538 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 539 .exit_latency = 33, 540 .target_residency = 100, 541 .enter = &intel_idle, 542 .enter_s2idle = intel_idle_s2idle, }, 543 { 544 .name = "C6", 545 .desc = "MWAIT 0x20", 546 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 547 .exit_latency = 133, 548 .target_residency = 400, 549 .enter = &intel_idle, 550 .enter_s2idle = intel_idle_s2idle, }, 551 { 552 .name = "C7s", 553 .desc = "MWAIT 0x32", 554 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 555 .exit_latency = 166, 556 .target_residency = 500, 557 .enter = &intel_idle, 558 .enter_s2idle = intel_idle_s2idle, }, 559 { 560 .name = "C8", 561 .desc = "MWAIT 0x40", 562 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 563 .exit_latency = 300, 564 .target_residency = 900, 565 .enter = &intel_idle, 566 .enter_s2idle = intel_idle_s2idle, }, 567 { 568 .name = "C9", 569 .desc = "MWAIT 0x50", 570 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 571 .exit_latency = 600, 572 .target_residency = 1800, 573 .enter = &intel_idle, 574 .enter_s2idle = intel_idle_s2idle, }, 575 { 576 .name = "C10", 577 .desc = "MWAIT 0x60", 578 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 579 .exit_latency = 2600, 580 .target_residency = 7700, 581 .enter = &intel_idle, 582 .enter_s2idle = intel_idle_s2idle, }, 583 { 584 .enter = NULL } 585 }; 586 static struct cpuidle_state bdw_cstates[] __initdata = { 587 { 588 .name = "C1", 589 .desc = "MWAIT 0x00", 590 .flags = MWAIT2flg(0x00), 591 .exit_latency = 2, 592 .target_residency = 2, 593 .enter = &intel_idle, 594 .enter_s2idle = intel_idle_s2idle, }, 595 { 596 .name = "C1E", 597 .desc = "MWAIT 0x01", 598 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 599 
.exit_latency = 10, 600 .target_residency = 20, 601 .enter = &intel_idle, 602 .enter_s2idle = intel_idle_s2idle, }, 603 { 604 .name = "C3", 605 .desc = "MWAIT 0x10", 606 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 607 .exit_latency = 40, 608 .target_residency = 100, 609 .enter = &intel_idle, 610 .enter_s2idle = intel_idle_s2idle, }, 611 { 612 .name = "C6", 613 .desc = "MWAIT 0x20", 614 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 615 .exit_latency = 133, 616 .target_residency = 400, 617 .enter = &intel_idle, 618 .enter_s2idle = intel_idle_s2idle, }, 619 { 620 .name = "C7s", 621 .desc = "MWAIT 0x32", 622 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 623 .exit_latency = 166, 624 .target_residency = 500, 625 .enter = &intel_idle, 626 .enter_s2idle = intel_idle_s2idle, }, 627 { 628 .name = "C8", 629 .desc = "MWAIT 0x40", 630 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 631 .exit_latency = 300, 632 .target_residency = 900, 633 .enter = &intel_idle, 634 .enter_s2idle = intel_idle_s2idle, }, 635 { 636 .name = "C9", 637 .desc = "MWAIT 0x50", 638 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 639 .exit_latency = 600, 640 .target_residency = 1800, 641 .enter = &intel_idle, 642 .enter_s2idle = intel_idle_s2idle, }, 643 { 644 .name = "C10", 645 .desc = "MWAIT 0x60", 646 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 647 .exit_latency = 2600, 648 .target_residency = 7700, 649 .enter = &intel_idle, 650 .enter_s2idle = intel_idle_s2idle, }, 651 { 652 .enter = NULL } 653 }; 654 655 static struct cpuidle_state skl_cstates[] __initdata = { 656 { 657 .name = "C1", 658 .desc = "MWAIT 0x00", 659 .flags = MWAIT2flg(0x00), 660 .exit_latency = 2, 661 .target_residency = 2, 662 .enter = &intel_idle, 663 .enter_s2idle = intel_idle_s2idle, }, 664 { 665 .name = "C1E", 666 .desc = "MWAIT 0x01", 667 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 668 .exit_latency = 10, 669 .target_residency = 20, 670 .enter = &intel_idle, 671 .enter_s2idle = 
intel_idle_s2idle, }, 672 { 673 .name = "C3", 674 .desc = "MWAIT 0x10", 675 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 676 .exit_latency = 70, 677 .target_residency = 100, 678 .enter = &intel_idle, 679 .enter_s2idle = intel_idle_s2idle, }, 680 { 681 .name = "C6", 682 .desc = "MWAIT 0x20", 683 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 684 .exit_latency = 85, 685 .target_residency = 200, 686 .enter = &intel_idle, 687 .enter_s2idle = intel_idle_s2idle, }, 688 { 689 .name = "C7s", 690 .desc = "MWAIT 0x33", 691 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 692 .exit_latency = 124, 693 .target_residency = 800, 694 .enter = &intel_idle, 695 .enter_s2idle = intel_idle_s2idle, }, 696 { 697 .name = "C8", 698 .desc = "MWAIT 0x40", 699 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 700 .exit_latency = 200, 701 .target_residency = 800, 702 .enter = &intel_idle, 703 .enter_s2idle = intel_idle_s2idle, }, 704 { 705 .name = "C9", 706 .desc = "MWAIT 0x50", 707 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 708 .exit_latency = 480, 709 .target_residency = 5000, 710 .enter = &intel_idle, 711 .enter_s2idle = intel_idle_s2idle, }, 712 { 713 .name = "C10", 714 .desc = "MWAIT 0x60", 715 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 716 .exit_latency = 890, 717 .target_residency = 5000, 718 .enter = &intel_idle, 719 .enter_s2idle = intel_idle_s2idle, }, 720 { 721 .enter = NULL } 722 }; 723 724 static struct cpuidle_state skx_cstates[] __initdata = { 725 { 726 .name = "C1", 727 .desc = "MWAIT 0x00", 728 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 729 .exit_latency = 2, 730 .target_residency = 2, 731 .enter = &intel_idle, 732 .enter_s2idle = intel_idle_s2idle, }, 733 { 734 .name = "C1E", 735 .desc = "MWAIT 0x01", 736 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 737 .exit_latency = 10, 738 .target_residency = 20, 739 .enter = &intel_idle, 740 .enter_s2idle = intel_idle_s2idle, }, 741 { 742 .name = "C6", 743 .desc = "MWAIT 0x20", 
744 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 745 .exit_latency = 133, 746 .target_residency = 600, 747 .enter = &intel_idle, 748 .enter_s2idle = intel_idle_s2idle, }, 749 { 750 .enter = NULL } 751 }; 752 753 static struct cpuidle_state icx_cstates[] __initdata = { 754 { 755 .name = "C1", 756 .desc = "MWAIT 0x00", 757 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 758 .exit_latency = 1, 759 .target_residency = 1, 760 .enter = &intel_idle, 761 .enter_s2idle = intel_idle_s2idle, }, 762 { 763 .name = "C1E", 764 .desc = "MWAIT 0x01", 765 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 766 .exit_latency = 4, 767 .target_residency = 4, 768 .enter = &intel_idle, 769 .enter_s2idle = intel_idle_s2idle, }, 770 { 771 .name = "C6", 772 .desc = "MWAIT 0x20", 773 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 774 .exit_latency = 170, 775 .target_residency = 600, 776 .enter = &intel_idle, 777 .enter_s2idle = intel_idle_s2idle, }, 778 { 779 .enter = NULL } 780 }; 781 782 /* 783 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 784 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 785 * But in this case there is effectively no C1, because C1 requests are 786 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 787 * and C1E requests end up with C1, so there is effectively no C1E. 788 * 789 * By default we enable C1E and disable C1 by marking it with 790 * 'CPUIDLE_FLAG_UNUSABLE'. 
791 */ 792 static struct cpuidle_state adl_cstates[] __initdata = { 793 { 794 .name = "C1", 795 .desc = "MWAIT 0x00", 796 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 797 .exit_latency = 1, 798 .target_residency = 1, 799 .enter = &intel_idle, 800 .enter_s2idle = intel_idle_s2idle, }, 801 { 802 .name = "C1E", 803 .desc = "MWAIT 0x01", 804 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 805 .exit_latency = 2, 806 .target_residency = 4, 807 .enter = &intel_idle, 808 .enter_s2idle = intel_idle_s2idle, }, 809 { 810 .name = "C6", 811 .desc = "MWAIT 0x20", 812 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 813 .exit_latency = 220, 814 .target_residency = 600, 815 .enter = &intel_idle, 816 .enter_s2idle = intel_idle_s2idle, }, 817 { 818 .name = "C8", 819 .desc = "MWAIT 0x40", 820 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 821 .exit_latency = 280, 822 .target_residency = 800, 823 .enter = &intel_idle, 824 .enter_s2idle = intel_idle_s2idle, }, 825 { 826 .name = "C10", 827 .desc = "MWAIT 0x60", 828 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 829 .exit_latency = 680, 830 .target_residency = 2000, 831 .enter = &intel_idle, 832 .enter_s2idle = intel_idle_s2idle, }, 833 { 834 .enter = NULL } 835 }; 836 837 static struct cpuidle_state adl_l_cstates[] __initdata = { 838 { 839 .name = "C1", 840 .desc = "MWAIT 0x00", 841 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 842 .exit_latency = 1, 843 .target_residency = 1, 844 .enter = &intel_idle, 845 .enter_s2idle = intel_idle_s2idle, }, 846 { 847 .name = "C1E", 848 .desc = "MWAIT 0x01", 849 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 850 .exit_latency = 2, 851 .target_residency = 4, 852 .enter = &intel_idle, 853 .enter_s2idle = intel_idle_s2idle, }, 854 { 855 .name = "C6", 856 .desc = "MWAIT 0x20", 857 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 858 .exit_latency = 170, 859 .target_residency = 500, 860 .enter = &intel_idle, 861 .enter_s2idle = intel_idle_s2idle, }, 862 { 863 
.name = "C8", 864 .desc = "MWAIT 0x40", 865 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 866 .exit_latency = 200, 867 .target_residency = 600, 868 .enter = &intel_idle, 869 .enter_s2idle = intel_idle_s2idle, }, 870 { 871 .name = "C10", 872 .desc = "MWAIT 0x60", 873 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 874 .exit_latency = 230, 875 .target_residency = 700, 876 .enter = &intel_idle, 877 .enter_s2idle = intel_idle_s2idle, }, 878 { 879 .enter = NULL } 880 }; 881 882 /* 883 * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice 884 * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in 885 * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 886 * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then 887 * both C1 and C1E requests end up with C1, so there is effectively no C1E. 888 * 889 * By default we enable C1 and disable C1E by marking it with 890 * 'CPUIDLE_FLAG_UNUSABLE'. 891 */ 892 static struct cpuidle_state spr_cstates[] __initdata = { 893 { 894 .name = "C1", 895 .desc = "MWAIT 0x00", 896 .flags = MWAIT2flg(0x00), 897 .exit_latency = 1, 898 .target_residency = 1, 899 .enter = &intel_idle, 900 .enter_s2idle = intel_idle_s2idle, }, 901 { 902 .name = "C1E", 903 .desc = "MWAIT 0x01", 904 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | 905 CPUIDLE_FLAG_UNUSABLE, 906 .exit_latency = 2, 907 .target_residency = 4, 908 .enter = &intel_idle, 909 .enter_s2idle = intel_idle_s2idle, }, 910 { 911 .name = "C6", 912 .desc = "MWAIT 0x20", 913 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 914 .exit_latency = 290, 915 .target_residency = 800, 916 .enter = &intel_idle, 917 .enter_s2idle = intel_idle_s2idle, }, 918 { 919 .enter = NULL } 920 }; 921 922 static struct cpuidle_state atom_cstates[] __initdata = { 923 { 924 .name = "C1E", 925 .desc = "MWAIT 0x00", 926 .flags = MWAIT2flg(0x00), 927 .exit_latency = 10, 928 .target_residency = 20, 929 .enter = &intel_idle, 930 
.enter_s2idle = intel_idle_s2idle, }, 931 { 932 .name = "C2", 933 .desc = "MWAIT 0x10", 934 .flags = MWAIT2flg(0x10), 935 .exit_latency = 20, 936 .target_residency = 80, 937 .enter = &intel_idle, 938 .enter_s2idle = intel_idle_s2idle, }, 939 { 940 .name = "C4", 941 .desc = "MWAIT 0x30", 942 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 943 .exit_latency = 100, 944 .target_residency = 400, 945 .enter = &intel_idle, 946 .enter_s2idle = intel_idle_s2idle, }, 947 { 948 .name = "C6", 949 .desc = "MWAIT 0x52", 950 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 951 .exit_latency = 140, 952 .target_residency = 560, 953 .enter = &intel_idle, 954 .enter_s2idle = intel_idle_s2idle, }, 955 { 956 .enter = NULL } 957 }; 958 static struct cpuidle_state tangier_cstates[] __initdata = { 959 { 960 .name = "C1", 961 .desc = "MWAIT 0x00", 962 .flags = MWAIT2flg(0x00), 963 .exit_latency = 1, 964 .target_residency = 4, 965 .enter = &intel_idle, 966 .enter_s2idle = intel_idle_s2idle, }, 967 { 968 .name = "C4", 969 .desc = "MWAIT 0x30", 970 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 971 .exit_latency = 100, 972 .target_residency = 400, 973 .enter = &intel_idle, 974 .enter_s2idle = intel_idle_s2idle, }, 975 { 976 .name = "C6", 977 .desc = "MWAIT 0x52", 978 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 979 .exit_latency = 140, 980 .target_residency = 560, 981 .enter = &intel_idle, 982 .enter_s2idle = intel_idle_s2idle, }, 983 { 984 .name = "C7", 985 .desc = "MWAIT 0x60", 986 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 987 .exit_latency = 1200, 988 .target_residency = 4000, 989 .enter = &intel_idle, 990 .enter_s2idle = intel_idle_s2idle, }, 991 { 992 .name = "C9", 993 .desc = "MWAIT 0x64", 994 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 995 .exit_latency = 10000, 996 .target_residency = 20000, 997 .enter = &intel_idle, 998 .enter_s2idle = intel_idle_s2idle, }, 999 { 1000 .enter = NULL } 1001 }; 1002 static struct cpuidle_state avn_cstates[] 
__initdata = { 1003 { 1004 .name = "C1", 1005 .desc = "MWAIT 0x00", 1006 .flags = MWAIT2flg(0x00), 1007 .exit_latency = 2, 1008 .target_residency = 2, 1009 .enter = &intel_idle, 1010 .enter_s2idle = intel_idle_s2idle, }, 1011 { 1012 .name = "C6", 1013 .desc = "MWAIT 0x51", 1014 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1015 .exit_latency = 15, 1016 .target_residency = 45, 1017 .enter = &intel_idle, 1018 .enter_s2idle = intel_idle_s2idle, }, 1019 { 1020 .enter = NULL } 1021 }; 1022 static struct cpuidle_state knl_cstates[] __initdata = { 1023 { 1024 .name = "C1", 1025 .desc = "MWAIT 0x00", 1026 .flags = MWAIT2flg(0x00), 1027 .exit_latency = 1, 1028 .target_residency = 2, 1029 .enter = &intel_idle, 1030 .enter_s2idle = intel_idle_s2idle }, 1031 { 1032 .name = "C6", 1033 .desc = "MWAIT 0x10", 1034 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1035 .exit_latency = 120, 1036 .target_residency = 500, 1037 .enter = &intel_idle, 1038 .enter_s2idle = intel_idle_s2idle }, 1039 { 1040 .enter = NULL } 1041 }; 1042 1043 static struct cpuidle_state bxt_cstates[] __initdata = { 1044 { 1045 .name = "C1", 1046 .desc = "MWAIT 0x00", 1047 .flags = MWAIT2flg(0x00), 1048 .exit_latency = 2, 1049 .target_residency = 2, 1050 .enter = &intel_idle, 1051 .enter_s2idle = intel_idle_s2idle, }, 1052 { 1053 .name = "C1E", 1054 .desc = "MWAIT 0x01", 1055 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1056 .exit_latency = 10, 1057 .target_residency = 20, 1058 .enter = &intel_idle, 1059 .enter_s2idle = intel_idle_s2idle, }, 1060 { 1061 .name = "C6", 1062 .desc = "MWAIT 0x20", 1063 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1064 .exit_latency = 133, 1065 .target_residency = 133, 1066 .enter = &intel_idle, 1067 .enter_s2idle = intel_idle_s2idle, }, 1068 { 1069 .name = "C7s", 1070 .desc = "MWAIT 0x31", 1071 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1072 .exit_latency = 155, 1073 .target_residency = 155, 1074 .enter = &intel_idle, 1075 .enter_s2idle = 
intel_idle_s2idle, }, 1076 { 1077 .name = "C8", 1078 .desc = "MWAIT 0x40", 1079 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1080 .exit_latency = 1000, 1081 .target_residency = 1000, 1082 .enter = &intel_idle, 1083 .enter_s2idle = intel_idle_s2idle, }, 1084 { 1085 .name = "C9", 1086 .desc = "MWAIT 0x50", 1087 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1088 .exit_latency = 2000, 1089 .target_residency = 2000, 1090 .enter = &intel_idle, 1091 .enter_s2idle = intel_idle_s2idle, }, 1092 { 1093 .name = "C10", 1094 .desc = "MWAIT 0x60", 1095 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1096 .exit_latency = 10000, 1097 .target_residency = 10000, 1098 .enter = &intel_idle, 1099 .enter_s2idle = intel_idle_s2idle, }, 1100 { 1101 .enter = NULL } 1102 }; 1103 1104 static struct cpuidle_state dnv_cstates[] __initdata = { 1105 { 1106 .name = "C1", 1107 .desc = "MWAIT 0x00", 1108 .flags = MWAIT2flg(0x00), 1109 .exit_latency = 2, 1110 .target_residency = 2, 1111 .enter = &intel_idle, 1112 .enter_s2idle = intel_idle_s2idle, }, 1113 { 1114 .name = "C1E", 1115 .desc = "MWAIT 0x01", 1116 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1117 .exit_latency = 10, 1118 .target_residency = 20, 1119 .enter = &intel_idle, 1120 .enter_s2idle = intel_idle_s2idle, }, 1121 { 1122 .name = "C6", 1123 .desc = "MWAIT 0x20", 1124 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1125 .exit_latency = 50, 1126 .target_residency = 500, 1127 .enter = &intel_idle, 1128 .enter_s2idle = intel_idle_s2idle, }, 1129 { 1130 .enter = NULL } 1131 }; 1132 1133 /* 1134 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 1135 * C6, and this is indicated in the CPUID mwait leaf. 
1136 */ 1137 static struct cpuidle_state snr_cstates[] __initdata = { 1138 { 1139 .name = "C1", 1140 .desc = "MWAIT 0x00", 1141 .flags = MWAIT2flg(0x00), 1142 .exit_latency = 2, 1143 .target_residency = 2, 1144 .enter = &intel_idle, 1145 .enter_s2idle = intel_idle_s2idle, }, 1146 { 1147 .name = "C1E", 1148 .desc = "MWAIT 0x01", 1149 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1150 .exit_latency = 15, 1151 .target_residency = 25, 1152 .enter = &intel_idle, 1153 .enter_s2idle = intel_idle_s2idle, }, 1154 { 1155 .name = "C6", 1156 .desc = "MWAIT 0x20", 1157 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1158 .exit_latency = 130, 1159 .target_residency = 500, 1160 .enter = &intel_idle, 1161 .enter_s2idle = intel_idle_s2idle, }, 1162 { 1163 .enter = NULL } 1164 }; 1165 1166 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1167 .state_table = nehalem_cstates, 1168 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1169 .disable_promotion_to_c1e = true, 1170 }; 1171 1172 static const struct idle_cpu idle_cpu_nhx __initconst = { 1173 .state_table = nehalem_cstates, 1174 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1175 .disable_promotion_to_c1e = true, 1176 .use_acpi = true, 1177 }; 1178 1179 static const struct idle_cpu idle_cpu_atom __initconst = { 1180 .state_table = atom_cstates, 1181 }; 1182 1183 static const struct idle_cpu idle_cpu_tangier __initconst = { 1184 .state_table = tangier_cstates, 1185 }; 1186 1187 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1188 .state_table = atom_cstates, 1189 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1190 }; 1191 1192 static const struct idle_cpu idle_cpu_snb __initconst = { 1193 .state_table = snb_cstates, 1194 .disable_promotion_to_c1e = true, 1195 }; 1196 1197 static const struct idle_cpu idle_cpu_snx __initconst = { 1198 .state_table = snb_cstates, 1199 .disable_promotion_to_c1e = true, 1200 .use_acpi = true, 1201 }; 1202 
1203 static const struct idle_cpu idle_cpu_byt __initconst = { 1204 .state_table = byt_cstates, 1205 .disable_promotion_to_c1e = true, 1206 .byt_auto_demotion_disable_flag = true, 1207 }; 1208 1209 static const struct idle_cpu idle_cpu_cht __initconst = { 1210 .state_table = cht_cstates, 1211 .disable_promotion_to_c1e = true, 1212 .byt_auto_demotion_disable_flag = true, 1213 }; 1214 1215 static const struct idle_cpu idle_cpu_ivb __initconst = { 1216 .state_table = ivb_cstates, 1217 .disable_promotion_to_c1e = true, 1218 }; 1219 1220 static const struct idle_cpu idle_cpu_ivt __initconst = { 1221 .state_table = ivt_cstates, 1222 .disable_promotion_to_c1e = true, 1223 .use_acpi = true, 1224 }; 1225 1226 static const struct idle_cpu idle_cpu_hsw __initconst = { 1227 .state_table = hsw_cstates, 1228 .disable_promotion_to_c1e = true, 1229 }; 1230 1231 static const struct idle_cpu idle_cpu_hsx __initconst = { 1232 .state_table = hsw_cstates, 1233 .disable_promotion_to_c1e = true, 1234 .use_acpi = true, 1235 }; 1236 1237 static const struct idle_cpu idle_cpu_bdw __initconst = { 1238 .state_table = bdw_cstates, 1239 .disable_promotion_to_c1e = true, 1240 }; 1241 1242 static const struct idle_cpu idle_cpu_bdx __initconst = { 1243 .state_table = bdw_cstates, 1244 .disable_promotion_to_c1e = true, 1245 .use_acpi = true, 1246 }; 1247 1248 static const struct idle_cpu idle_cpu_skl __initconst = { 1249 .state_table = skl_cstates, 1250 .disable_promotion_to_c1e = true, 1251 }; 1252 1253 static const struct idle_cpu idle_cpu_skx __initconst = { 1254 .state_table = skx_cstates, 1255 .disable_promotion_to_c1e = true, 1256 .use_acpi = true, 1257 }; 1258 1259 static const struct idle_cpu idle_cpu_icx __initconst = { 1260 .state_table = icx_cstates, 1261 .disable_promotion_to_c1e = true, 1262 .use_acpi = true, 1263 }; 1264 1265 static const struct idle_cpu idle_cpu_adl __initconst = { 1266 .state_table = adl_cstates, 1267 }; 1268 1269 static const struct idle_cpu idle_cpu_adl_l 
__initconst = { 1270 .state_table = adl_l_cstates, 1271 }; 1272 1273 static const struct idle_cpu idle_cpu_spr __initconst = { 1274 .state_table = spr_cstates, 1275 .disable_promotion_to_c1e = true, 1276 .use_acpi = true, 1277 }; 1278 1279 static const struct idle_cpu idle_cpu_avn __initconst = { 1280 .state_table = avn_cstates, 1281 .disable_promotion_to_c1e = true, 1282 .use_acpi = true, 1283 }; 1284 1285 static const struct idle_cpu idle_cpu_knl __initconst = { 1286 .state_table = knl_cstates, 1287 .use_acpi = true, 1288 }; 1289 1290 static const struct idle_cpu idle_cpu_bxt __initconst = { 1291 .state_table = bxt_cstates, 1292 .disable_promotion_to_c1e = true, 1293 }; 1294 1295 static const struct idle_cpu idle_cpu_dnv __initconst = { 1296 .state_table = dnv_cstates, 1297 .disable_promotion_to_c1e = true, 1298 .use_acpi = true, 1299 }; 1300 1301 static const struct idle_cpu idle_cpu_snr __initconst = { 1302 .state_table = snr_cstates, 1303 .disable_promotion_to_c1e = true, 1304 .use_acpi = true, 1305 }; 1306 1307 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1308 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1309 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1310 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1311 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1312 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1313 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1314 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1315 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1316 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1317 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1318 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1319 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1320 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1321 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1322 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1323 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1324 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1325 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1326 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1327 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1328 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1329 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1330 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1331 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1332 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1333 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1334 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1335 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1336 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1337 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1338 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1339 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1340 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1341 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), 1342 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), 1343 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), 1344 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1345 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1346 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1347 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1348 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1349 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1350 {} 1351 }; 1352 1353 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1354 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1355 {} 1356 }; 1357 1358 static bool __init intel_idle_max_cstate_reached(int cstate) 1359 { 1360 if (cstate + 1 > max_cstate) { 1361 
pr_info("max_cstate %d reached\n", max_cstate); 1362 return true; 1363 } 1364 return false; 1365 } 1366 1367 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1368 { 1369 unsigned long eax = flg2MWAIT(state->flags); 1370 1371 if (boot_cpu_has(X86_FEATURE_ARAT)) 1372 return false; 1373 1374 /* 1375 * Switch over to one-shot tick broadcast if the target C-state 1376 * is deeper than C1. 1377 */ 1378 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1379 } 1380 1381 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1382 #include <acpi/processor.h> 1383 1384 static bool no_acpi __read_mostly; 1385 module_param(no_acpi, bool, 0444); 1386 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1387 1388 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1389 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1390 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1391 1392 static struct acpi_processor_power acpi_state_table __initdata; 1393 1394 /** 1395 * intel_idle_cst_usable - Check if the _CST information can be used. 1396 * 1397 * Check if all of the C-states listed by _CST in the max_cstate range are 1398 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
1399 */ 1400 static bool __init intel_idle_cst_usable(void) 1401 { 1402 int cstate, limit; 1403 1404 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1405 acpi_state_table.count); 1406 1407 for (cstate = 1; cstate < limit; cstate++) { 1408 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1409 1410 if (cx->entry_method != ACPI_CSTATE_FFH) 1411 return false; 1412 } 1413 1414 return true; 1415 } 1416 1417 static bool __init intel_idle_acpi_cst_extract(void) 1418 { 1419 unsigned int cpu; 1420 1421 if (no_acpi) { 1422 pr_debug("Not allowed to use ACPI _CST\n"); 1423 return false; 1424 } 1425 1426 for_each_possible_cpu(cpu) { 1427 struct acpi_processor *pr = per_cpu(processors, cpu); 1428 1429 if (!pr) 1430 continue; 1431 1432 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1433 continue; 1434 1435 acpi_state_table.count++; 1436 1437 if (!intel_idle_cst_usable()) 1438 continue; 1439 1440 if (!acpi_processor_claim_cst_control()) 1441 break; 1442 1443 return true; 1444 } 1445 1446 acpi_state_table.count = 0; 1447 pr_debug("ACPI _CST not found or not usable\n"); 1448 return false; 1449 } 1450 1451 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1452 { 1453 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1454 1455 /* 1456 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1457 * the interesting states are ACPI_CSTATE_FFH. 
1458 */ 1459 for (cstate = 1; cstate < limit; cstate++) { 1460 struct acpi_processor_cx *cx; 1461 struct cpuidle_state *state; 1462 1463 if (intel_idle_max_cstate_reached(cstate - 1)) 1464 break; 1465 1466 cx = &acpi_state_table.states[cstate]; 1467 1468 state = &drv->states[drv->state_count++]; 1469 1470 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1471 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1472 state->exit_latency = cx->latency; 1473 /* 1474 * For C1-type C-states use the same number for both the exit 1475 * latency and target residency, because that is the case for 1476 * C1 in the majority of the static C-states tables above. 1477 * For the other types of C-states, however, set the target 1478 * residency to 3 times the exit latency which should lead to 1479 * a reasonable balance between energy-efficiency and 1480 * performance in the majority of interesting cases. 1481 */ 1482 state->target_residency = cx->latency; 1483 if (cx->type > ACPI_STATE_C1) 1484 state->target_residency *= 3; 1485 1486 state->flags = MWAIT2flg(cx->address); 1487 if (cx->type > ACPI_STATE_C2) 1488 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1489 1490 if (disabled_states_mask & BIT(cstate)) 1491 state->flags |= CPUIDLE_FLAG_OFF; 1492 1493 if (intel_idle_state_needs_timer_stop(state)) 1494 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1495 1496 state->enter = intel_idle; 1497 state->enter_s2idle = intel_idle_s2idle; 1498 } 1499 } 1500 1501 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1502 { 1503 int cstate, limit; 1504 1505 /* 1506 * If there are no _CST C-states, do not disable any C-states by 1507 * default. 1508 */ 1509 if (!acpi_state_table.count) 1510 return false; 1511 1512 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1513 /* 1514 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1515 * the interesting states are ACPI_CSTATE_FFH. 
1516 */ 1517 for (cstate = 1; cstate < limit; cstate++) { 1518 if (acpi_state_table.states[cstate].address == mwait_hint) 1519 return false; 1520 } 1521 return true; 1522 } 1523 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1524 #define force_use_acpi (false) 1525 1526 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1527 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1528 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1529 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1530 1531 /** 1532 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1533 * 1534 * Tune IVT multi-socket targets. 1535 * Assumption: num_sockets == (max_package_num + 1). 1536 */ 1537 static void __init ivt_idle_state_table_update(void) 1538 { 1539 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1540 int cpu, package_num, num_sockets = 1; 1541 1542 for_each_online_cpu(cpu) { 1543 package_num = topology_physical_package_id(cpu); 1544 if (package_num + 1 > num_sockets) { 1545 num_sockets = package_num + 1; 1546 1547 if (num_sockets > 4) { 1548 cpuidle_state_table = ivt_cstates_8s; 1549 return; 1550 } 1551 } 1552 } 1553 1554 if (num_sockets > 2) 1555 cpuidle_state_table = ivt_cstates_4s; 1556 1557 /* else, 1 and 2 socket systems use default ivt_cstates */ 1558 } 1559 1560 /** 1561 * irtl_2_usec - IRTL to microseconds conversion. 1562 * @irtl: IRTL MSR value. 1563 * 1564 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1565 */ 1566 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1567 { 1568 static const unsigned int irtl_ns_units[] __initconst = { 1569 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1570 }; 1571 unsigned long long ns; 1572 1573 if (!irtl) 1574 return 0; 1575 1576 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1577 1578 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1579 } 1580 1581 /** 1582 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1583 * 1584 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1585 * definitive maximum latency and use the same value for target_residency. 1586 */ 1587 static void __init bxt_idle_state_table_update(void) 1588 { 1589 unsigned long long msr; 1590 unsigned int usec; 1591 1592 rdmsrl(MSR_PKGC6_IRTL, msr); 1593 usec = irtl_2_usec(msr); 1594 if (usec) { 1595 bxt_cstates[2].exit_latency = usec; 1596 bxt_cstates[2].target_residency = usec; 1597 } 1598 1599 rdmsrl(MSR_PKGC7_IRTL, msr); 1600 usec = irtl_2_usec(msr); 1601 if (usec) { 1602 bxt_cstates[3].exit_latency = usec; 1603 bxt_cstates[3].target_residency = usec; 1604 } 1605 1606 rdmsrl(MSR_PKGC8_IRTL, msr); 1607 usec = irtl_2_usec(msr); 1608 if (usec) { 1609 bxt_cstates[4].exit_latency = usec; 1610 bxt_cstates[4].target_residency = usec; 1611 } 1612 1613 rdmsrl(MSR_PKGC9_IRTL, msr); 1614 usec = irtl_2_usec(msr); 1615 if (usec) { 1616 bxt_cstates[5].exit_latency = usec; 1617 bxt_cstates[5].target_residency = usec; 1618 } 1619 1620 rdmsrl(MSR_PKGC10_IRTL, msr); 1621 usec = irtl_2_usec(msr); 1622 if (usec) { 1623 bxt_cstates[6].exit_latency = usec; 1624 bxt_cstates[6].target_residency = usec; 1625 } 1626 1627 } 1628 1629 /** 1630 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1631 * 1632 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1633 */ 1634 static void __init sklh_idle_state_table_update(void) 1635 { 1636 unsigned long long msr; 1637 unsigned int eax, ebx, ecx, edx; 1638 1639 1640 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1641 if (max_cstate <= 7) 1642 return; 1643 1644 /* if PC10 not present in CPUID.MWAIT.EDX */ 1645 if ((mwait_substates & (0xF << 28)) == 0) 1646 return; 1647 1648 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1649 1650 /* PC10 is not enabled in PKG C-state limit */ 1651 if ((msr & 0xF) != 8) 1652 return; 1653 1654 ecx = 0; 1655 cpuid(7, &eax, &ebx, &ecx, &edx); 1656 1657 /* if SGX is present */ 1658 if (ebx & (1 << 2)) { 1659 1660 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1661 1662 /* if SGX is enabled */ 1663 if (msr & (1 << 18)) 1664 return; 1665 } 1666 1667 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1668 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1669 } 1670 1671 /** 1672 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1673 * idle states table. 1674 */ 1675 static void __init skx_idle_state_table_update(void) 1676 { 1677 unsigned long long msr; 1678 1679 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1680 1681 /* 1682 * 000b: C0/C1 (no package C-state support) 1683 * 001b: C2 1684 * 010b: C6 (non-retention) 1685 * 011b: C6 (retention) 1686 * 111b: No Package C state limits. 1687 */ 1688 if ((msr & 0x7) < 2) { 1689 /* 1690 * Uses the CC6 + PC0 latency and 3 times of 1691 * latency for target_residency if the PC6 1692 * is disabled in BIOS. This is consistent 1693 * with how intel_idle driver uses _CST 1694 * to set the target_residency. 1695 */ 1696 skx_cstates[2].exit_latency = 92; 1697 skx_cstates[2].target_residency = 276; 1698 } 1699 } 1700 1701 /** 1702 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1703 */ 1704 static void __init adl_idle_state_table_update(void) 1705 { 1706 /* Check if user prefers C1 over C1E. 
*/ 1707 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1708 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1709 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1710 1711 /* Disable C1E by clearing the "C1E promotion" bit. */ 1712 c1e_promotion = C1E_PROMOTION_DISABLE; 1713 return; 1714 } 1715 1716 /* Make sure C1E is enabled by default */ 1717 c1e_promotion = C1E_PROMOTION_ENABLE; 1718 } 1719 1720 /** 1721 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1722 */ 1723 static void __init spr_idle_state_table_update(void) 1724 { 1725 unsigned long long msr; 1726 1727 /* Check if user prefers C1E over C1. */ 1728 if ((preferred_states_mask & BIT(2)) && 1729 !(preferred_states_mask & BIT(1))) { 1730 /* Disable C1 and enable C1E. */ 1731 spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; 1732 spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1733 1734 /* Enable C1E using the "C1E promotion" bit. */ 1735 c1e_promotion = C1E_PROMOTION_ENABLE; 1736 } 1737 1738 /* 1739 * By default, the C6 state assumes the worst-case scenario of package 1740 * C6. However, if PC6 is disabled, we update the numbers to match 1741 * core C6. 1742 */ 1743 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1744 1745 /* Limit value 2 and above allow for PC6. */ 1746 if ((msr & 0x7) < 2) { 1747 spr_cstates[2].exit_latency = 190; 1748 spr_cstates[2].target_residency = 600; 1749 } 1750 } 1751 1752 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1753 { 1754 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1755 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1756 MWAIT_SUBSTATE_MASK; 1757 1758 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1759 if (num_substates == 0) 1760 return false; 1761 1762 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1763 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1764 1765 return true; 1766 } 1767 1768 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1769 { 1770 int cstate; 1771 1772 switch (boot_cpu_data.x86_model) { 1773 case INTEL_FAM6_IVYBRIDGE_X: 1774 ivt_idle_state_table_update(); 1775 break; 1776 case INTEL_FAM6_ATOM_GOLDMONT: 1777 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1778 bxt_idle_state_table_update(); 1779 break; 1780 case INTEL_FAM6_SKYLAKE: 1781 sklh_idle_state_table_update(); 1782 break; 1783 case INTEL_FAM6_SKYLAKE_X: 1784 skx_idle_state_table_update(); 1785 break; 1786 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1787 spr_idle_state_table_update(); 1788 break; 1789 case INTEL_FAM6_ALDERLAKE: 1790 case INTEL_FAM6_ALDERLAKE_L: 1791 adl_idle_state_table_update(); 1792 break; 1793 } 1794 1795 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1796 unsigned int mwait_hint; 1797 1798 if (intel_idle_max_cstate_reached(cstate)) 1799 break; 1800 1801 if (!cpuidle_state_table[cstate].enter && 1802 !cpuidle_state_table[cstate].enter_s2idle) 1803 break; 1804 1805 /* If marked as unusable, skip this state. */ 1806 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1807 pr_debug("state %s is disabled\n", 1808 cpuidle_state_table[cstate].name); 1809 continue; 1810 } 1811 1812 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1813 if (!intel_idle_verify_cstate(mwait_hint)) 1814 continue; 1815 1816 /* Structure copy. 
*/ 1817 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1818 1819 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) 1820 drv->states[drv->state_count].enter = intel_idle_irq; 1821 1822 if ((disabled_states_mask & BIT(drv->state_count)) || 1823 ((icpu->use_acpi || force_use_acpi) && 1824 intel_idle_off_by_default(mwait_hint) && 1825 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1826 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1827 1828 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1829 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1830 1831 drv->state_count++; 1832 } 1833 1834 if (icpu->byt_auto_demotion_disable_flag) { 1835 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1836 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1837 } 1838 } 1839 1840 /** 1841 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1842 * @drv: cpuidle driver structure to initialize. 1843 */ 1844 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1845 { 1846 cpuidle_poll_state_init(drv); 1847 1848 if (disabled_states_mask & BIT(0)) 1849 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1850 1851 drv->state_count = 1; 1852 1853 if (icpu) 1854 intel_idle_init_cstates_icpu(drv); 1855 else 1856 intel_idle_init_cstates_acpi(drv); 1857 } 1858 1859 static void auto_demotion_disable(void) 1860 { 1861 unsigned long long msr_bits; 1862 1863 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1864 msr_bits &= ~auto_demotion_disable_flags; 1865 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1866 } 1867 1868 static void c1e_promotion_enable(void) 1869 { 1870 unsigned long long msr_bits; 1871 1872 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1873 msr_bits |= 0x2; 1874 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1875 } 1876 1877 static void c1e_promotion_disable(void) 1878 { 1879 unsigned long long msr_bits; 1880 1881 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1882 msr_bits &= ~0x2; 1883 
wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1884 } 1885 1886 /** 1887 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1888 * @cpu: CPU to initialize. 1889 * 1890 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1891 * with the processor model flags. 1892 */ 1893 static int intel_idle_cpu_init(unsigned int cpu) 1894 { 1895 struct cpuidle_device *dev; 1896 1897 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1898 dev->cpu = cpu; 1899 1900 if (cpuidle_register_device(dev)) { 1901 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1902 return -EIO; 1903 } 1904 1905 if (auto_demotion_disable_flags) 1906 auto_demotion_disable(); 1907 1908 if (c1e_promotion == C1E_PROMOTION_ENABLE) 1909 c1e_promotion_enable(); 1910 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 1911 c1e_promotion_disable(); 1912 1913 return 0; 1914 } 1915 1916 static int intel_idle_cpu_online(unsigned int cpu) 1917 { 1918 struct cpuidle_device *dev; 1919 1920 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1921 tick_broadcast_enable(); 1922 1923 /* 1924 * Some systems can hotplug a cpu at runtime after 1925 * the kernel has booted, we have to initialize the 1926 * driver in this case 1927 */ 1928 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1929 if (!dev->registered) 1930 return intel_idle_cpu_init(cpu); 1931 1932 return 0; 1933 } 1934 1935 /** 1936 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
1937 */ 1938 static void __init intel_idle_cpuidle_devices_uninit(void) 1939 { 1940 int i; 1941 1942 for_each_online_cpu(i) 1943 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1944 } 1945 1946 static int __init intel_idle_init(void) 1947 { 1948 const struct x86_cpu_id *id; 1949 unsigned int eax, ebx, ecx; 1950 int retval; 1951 1952 /* Do not load intel_idle at all for now if idle= is passed */ 1953 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1954 return -ENODEV; 1955 1956 if (max_cstate == 0) { 1957 pr_debug("disabled\n"); 1958 return -EPERM; 1959 } 1960 1961 id = x86_match_cpu(intel_idle_ids); 1962 if (id) { 1963 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1964 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1965 return -ENODEV; 1966 } 1967 } else { 1968 id = x86_match_cpu(intel_mwait_ids); 1969 if (!id) 1970 return -ENODEV; 1971 } 1972 1973 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1974 return -ENODEV; 1975 1976 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1977 1978 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1979 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1980 !mwait_substates) 1981 return -ENODEV; 1982 1983 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1984 1985 icpu = (const struct idle_cpu *)id->driver_data; 1986 if (icpu) { 1987 cpuidle_state_table = icpu->state_table; 1988 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 1989 if (icpu->disable_promotion_to_c1e) 1990 c1e_promotion = C1E_PROMOTION_DISABLE; 1991 if (icpu->use_acpi || force_use_acpi) 1992 intel_idle_acpi_cst_extract(); 1993 } else if (!intel_idle_acpi_cst_extract()) { 1994 return -ENODEV; 1995 } 1996 1997 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1998 boot_cpu_data.x86_model); 1999 2000 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2001 if (!intel_idle_cpuidle_devices) 2002 return -ENOMEM; 2003 2004 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2005 2006 retval = 
cpuidle_register_driver(&intel_idle_driver); 2007 if (retval) { 2008 struct cpuidle_driver *drv = cpuidle_get_driver(); 2009 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2010 drv ? drv->name : "none"); 2011 goto init_driver_fail; 2012 } 2013 2014 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2015 intel_idle_cpu_online, NULL); 2016 if (retval < 0) 2017 goto hp_setup_fail; 2018 2019 pr_debug("Local APIC timer is reliable in %s\n", 2020 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 2021 2022 return 0; 2023 2024 hp_setup_fail: 2025 intel_idle_cpuidle_devices_uninit(); 2026 cpuidle_unregister_driver(&intel_idle_driver); 2027 init_driver_fail: 2028 free_percpu(intel_idle_cpuidle_devices); 2029 return retval; 2030 2031 } 2032 device_initcall(intel_idle_init); 2033 2034 /* 2035 * We are not really modular, but we used to support that. Meaning we also 2036 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2037 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2038 * is the easiest way (currently) to continue doing that. 2039 */ 2040 module_param(max_cstate, int, 0444); 2041 /* 2042 * The positions of the bits that are set in this number are the indices of the 2043 * idle states to be disabled by default (as reflected by the names of the 2044 * corresponding idle state directories in sysfs, "state0", "state1" ... 2045 * "state<i>" ..., where <i> is the index of the given state). 2046 */ 2047 module_param_named(states_off, disabled_states_mask, uint, 0444); 2048 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2049 /* 2050 * Some platforms come with mutually exclusive C-states, so that if one is 2051 * enabled, the other C-states must not be used. Example: C1 and C1E on 2052 * Sapphire Rapids platform. 
This parameter allows for selecting the 2053 * preferred C-states among the groups of mutually exclusive C-states - the 2054 * selected C-states will be registered, the other C-states from the mutually 2055 * exclusive group won't be registered. If the platform has no mutually 2056 * exclusive C-states, this parameter has no effect. 2057 */ 2058 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); 2059 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); 2060