// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver. The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */

/* Prefix every pr_*() message emitted by this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <linux/time64.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <linux/sysfs.h>
#include <asm/cpuid/api.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/msr.h>
#include <asm/tsc.h>
#include <asm/fpu/api.h>
#include <asm/smp.h>

/*
 * The cpuidle driver object. Only the name and owner are set statically
 * here; the remaining fields are zero-initialized.
 */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * NOTE(review): the three knobs below are presumably set from module
 * parameters elsewhere in the file -- confirm against their users.
 */
static unsigned int disabled_states_mask __read_mostly;
static bool force_irq_on __read_mostly;
static bool ibrs_off __read_mostly;

/* The maximum allowed length for the 'table' module parameter */
#define MAX_CMDLINE_TABLE_LEN 256
/* Maximum allowed C-state latency (5 ms, expressed in microseconds) */
#define MAX_CMDLINE_LATENCY_US (5 * USEC_PER_MSEC)
/* Maximum allowed C-state target residency (100 ms, in microseconds) */
#define MAX_CMDLINE_RESIDENCY_US (100 * USEC_PER_MSEC)

/* The Package C-State Limit bits in MSR_PKG_CST_CONFIG_CONTROL */
#define SKX_PKG_CST_LIMIT_MASK GENMASK(2, 0)
/* PC6 is enabled when Package C-State Limit >= this value */
#define SKX_PKG_CST_LIMIT_PC6 2

/*
 * Fixed-size buffer of MAX_CMDLINE_TABLE_LEN bytes.
 * NOTE(review): presumably holds the raw 'table' module parameter string
 * -- confirm against the module_param declaration.
 */
static char cmdline_table_str[MAX_CMDLINE_TABLE_LEN] __read_mostly;

/* Per-CPU cpuidle device objects. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * NOTE(review): same name as the struct idle_cpu field below; presumably
 * a copy of the selected entry's value -- confirm.
 */
static unsigned long auto_demotion_disable_flags;

/* Tri-state C1E promotion policy; defaults to C1E_PROMOTION_PRESERVE. */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;

/*
 * Per-CPU-model description: an idle states table plus a few quirk
 * settings.
 * NOTE(review): the meaning of the three bool fields is inferred from
 * their names only; their users are outside this part of the file.
 */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool disable_promotion_to_c1e;
	bool c1_demotion_supported;
	bool use_acpi;
};

static bool c1_demotion_supported;
static DEFINE_MUTEX(c1_demotion_mutex);

static struct device *sysfs_root __initdata;

/*
 * The objects below are tagged __initdata.
 * NOTE(review): presumably only valid during driver initialization --
 * confirm no late users exist.
 */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/* C-states data from the 'intel_idle.table' cmdline parameter */
static struct cpuidle_state cmdline_states[CPUIDLE_STATE_MAX] __initdata;

static unsigned int mwait_substates __initdata;

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS BIT(16)

/*
 * Initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)

/*
 * Ignore the sub-state when matching mwait hints between the ACPI _CST and
 * custom tables.
 */
#define CPUIDLE_FLAG_PARTIAL_HINT_MATCH BIT(18)

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
161 */ 162 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 163 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 164 165 static __always_inline int __intel_idle(struct cpuidle_device *dev, 166 struct cpuidle_driver *drv, 167 int index, bool irqoff) 168 { 169 struct cpuidle_state *state = &drv->states[index]; 170 unsigned int eax = flg2MWAIT(state->flags); 171 unsigned int ecx = 1*irqoff; /* break on interrupt flag */ 172 173 mwait_idle_with_hints(eax, ecx); 174 175 return index; 176 } 177 178 /** 179 * intel_idle - Ask the processor to enter the given idle state. 180 * @dev: cpuidle device of the target CPU. 181 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 182 * @index: Target idle state index. 183 * 184 * Use the MWAIT instruction to notify the processor that the CPU represented by 185 * @dev is idle and it can try to enter the idle state corresponding to @index. 186 * 187 * If the local APIC timer is not known to be reliable in the target idle state, 188 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 189 * 190 * Must be called under local_irq_disable(). 
191 */ 192 static __cpuidle int intel_idle(struct cpuidle_device *dev, 193 struct cpuidle_driver *drv, int index) 194 { 195 return __intel_idle(dev, drv, index, true); 196 } 197 198 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 199 struct cpuidle_driver *drv, int index) 200 { 201 return __intel_idle(dev, drv, index, false); 202 } 203 204 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 205 struct cpuidle_driver *drv, int index) 206 { 207 bool smt_active = sched_smt_active(); 208 u64 spec_ctrl = spec_ctrl_current(); 209 int ret; 210 211 if (smt_active) 212 __update_spec_ctrl(0); 213 214 ret = __intel_idle(dev, drv, index, true); 215 216 if (smt_active) 217 __update_spec_ctrl(spec_ctrl); 218 219 return ret; 220 } 221 222 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, 223 struct cpuidle_driver *drv, int index) 224 { 225 fpu_idle_fpregs(); 226 return __intel_idle(dev, drv, index, true); 227 } 228 229 /** 230 * intel_idle_s2idle - Ask the processor to enter the given idle state. 231 * @dev: cpuidle device of the target CPU. 232 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 233 * @index: Target idle state index. 234 * 235 * Use the MWAIT instruction to notify the processor that the CPU represented by 236 * @dev is idle and it can try to enter the idle state corresponding to @index. 237 * 238 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 239 * scheduler tick and suspended scheduler clock on the target CPU. 
240 */ 241 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 242 struct cpuidle_driver *drv, int index) 243 { 244 struct cpuidle_state *state = &drv->states[index]; 245 unsigned int eax = flg2MWAIT(state->flags); 246 unsigned int ecx = 1; /* break on interrupt flag */ 247 248 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) 249 fpu_idle_fpregs(); 250 251 mwait_idle_with_hints(eax, ecx); 252 253 return 0; 254 } 255 256 static void intel_idle_enter_dead(struct cpuidle_device *dev, int index) 257 { 258 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 259 struct cpuidle_state *state = &drv->states[index]; 260 unsigned long eax = flg2MWAIT(state->flags); 261 262 mwait_play_dead(eax); 263 } 264 265 /* 266 * States are indexed by the cstate number, 267 * which is also the index into the MWAIT hint array. 268 * Thus C0 is a dummy. 269 */ 270 static struct cpuidle_state nehalem_cstates[] __initdata = { 271 { 272 .name = "C1", 273 .desc = "MWAIT 0x00", 274 .flags = MWAIT2flg(0x00), 275 .exit_latency = 3, 276 .target_residency = 6, 277 .enter = intel_idle, 278 .enter_s2idle = intel_idle_s2idle, }, 279 { 280 .name = "C1E", 281 .desc = "MWAIT 0x01", 282 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 283 .exit_latency = 10, 284 .target_residency = 20, 285 .enter = intel_idle, 286 .enter_s2idle = intel_idle_s2idle, }, 287 { 288 .name = "C3", 289 .desc = "MWAIT 0x10", 290 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 291 .exit_latency = 20, 292 .target_residency = 80, 293 .enter = intel_idle, 294 .enter_s2idle = intel_idle_s2idle, }, 295 { 296 .name = "C6", 297 .desc = "MWAIT 0x20", 298 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 299 .exit_latency = 200, 300 .target_residency = 800, 301 .enter = intel_idle, 302 .enter_s2idle = intel_idle_s2idle, }, 303 { 304 .enter = NULL } 305 }; 306 307 static struct cpuidle_state snb_cstates[] __initdata = { 308 { 309 .name = "C1", 310 .desc = "MWAIT 0x00", 311 .flags = MWAIT2flg(0x00), 312 
.exit_latency = 2, 313 .target_residency = 2, 314 .enter = intel_idle, 315 .enter_s2idle = intel_idle_s2idle, }, 316 { 317 .name = "C1E", 318 .desc = "MWAIT 0x01", 319 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 320 .exit_latency = 10, 321 .target_residency = 20, 322 .enter = intel_idle, 323 .enter_s2idle = intel_idle_s2idle, }, 324 { 325 .name = "C3", 326 .desc = "MWAIT 0x10", 327 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 328 .exit_latency = 80, 329 .target_residency = 211, 330 .enter = intel_idle, 331 .enter_s2idle = intel_idle_s2idle, }, 332 { 333 .name = "C6", 334 .desc = "MWAIT 0x20", 335 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 336 .exit_latency = 104, 337 .target_residency = 345, 338 .enter = intel_idle, 339 .enter_s2idle = intel_idle_s2idle, }, 340 { 341 .name = "C7", 342 .desc = "MWAIT 0x30", 343 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 344 .exit_latency = 109, 345 .target_residency = 345, 346 .enter = intel_idle, 347 .enter_s2idle = intel_idle_s2idle, }, 348 { 349 .enter = NULL } 350 }; 351 352 static struct cpuidle_state byt_cstates[] __initdata = { 353 { 354 .name = "C1", 355 .desc = "MWAIT 0x00", 356 .flags = MWAIT2flg(0x00), 357 .exit_latency = 1, 358 .target_residency = 1, 359 .enter = intel_idle, 360 .enter_s2idle = intel_idle_s2idle, }, 361 { 362 .name = "C6N", 363 .desc = "MWAIT 0x58", 364 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 365 .exit_latency = 300, 366 .target_residency = 275, 367 .enter = intel_idle, 368 .enter_s2idle = intel_idle_s2idle, }, 369 { 370 .name = "C6S", 371 .desc = "MWAIT 0x52", 372 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 373 .exit_latency = 500, 374 .target_residency = 560, 375 .enter = intel_idle, 376 .enter_s2idle = intel_idle_s2idle, }, 377 { 378 .name = "C7", 379 .desc = "MWAIT 0x60", 380 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 381 .exit_latency = 1200, 382 .target_residency = 4000, 383 .enter = intel_idle, 384 .enter_s2idle = 
intel_idle_s2idle, }, 385 { 386 .name = "C7S", 387 .desc = "MWAIT 0x64", 388 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 389 .exit_latency = 10000, 390 .target_residency = 20000, 391 .enter = intel_idle, 392 .enter_s2idle = intel_idle_s2idle, }, 393 { 394 .enter = NULL } 395 }; 396 397 static struct cpuidle_state cht_cstates[] __initdata = { 398 { 399 .name = "C1", 400 .desc = "MWAIT 0x00", 401 .flags = MWAIT2flg(0x00), 402 .exit_latency = 1, 403 .target_residency = 1, 404 .enter = intel_idle, 405 .enter_s2idle = intel_idle_s2idle, }, 406 { 407 .name = "C6N", 408 .desc = "MWAIT 0x58", 409 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 410 .exit_latency = 80, 411 .target_residency = 275, 412 .enter = intel_idle, 413 .enter_s2idle = intel_idle_s2idle, }, 414 { 415 .name = "C6S", 416 .desc = "MWAIT 0x52", 417 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 418 .exit_latency = 200, 419 .target_residency = 560, 420 .enter = intel_idle, 421 .enter_s2idle = intel_idle_s2idle, }, 422 { 423 .name = "C7", 424 .desc = "MWAIT 0x60", 425 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 426 .exit_latency = 1200, 427 .target_residency = 4000, 428 .enter = intel_idle, 429 .enter_s2idle = intel_idle_s2idle, }, 430 { 431 .name = "C7S", 432 .desc = "MWAIT 0x64", 433 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 434 .exit_latency = 10000, 435 .target_residency = 20000, 436 .enter = intel_idle, 437 .enter_s2idle = intel_idle_s2idle, }, 438 { 439 .enter = NULL } 440 }; 441 442 static struct cpuidle_state ivb_cstates[] __initdata = { 443 { 444 .name = "C1", 445 .desc = "MWAIT 0x00", 446 .flags = MWAIT2flg(0x00), 447 .exit_latency = 1, 448 .target_residency = 1, 449 .enter = intel_idle, 450 .enter_s2idle = intel_idle_s2idle, }, 451 { 452 .name = "C1E", 453 .desc = "MWAIT 0x01", 454 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 455 .exit_latency = 10, 456 .target_residency = 20, 457 .enter = intel_idle, 458 .enter_s2idle = intel_idle_s2idle, }, 459 { 
460 .name = "C3", 461 .desc = "MWAIT 0x10", 462 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 463 .exit_latency = 59, 464 .target_residency = 156, 465 .enter = intel_idle, 466 .enter_s2idle = intel_idle_s2idle, }, 467 { 468 .name = "C6", 469 .desc = "MWAIT 0x20", 470 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 471 .exit_latency = 80, 472 .target_residency = 300, 473 .enter = intel_idle, 474 .enter_s2idle = intel_idle_s2idle, }, 475 { 476 .name = "C7", 477 .desc = "MWAIT 0x30", 478 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 479 .exit_latency = 87, 480 .target_residency = 300, 481 .enter = intel_idle, 482 .enter_s2idle = intel_idle_s2idle, }, 483 { 484 .enter = NULL } 485 }; 486 487 static struct cpuidle_state ivt_cstates[] __initdata = { 488 { 489 .name = "C1", 490 .desc = "MWAIT 0x00", 491 .flags = MWAIT2flg(0x00), 492 .exit_latency = 1, 493 .target_residency = 1, 494 .enter = intel_idle, 495 .enter_s2idle = intel_idle_s2idle, }, 496 { 497 .name = "C1E", 498 .desc = "MWAIT 0x01", 499 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 500 .exit_latency = 10, 501 .target_residency = 80, 502 .enter = intel_idle, 503 .enter_s2idle = intel_idle_s2idle, }, 504 { 505 .name = "C3", 506 .desc = "MWAIT 0x10", 507 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 508 .exit_latency = 59, 509 .target_residency = 156, 510 .enter = intel_idle, 511 .enter_s2idle = intel_idle_s2idle, }, 512 { 513 .name = "C6", 514 .desc = "MWAIT 0x20", 515 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 516 .exit_latency = 82, 517 .target_residency = 300, 518 .enter = intel_idle, 519 .enter_s2idle = intel_idle_s2idle, }, 520 { 521 .enter = NULL } 522 }; 523 524 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 525 { 526 .name = "C1", 527 .desc = "MWAIT 0x00", 528 .flags = MWAIT2flg(0x00), 529 .exit_latency = 1, 530 .target_residency = 1, 531 .enter = intel_idle, 532 .enter_s2idle = intel_idle_s2idle, }, 533 { 534 .name = "C1E", 535 .desc = "MWAIT 
0x01", 536 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 537 .exit_latency = 10, 538 .target_residency = 250, 539 .enter = intel_idle, 540 .enter_s2idle = intel_idle_s2idle, }, 541 { 542 .name = "C3", 543 .desc = "MWAIT 0x10", 544 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 545 .exit_latency = 59, 546 .target_residency = 300, 547 .enter = intel_idle, 548 .enter_s2idle = intel_idle_s2idle, }, 549 { 550 .name = "C6", 551 .desc = "MWAIT 0x20", 552 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 553 .exit_latency = 84, 554 .target_residency = 400, 555 .enter = intel_idle, 556 .enter_s2idle = intel_idle_s2idle, }, 557 { 558 .enter = NULL } 559 }; 560 561 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 562 { 563 .name = "C1", 564 .desc = "MWAIT 0x00", 565 .flags = MWAIT2flg(0x00), 566 .exit_latency = 1, 567 .target_residency = 1, 568 .enter = intel_idle, 569 .enter_s2idle = intel_idle_s2idle, }, 570 { 571 .name = "C1E", 572 .desc = "MWAIT 0x01", 573 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 574 .exit_latency = 10, 575 .target_residency = 500, 576 .enter = intel_idle, 577 .enter_s2idle = intel_idle_s2idle, }, 578 { 579 .name = "C3", 580 .desc = "MWAIT 0x10", 581 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 582 .exit_latency = 59, 583 .target_residency = 600, 584 .enter = intel_idle, 585 .enter_s2idle = intel_idle_s2idle, }, 586 { 587 .name = "C6", 588 .desc = "MWAIT 0x20", 589 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 590 .exit_latency = 88, 591 .target_residency = 700, 592 .enter = intel_idle, 593 .enter_s2idle = intel_idle_s2idle, }, 594 { 595 .enter = NULL } 596 }; 597 598 static struct cpuidle_state hsw_cstates[] __initdata = { 599 { 600 .name = "C1", 601 .desc = "MWAIT 0x00", 602 .flags = MWAIT2flg(0x00), 603 .exit_latency = 2, 604 .target_residency = 2, 605 .enter = intel_idle, 606 .enter_s2idle = intel_idle_s2idle, }, 607 { 608 .name = "C1E", 609 .desc = "MWAIT 0x01", 610 .flags = MWAIT2flg(0x01) | 
CPUIDLE_FLAG_ALWAYS_ENABLE, 611 .exit_latency = 10, 612 .target_residency = 20, 613 .enter = intel_idle, 614 .enter_s2idle = intel_idle_s2idle, }, 615 { 616 .name = "C3", 617 .desc = "MWAIT 0x10", 618 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 619 .exit_latency = 33, 620 .target_residency = 100, 621 .enter = intel_idle, 622 .enter_s2idle = intel_idle_s2idle, }, 623 { 624 .name = "C6", 625 .desc = "MWAIT 0x20", 626 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 627 .exit_latency = 133, 628 .target_residency = 400, 629 .enter = intel_idle, 630 .enter_s2idle = intel_idle_s2idle, }, 631 { 632 .name = "C7s", 633 .desc = "MWAIT 0x32", 634 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 635 .exit_latency = 166, 636 .target_residency = 500, 637 .enter = intel_idle, 638 .enter_s2idle = intel_idle_s2idle, }, 639 { 640 .name = "C8", 641 .desc = "MWAIT 0x40", 642 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 643 .exit_latency = 300, 644 .target_residency = 900, 645 .enter = intel_idle, 646 .enter_s2idle = intel_idle_s2idle, }, 647 { 648 .name = "C9", 649 .desc = "MWAIT 0x50", 650 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 651 .exit_latency = 600, 652 .target_residency = 1800, 653 .enter = intel_idle, 654 .enter_s2idle = intel_idle_s2idle, }, 655 { 656 .name = "C10", 657 .desc = "MWAIT 0x60", 658 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 659 .exit_latency = 2600, 660 .target_residency = 7700, 661 .enter = intel_idle, 662 .enter_s2idle = intel_idle_s2idle, }, 663 { 664 .enter = NULL } 665 }; 666 static struct cpuidle_state bdw_cstates[] __initdata = { 667 { 668 .name = "C1", 669 .desc = "MWAIT 0x00", 670 .flags = MWAIT2flg(0x00), 671 .exit_latency = 2, 672 .target_residency = 2, 673 .enter = intel_idle, 674 .enter_s2idle = intel_idle_s2idle, }, 675 { 676 .name = "C1E", 677 .desc = "MWAIT 0x01", 678 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 679 .exit_latency = 10, 680 .target_residency = 20, 681 .enter = intel_idle, 
682 .enter_s2idle = intel_idle_s2idle, }, 683 { 684 .name = "C3", 685 .desc = "MWAIT 0x10", 686 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 687 .exit_latency = 40, 688 .target_residency = 100, 689 .enter = intel_idle, 690 .enter_s2idle = intel_idle_s2idle, }, 691 { 692 .name = "C6", 693 .desc = "MWAIT 0x20", 694 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 695 .exit_latency = 133, 696 .target_residency = 400, 697 .enter = intel_idle, 698 .enter_s2idle = intel_idle_s2idle, }, 699 { 700 .name = "C7s", 701 .desc = "MWAIT 0x32", 702 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 703 .exit_latency = 166, 704 .target_residency = 500, 705 .enter = intel_idle, 706 .enter_s2idle = intel_idle_s2idle, }, 707 { 708 .name = "C8", 709 .desc = "MWAIT 0x40", 710 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 711 .exit_latency = 300, 712 .target_residency = 900, 713 .enter = intel_idle, 714 .enter_s2idle = intel_idle_s2idle, }, 715 { 716 .name = "C9", 717 .desc = "MWAIT 0x50", 718 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 719 .exit_latency = 600, 720 .target_residency = 1800, 721 .enter = intel_idle, 722 .enter_s2idle = intel_idle_s2idle, }, 723 { 724 .name = "C10", 725 .desc = "MWAIT 0x60", 726 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 727 .exit_latency = 2600, 728 .target_residency = 7700, 729 .enter = intel_idle, 730 .enter_s2idle = intel_idle_s2idle, }, 731 { 732 .enter = NULL } 733 }; 734 735 static struct cpuidle_state skl_cstates[] __initdata = { 736 { 737 .name = "C1", 738 .desc = "MWAIT 0x00", 739 .flags = MWAIT2flg(0x00), 740 .exit_latency = 2, 741 .target_residency = 2, 742 .enter = intel_idle, 743 .enter_s2idle = intel_idle_s2idle, }, 744 { 745 .name = "C1E", 746 .desc = "MWAIT 0x01", 747 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 748 .exit_latency = 10, 749 .target_residency = 20, 750 .enter = intel_idle, 751 .enter_s2idle = intel_idle_s2idle, }, 752 { 753 .name = "C3", 754 .desc = "MWAIT 0x10", 755 .flags 
= MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 756 .exit_latency = 70, 757 .target_residency = 100, 758 .enter = intel_idle, 759 .enter_s2idle = intel_idle_s2idle, }, 760 { 761 .name = "C6", 762 .desc = "MWAIT 0x20", 763 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 764 .exit_latency = 85, 765 .target_residency = 200, 766 .enter = intel_idle, 767 .enter_s2idle = intel_idle_s2idle, }, 768 { 769 .name = "C7s", 770 .desc = "MWAIT 0x33", 771 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 772 .exit_latency = 124, 773 .target_residency = 800, 774 .enter = intel_idle, 775 .enter_s2idle = intel_idle_s2idle, }, 776 { 777 .name = "C8", 778 .desc = "MWAIT 0x40", 779 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 780 .exit_latency = 200, 781 .target_residency = 800, 782 .enter = intel_idle, 783 .enter_s2idle = intel_idle_s2idle, }, 784 { 785 .name = "C9", 786 .desc = "MWAIT 0x50", 787 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 788 .exit_latency = 480, 789 .target_residency = 5000, 790 .enter = intel_idle, 791 .enter_s2idle = intel_idle_s2idle, }, 792 { 793 .name = "C10", 794 .desc = "MWAIT 0x60", 795 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 796 .exit_latency = 890, 797 .target_residency = 5000, 798 .enter = intel_idle, 799 .enter_s2idle = intel_idle_s2idle, }, 800 { 801 .enter = NULL } 802 }; 803 804 static struct cpuidle_state skx_cstates[] __initdata = { 805 { 806 .name = "C1", 807 .desc = "MWAIT 0x00", 808 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 809 .exit_latency = 2, 810 .target_residency = 2, 811 .enter = intel_idle, 812 .enter_s2idle = intel_idle_s2idle, }, 813 { 814 .name = "C1E", 815 .desc = "MWAIT 0x01", 816 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 817 .exit_latency = 10, 818 .target_residency = 20, 819 .enter = intel_idle, 820 .enter_s2idle = intel_idle_s2idle, }, 821 { 822 .name = "C6", 823 .desc = 
"MWAIT 0x20", 824 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 825 .exit_latency = 133, 826 .target_residency = 600, 827 .enter = intel_idle, 828 .enter_s2idle = intel_idle_s2idle, }, 829 { 830 .enter = NULL } 831 }; 832 833 static struct cpuidle_state icx_cstates[] __initdata = { 834 { 835 .name = "C1", 836 .desc = "MWAIT 0x00", 837 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 838 .exit_latency = 1, 839 .target_residency = 1, 840 .enter = intel_idle, 841 .enter_s2idle = intel_idle_s2idle, }, 842 { 843 .name = "C1E", 844 .desc = "MWAIT 0x01", 845 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 846 .exit_latency = 4, 847 .target_residency = 4, 848 .enter = intel_idle, 849 .enter_s2idle = intel_idle_s2idle, }, 850 { 851 .name = "C6", 852 .desc = "MWAIT 0x20", 853 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 854 .exit_latency = 170, 855 .target_residency = 600, 856 .enter = intel_idle, 857 .enter_s2idle = intel_idle_s2idle, }, 858 { 859 .enter = NULL } 860 }; 861 862 /* 863 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 864 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 865 * But in this case there is effectively no C1, because C1 requests are 866 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 867 * and C1E requests end up with C1, so there is effectively no C1E. 868 * 869 * By default we enable C1E and disable C1 by marking it with 870 * 'CPUIDLE_FLAG_UNUSABLE'. 
871 */ 872 static struct cpuidle_state adl_cstates[] __initdata = { 873 { 874 .name = "C1", 875 .desc = "MWAIT 0x00", 876 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 877 .exit_latency = 1, 878 .target_residency = 1, 879 .enter = intel_idle, 880 .enter_s2idle = intel_idle_s2idle, }, 881 { 882 .name = "C1E", 883 .desc = "MWAIT 0x01", 884 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 885 .exit_latency = 2, 886 .target_residency = 4, 887 .enter = intel_idle, 888 .enter_s2idle = intel_idle_s2idle, }, 889 { 890 .name = "C6", 891 .desc = "MWAIT 0x20", 892 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 893 .exit_latency = 220, 894 .target_residency = 600, 895 .enter = intel_idle, 896 .enter_s2idle = intel_idle_s2idle, }, 897 { 898 .name = "C8", 899 .desc = "MWAIT 0x40", 900 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 901 .exit_latency = 280, 902 .target_residency = 800, 903 .enter = intel_idle, 904 .enter_s2idle = intel_idle_s2idle, }, 905 { 906 .name = "C10", 907 .desc = "MWAIT 0x60", 908 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 909 .exit_latency = 680, 910 .target_residency = 2000, 911 .enter = intel_idle, 912 .enter_s2idle = intel_idle_s2idle, }, 913 { 914 .enter = NULL } 915 }; 916 917 static struct cpuidle_state adl_l_cstates[] __initdata = { 918 { 919 .name = "C1", 920 .desc = "MWAIT 0x00", 921 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 922 .exit_latency = 1, 923 .target_residency = 1, 924 .enter = intel_idle, 925 .enter_s2idle = intel_idle_s2idle, }, 926 { 927 .name = "C1E", 928 .desc = "MWAIT 0x01", 929 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 930 .exit_latency = 2, 931 .target_residency = 4, 932 .enter = intel_idle, 933 .enter_s2idle = intel_idle_s2idle, }, 934 { 935 .name = "C6", 936 .desc = "MWAIT 0x20", 937 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 938 .exit_latency = 170, 939 .target_residency = 500, 940 .enter = intel_idle, 941 .enter_s2idle = intel_idle_s2idle, }, 942 { 943 .name = 
"C8", 944 .desc = "MWAIT 0x40", 945 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 946 .exit_latency = 200, 947 .target_residency = 600, 948 .enter = intel_idle, 949 .enter_s2idle = intel_idle_s2idle, }, 950 { 951 .name = "C10", 952 .desc = "MWAIT 0x60", 953 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 954 .exit_latency = 230, 955 .target_residency = 700, 956 .enter = intel_idle, 957 .enter_s2idle = intel_idle_s2idle, }, 958 { 959 .enter = NULL } 960 }; 961 962 static struct cpuidle_state mtl_l_cstates[] __initdata = { 963 { 964 .name = "C1E", 965 .desc = "MWAIT 0x01", 966 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 967 .exit_latency = 1, 968 .target_residency = 1, 969 .enter = intel_idle, 970 .enter_s2idle = intel_idle_s2idle, }, 971 { 972 .name = "C6", 973 .desc = "MWAIT 0x20", 974 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 975 .exit_latency = 140, 976 .target_residency = 420, 977 .enter = intel_idle, 978 .enter_s2idle = intel_idle_s2idle, }, 979 { 980 .name = "C10", 981 .desc = "MWAIT 0x60", 982 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 983 .exit_latency = 310, 984 .target_residency = 930, 985 .enter = intel_idle, 986 .enter_s2idle = intel_idle_s2idle, }, 987 { 988 .enter = NULL } 989 }; 990 991 static struct cpuidle_state ptl_cstates[] __initdata = { 992 { 993 .name = "C1", 994 .desc = "MWAIT 0x00", 995 .flags = MWAIT2flg(0x00), 996 .exit_latency = 1, 997 .target_residency = 1, 998 .enter = &intel_idle, 999 .enter_s2idle = intel_idle_s2idle, }, 1000 { 1001 .name = "C1E", 1002 .desc = "MWAIT 0x01", 1003 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1004 .exit_latency = 10, 1005 .target_residency = 10, 1006 .enter = &intel_idle, 1007 .enter_s2idle = intel_idle_s2idle, }, 1008 { 1009 .name = "C6S", 1010 .desc = "MWAIT 0x21", 1011 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED, 1012 .exit_latency = 300, 1013 .target_residency = 300, 1014 .enter = &intel_idle, 1015 .enter_s2idle = intel_idle_s2idle, }, 1016 
{ 1017 .name = "C10", 1018 .desc = "MWAIT 0x60", 1019 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1020 .exit_latency = 370, 1021 .target_residency = 2500, 1022 .enter = &intel_idle, 1023 .enter_s2idle = intel_idle_s2idle, }, 1024 { 1025 .enter = NULL } 1026 }; 1027 1028 static struct cpuidle_state gmt_cstates[] __initdata = { 1029 { 1030 .name = "C1", 1031 .desc = "MWAIT 0x00", 1032 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 1033 .exit_latency = 1, 1034 .target_residency = 1, 1035 .enter = intel_idle, 1036 .enter_s2idle = intel_idle_s2idle, }, 1037 { 1038 .name = "C1E", 1039 .desc = "MWAIT 0x01", 1040 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1041 .exit_latency = 2, 1042 .target_residency = 4, 1043 .enter = intel_idle, 1044 .enter_s2idle = intel_idle_s2idle, }, 1045 { 1046 .name = "C6", 1047 .desc = "MWAIT 0x20", 1048 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1049 .exit_latency = 195, 1050 .target_residency = 585, 1051 .enter = intel_idle, 1052 .enter_s2idle = intel_idle_s2idle, }, 1053 { 1054 .name = "C8", 1055 .desc = "MWAIT 0x40", 1056 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1057 .exit_latency = 260, 1058 .target_residency = 1040, 1059 .enter = intel_idle, 1060 .enter_s2idle = intel_idle_s2idle, }, 1061 { 1062 .name = "C10", 1063 .desc = "MWAIT 0x60", 1064 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1065 .exit_latency = 660, 1066 .target_residency = 1980, 1067 .enter = intel_idle, 1068 .enter_s2idle = intel_idle_s2idle, }, 1069 { 1070 .enter = NULL } 1071 }; 1072 1073 static struct cpuidle_state spr_cstates[] __initdata = { 1074 { 1075 .name = "C1", 1076 .desc = "MWAIT 0x00", 1077 .flags = MWAIT2flg(0x00), 1078 .exit_latency = 1, 1079 .target_residency = 1, 1080 .enter = intel_idle, 1081 .enter_s2idle = intel_idle_s2idle, }, 1082 { 1083 .name = "C1E", 1084 .desc = "MWAIT 0x01", 1085 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1086 .exit_latency = 2, 1087 .target_residency = 4, 1088 .enter = 
intel_idle, 1089 .enter_s2idle = intel_idle_s2idle, }, 1090 { 1091 .name = "C6", 1092 .desc = "MWAIT 0x20", 1093 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 1094 CPUIDLE_FLAG_INIT_XSTATE, 1095 .exit_latency = 290, 1096 .target_residency = 800, 1097 .enter = intel_idle, 1098 .enter_s2idle = intel_idle_s2idle, }, 1099 { 1100 .enter = NULL } 1101 }; 1102 1103 static struct cpuidle_state gnr_cstates[] __initdata = { 1104 { 1105 .name = "C1", 1106 .desc = "MWAIT 0x00", 1107 .flags = MWAIT2flg(0x00), 1108 .exit_latency = 1, 1109 .target_residency = 1, 1110 .enter = intel_idle, 1111 .enter_s2idle = intel_idle_s2idle, }, 1112 { 1113 .name = "C1E", 1114 .desc = "MWAIT 0x01", 1115 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1116 .exit_latency = 4, 1117 .target_residency = 4, 1118 .enter = intel_idle, 1119 .enter_s2idle = intel_idle_s2idle, }, 1120 { 1121 .name = "C6", 1122 .desc = "MWAIT 0x20", 1123 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 1124 CPUIDLE_FLAG_INIT_XSTATE | 1125 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1126 .exit_latency = 170, 1127 .target_residency = 650, 1128 .enter = intel_idle, 1129 .enter_s2idle = intel_idle_s2idle, }, 1130 { 1131 .name = "C6P", 1132 .desc = "MWAIT 0x21", 1133 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED | 1134 CPUIDLE_FLAG_INIT_XSTATE | 1135 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1136 .exit_latency = 210, 1137 .target_residency = 1000, 1138 .enter = intel_idle, 1139 .enter_s2idle = intel_idle_s2idle, }, 1140 { 1141 .enter = NULL } 1142 }; 1143 1144 static struct cpuidle_state gnrd_cstates[] __initdata = { 1145 { 1146 .name = "C1", 1147 .desc = "MWAIT 0x00", 1148 .flags = MWAIT2flg(0x00), 1149 .exit_latency = 1, 1150 .target_residency = 1, 1151 .enter = intel_idle, 1152 .enter_s2idle = intel_idle_s2idle, }, 1153 { 1154 .name = "C1E", 1155 .desc = "MWAIT 0x01", 1156 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1157 .exit_latency = 4, 1158 .target_residency = 4, 1159 .enter = intel_idle, 1160 
.enter_s2idle = intel_idle_s2idle, }, 1161 { 1162 .name = "C6", 1163 .desc = "MWAIT 0x20", 1164 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 1165 CPUIDLE_FLAG_INIT_XSTATE | 1166 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1167 .exit_latency = 220, 1168 .target_residency = 650, 1169 .enter = intel_idle, 1170 .enter_s2idle = intel_idle_s2idle, }, 1171 { 1172 .name = "C6P", 1173 .desc = "MWAIT 0x21", 1174 .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED | 1175 CPUIDLE_FLAG_INIT_XSTATE | 1176 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1177 .exit_latency = 240, 1178 .target_residency = 750, 1179 .enter = intel_idle, 1180 .enter_s2idle = intel_idle_s2idle, }, 1181 { 1182 .enter = NULL } 1183 }; 1184 1185 static struct cpuidle_state atom_cstates[] __initdata = { 1186 { 1187 .name = "C1E", 1188 .desc = "MWAIT 0x00", 1189 .flags = MWAIT2flg(0x00), 1190 .exit_latency = 10, 1191 .target_residency = 20, 1192 .enter = intel_idle, 1193 .enter_s2idle = intel_idle_s2idle, }, 1194 { 1195 .name = "C2", 1196 .desc = "MWAIT 0x10", 1197 .flags = MWAIT2flg(0x10), 1198 .exit_latency = 20, 1199 .target_residency = 80, 1200 .enter = intel_idle, 1201 .enter_s2idle = intel_idle_s2idle, }, 1202 { 1203 .name = "C4", 1204 .desc = "MWAIT 0x30", 1205 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1206 .exit_latency = 100, 1207 .target_residency = 400, 1208 .enter = intel_idle, 1209 .enter_s2idle = intel_idle_s2idle, }, 1210 { 1211 .name = "C6", 1212 .desc = "MWAIT 0x52", 1213 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1214 .exit_latency = 140, 1215 .target_residency = 560, 1216 .enter = intel_idle, 1217 .enter_s2idle = intel_idle_s2idle, }, 1218 { 1219 .enter = NULL } 1220 }; 1221 static struct cpuidle_state tangier_cstates[] __initdata = { 1222 { 1223 .name = "C1", 1224 .desc = "MWAIT 0x00", 1225 .flags = MWAIT2flg(0x00), 1226 .exit_latency = 1, 1227 .target_residency = 4, 1228 .enter = intel_idle, 1229 .enter_s2idle = intel_idle_s2idle, }, 1230 { 1231 .name = "C4", 1232 .desc = "MWAIT 
0x30", 1233 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1234 .exit_latency = 100, 1235 .target_residency = 400, 1236 .enter = intel_idle, 1237 .enter_s2idle = intel_idle_s2idle, }, 1238 { 1239 .name = "C6", 1240 .desc = "MWAIT 0x52", 1241 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1242 .exit_latency = 140, 1243 .target_residency = 560, 1244 .enter = intel_idle, 1245 .enter_s2idle = intel_idle_s2idle, }, 1246 { 1247 .name = "C7", 1248 .desc = "MWAIT 0x60", 1249 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1250 .exit_latency = 1200, 1251 .target_residency = 4000, 1252 .enter = intel_idle, 1253 .enter_s2idle = intel_idle_s2idle, }, 1254 { 1255 .name = "C9", 1256 .desc = "MWAIT 0x64", 1257 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1258 .exit_latency = 10000, 1259 .target_residency = 20000, 1260 .enter = intel_idle, 1261 .enter_s2idle = intel_idle_s2idle, }, 1262 { 1263 .enter = NULL } 1264 }; 1265 static struct cpuidle_state avn_cstates[] __initdata = { 1266 { 1267 .name = "C1", 1268 .desc = "MWAIT 0x00", 1269 .flags = MWAIT2flg(0x00), 1270 .exit_latency = 2, 1271 .target_residency = 2, 1272 .enter = intel_idle, 1273 .enter_s2idle = intel_idle_s2idle, }, 1274 { 1275 .name = "C6", 1276 .desc = "MWAIT 0x51", 1277 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1278 .exit_latency = 15, 1279 .target_residency = 45, 1280 .enter = intel_idle, 1281 .enter_s2idle = intel_idle_s2idle, }, 1282 { 1283 .enter = NULL } 1284 }; 1285 static struct cpuidle_state knl_cstates[] __initdata = { 1286 { 1287 .name = "C1", 1288 .desc = "MWAIT 0x00", 1289 .flags = MWAIT2flg(0x00), 1290 .exit_latency = 1, 1291 .target_residency = 2, 1292 .enter = intel_idle, 1293 .enter_s2idle = intel_idle_s2idle }, 1294 { 1295 .name = "C6", 1296 .desc = "MWAIT 0x10", 1297 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1298 .exit_latency = 120, 1299 .target_residency = 500, 1300 .enter = intel_idle, 1301 .enter_s2idle = intel_idle_s2idle }, 1302 { 1303 .enter = 
NULL } 1304 }; 1305 1306 static struct cpuidle_state bxt_cstates[] __initdata = { 1307 { 1308 .name = "C1", 1309 .desc = "MWAIT 0x00", 1310 .flags = MWAIT2flg(0x00), 1311 .exit_latency = 2, 1312 .target_residency = 2, 1313 .enter = intel_idle, 1314 .enter_s2idle = intel_idle_s2idle, }, 1315 { 1316 .name = "C1E", 1317 .desc = "MWAIT 0x01", 1318 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1319 .exit_latency = 10, 1320 .target_residency = 20, 1321 .enter = intel_idle, 1322 .enter_s2idle = intel_idle_s2idle, }, 1323 { 1324 .name = "C6", 1325 .desc = "MWAIT 0x20", 1326 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1327 .exit_latency = 133, 1328 .target_residency = 133, 1329 .enter = intel_idle, 1330 .enter_s2idle = intel_idle_s2idle, }, 1331 { 1332 .name = "C7s", 1333 .desc = "MWAIT 0x31", 1334 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1335 .exit_latency = 155, 1336 .target_residency = 155, 1337 .enter = intel_idle, 1338 .enter_s2idle = intel_idle_s2idle, }, 1339 { 1340 .name = "C8", 1341 .desc = "MWAIT 0x40", 1342 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1343 .exit_latency = 1000, 1344 .target_residency = 1000, 1345 .enter = intel_idle, 1346 .enter_s2idle = intel_idle_s2idle, }, 1347 { 1348 .name = "C9", 1349 .desc = "MWAIT 0x50", 1350 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1351 .exit_latency = 2000, 1352 .target_residency = 2000, 1353 .enter = intel_idle, 1354 .enter_s2idle = intel_idle_s2idle, }, 1355 { 1356 .name = "C10", 1357 .desc = "MWAIT 0x60", 1358 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1359 .exit_latency = 10000, 1360 .target_residency = 10000, 1361 .enter = intel_idle, 1362 .enter_s2idle = intel_idle_s2idle, }, 1363 { 1364 .enter = NULL } 1365 }; 1366 1367 static struct cpuidle_state dnv_cstates[] __initdata = { 1368 { 1369 .name = "C1", 1370 .desc = "MWAIT 0x00", 1371 .flags = MWAIT2flg(0x00), 1372 .exit_latency = 2, 1373 .target_residency = 2, 1374 .enter = intel_idle, 1375 .enter_s2idle = 
intel_idle_s2idle, }, 1376 { 1377 .name = "C1E", 1378 .desc = "MWAIT 0x01", 1379 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1380 .exit_latency = 10, 1381 .target_residency = 20, 1382 .enter = intel_idle, 1383 .enter_s2idle = intel_idle_s2idle, }, 1384 { 1385 .name = "C6", 1386 .desc = "MWAIT 0x20", 1387 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1388 .exit_latency = 50, 1389 .target_residency = 500, 1390 .enter = intel_idle, 1391 .enter_s2idle = intel_idle_s2idle, }, 1392 { 1393 .enter = NULL } 1394 }; 1395 1396 /* 1397 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 1398 * C6, and this is indicated in the CPUID mwait leaf. 1399 */ 1400 static struct cpuidle_state snr_cstates[] __initdata = { 1401 { 1402 .name = "C1", 1403 .desc = "MWAIT 0x00", 1404 .flags = MWAIT2flg(0x00), 1405 .exit_latency = 2, 1406 .target_residency = 2, 1407 .enter = intel_idle, 1408 .enter_s2idle = intel_idle_s2idle, }, 1409 { 1410 .name = "C1E", 1411 .desc = "MWAIT 0x01", 1412 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1413 .exit_latency = 15, 1414 .target_residency = 25, 1415 .enter = intel_idle, 1416 .enter_s2idle = intel_idle_s2idle, }, 1417 { 1418 .name = "C6", 1419 .desc = "MWAIT 0x20", 1420 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1421 .exit_latency = 130, 1422 .target_residency = 500, 1423 .enter = intel_idle, 1424 .enter_s2idle = intel_idle_s2idle, }, 1425 { 1426 .enter = NULL } 1427 }; 1428 1429 static struct cpuidle_state grr_cstates[] __initdata = { 1430 { 1431 .name = "C1", 1432 .desc = "MWAIT 0x00", 1433 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1434 .exit_latency = 1, 1435 .target_residency = 1, 1436 .enter = intel_idle, 1437 .enter_s2idle = intel_idle_s2idle, }, 1438 { 1439 .name = "C1E", 1440 .desc = "MWAIT 0x01", 1441 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1442 .exit_latency = 2, 1443 .target_residency = 10, 1444 .enter = intel_idle, 1445 .enter_s2idle = 
intel_idle_s2idle, }, 1446 { 1447 .name = "C6S", 1448 .desc = "MWAIT 0x22", 1449 .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, 1450 .exit_latency = 140, 1451 .target_residency = 500, 1452 .enter = intel_idle, 1453 .enter_s2idle = intel_idle_s2idle, }, 1454 { 1455 .enter = NULL } 1456 }; 1457 1458 static struct cpuidle_state srf_cstates[] __initdata = { 1459 { 1460 .name = "C1", 1461 .desc = "MWAIT 0x00", 1462 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1463 .exit_latency = 1, 1464 .target_residency = 1, 1465 .enter = intel_idle, 1466 .enter_s2idle = intel_idle_s2idle, }, 1467 { 1468 .name = "C1E", 1469 .desc = "MWAIT 0x01", 1470 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1471 .exit_latency = 2, 1472 .target_residency = 10, 1473 .enter = intel_idle, 1474 .enter_s2idle = intel_idle_s2idle, }, 1475 { 1476 .name = "C6S", 1477 .desc = "MWAIT 0x22", 1478 .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED | 1479 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1480 .exit_latency = 270, 1481 .target_residency = 700, 1482 .enter = intel_idle, 1483 .enter_s2idle = intel_idle_s2idle, }, 1484 { 1485 .name = "C6SP", 1486 .desc = "MWAIT 0x23", 1487 .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED | 1488 CPUIDLE_FLAG_PARTIAL_HINT_MATCH, 1489 .exit_latency = 310, 1490 .target_residency = 900, 1491 .enter = intel_idle, 1492 .enter_s2idle = intel_idle_s2idle, }, 1493 { 1494 .enter = NULL } 1495 }; 1496 1497 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1498 .state_table = nehalem_cstates, 1499 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1500 .disable_promotion_to_c1e = true, 1501 }; 1502 1503 static const struct idle_cpu idle_cpu_nhx __initconst = { 1504 .state_table = nehalem_cstates, 1505 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1506 .disable_promotion_to_c1e = true, 1507 .use_acpi = true, 1508 }; 1509 1510 static const struct idle_cpu idle_cpu_atom __initconst = { 1511 .state_table = 
atom_cstates, 1512 }; 1513 1514 static const struct idle_cpu idle_cpu_tangier __initconst = { 1515 .state_table = tangier_cstates, 1516 }; 1517 1518 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1519 .state_table = atom_cstates, 1520 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1521 }; 1522 1523 static const struct idle_cpu idle_cpu_snb __initconst = { 1524 .state_table = snb_cstates, 1525 .disable_promotion_to_c1e = true, 1526 }; 1527 1528 static const struct idle_cpu idle_cpu_snx __initconst = { 1529 .state_table = snb_cstates, 1530 .disable_promotion_to_c1e = true, 1531 .use_acpi = true, 1532 }; 1533 1534 static const struct idle_cpu idle_cpu_byt __initconst = { 1535 .state_table = byt_cstates, 1536 .disable_promotion_to_c1e = true, 1537 }; 1538 1539 static const struct idle_cpu idle_cpu_cht __initconst = { 1540 .state_table = cht_cstates, 1541 .disable_promotion_to_c1e = true, 1542 }; 1543 1544 static const struct idle_cpu idle_cpu_ivb __initconst = { 1545 .state_table = ivb_cstates, 1546 .disable_promotion_to_c1e = true, 1547 }; 1548 1549 static const struct idle_cpu idle_cpu_ivt __initconst = { 1550 .state_table = ivt_cstates, 1551 .disable_promotion_to_c1e = true, 1552 .use_acpi = true, 1553 }; 1554 1555 static const struct idle_cpu idle_cpu_hsw __initconst = { 1556 .state_table = hsw_cstates, 1557 .disable_promotion_to_c1e = true, 1558 }; 1559 1560 static const struct idle_cpu idle_cpu_hsx __initconst = { 1561 .state_table = hsw_cstates, 1562 .disable_promotion_to_c1e = true, 1563 .use_acpi = true, 1564 }; 1565 1566 static const struct idle_cpu idle_cpu_bdw __initconst = { 1567 .state_table = bdw_cstates, 1568 .disable_promotion_to_c1e = true, 1569 }; 1570 1571 static const struct idle_cpu idle_cpu_bdx __initconst = { 1572 .state_table = bdw_cstates, 1573 .disable_promotion_to_c1e = true, 1574 .use_acpi = true, 1575 }; 1576 1577 static const struct idle_cpu idle_cpu_skl __initconst = { 1578 .state_table = skl_cstates, 1579 
.disable_promotion_to_c1e = true, 1580 }; 1581 1582 static const struct idle_cpu idle_cpu_skx __initconst = { 1583 .state_table = skx_cstates, 1584 .disable_promotion_to_c1e = true, 1585 .use_acpi = true, 1586 }; 1587 1588 static const struct idle_cpu idle_cpu_icx __initconst = { 1589 .state_table = icx_cstates, 1590 .disable_promotion_to_c1e = true, 1591 .use_acpi = true, 1592 }; 1593 1594 static const struct idle_cpu idle_cpu_adl __initconst = { 1595 .state_table = adl_cstates, 1596 }; 1597 1598 static const struct idle_cpu idle_cpu_adl_l __initconst = { 1599 .state_table = adl_l_cstates, 1600 }; 1601 1602 static const struct idle_cpu idle_cpu_mtl_l __initconst = { 1603 .state_table = mtl_l_cstates, 1604 }; 1605 1606 static const struct idle_cpu idle_cpu_ptl __initconst = { 1607 .state_table = ptl_cstates, 1608 }; 1609 1610 static const struct idle_cpu idle_cpu_gmt __initconst = { 1611 .state_table = gmt_cstates, 1612 }; 1613 1614 static const struct idle_cpu idle_cpu_spr __initconst = { 1615 .state_table = spr_cstates, 1616 .disable_promotion_to_c1e = true, 1617 .c1_demotion_supported = true, 1618 .use_acpi = true, 1619 }; 1620 1621 static const struct idle_cpu idle_cpu_gnr __initconst = { 1622 .state_table = gnr_cstates, 1623 .disable_promotion_to_c1e = true, 1624 .c1_demotion_supported = true, 1625 .use_acpi = true, 1626 }; 1627 1628 static const struct idle_cpu idle_cpu_gnrd __initconst = { 1629 .state_table = gnrd_cstates, 1630 .disable_promotion_to_c1e = true, 1631 .c1_demotion_supported = true, 1632 .use_acpi = true, 1633 }; 1634 1635 static const struct idle_cpu idle_cpu_avn __initconst = { 1636 .state_table = avn_cstates, 1637 .disable_promotion_to_c1e = true, 1638 .use_acpi = true, 1639 }; 1640 1641 static const struct idle_cpu idle_cpu_knl __initconst = { 1642 .state_table = knl_cstates, 1643 .use_acpi = true, 1644 }; 1645 1646 static const struct idle_cpu idle_cpu_bxt __initconst = { 1647 .state_table = bxt_cstates, 1648 .disable_promotion_to_c1e = 
true, 1649 }; 1650 1651 static const struct idle_cpu idle_cpu_dnv __initconst = { 1652 .state_table = dnv_cstates, 1653 .disable_promotion_to_c1e = true, 1654 .use_acpi = true, 1655 }; 1656 1657 static const struct idle_cpu idle_cpu_tmt __initconst = { 1658 .disable_promotion_to_c1e = true, 1659 }; 1660 1661 static const struct idle_cpu idle_cpu_snr __initconst = { 1662 .state_table = snr_cstates, 1663 .disable_promotion_to_c1e = true, 1664 .use_acpi = true, 1665 }; 1666 1667 static const struct idle_cpu idle_cpu_grr __initconst = { 1668 .state_table = grr_cstates, 1669 .disable_promotion_to_c1e = true, 1670 .c1_demotion_supported = true, 1671 .use_acpi = true, 1672 }; 1673 1674 static const struct idle_cpu idle_cpu_srf __initconst = { 1675 .state_table = srf_cstates, 1676 .disable_promotion_to_c1e = true, 1677 .c1_demotion_supported = true, 1678 .use_acpi = true, 1679 }; 1680 1681 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1682 X86_MATCH_VFM(INTEL_NEHALEM_EP, &idle_cpu_nhx), 1683 X86_MATCH_VFM(INTEL_NEHALEM, &idle_cpu_nehalem), 1684 X86_MATCH_VFM(INTEL_NEHALEM_G, &idle_cpu_nehalem), 1685 X86_MATCH_VFM(INTEL_WESTMERE, &idle_cpu_nehalem), 1686 X86_MATCH_VFM(INTEL_WESTMERE_EP, &idle_cpu_nhx), 1687 X86_MATCH_VFM(INTEL_NEHALEM_EX, &idle_cpu_nhx), 1688 X86_MATCH_VFM(INTEL_ATOM_BONNELL, &idle_cpu_atom), 1689 X86_MATCH_VFM(INTEL_ATOM_BONNELL_MID, &idle_cpu_lincroft), 1690 X86_MATCH_VFM(INTEL_WESTMERE_EX, &idle_cpu_nhx), 1691 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &idle_cpu_snb), 1692 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &idle_cpu_snx), 1693 X86_MATCH_VFM(INTEL_ATOM_SALTWELL, &idle_cpu_atom), 1694 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &idle_cpu_byt), 1695 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1696 X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &idle_cpu_cht), 1697 X86_MATCH_VFM(INTEL_IVYBRIDGE, &idle_cpu_ivb), 1698 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &idle_cpu_ivt), 1699 X86_MATCH_VFM(INTEL_HASWELL, &idle_cpu_hsw), 1700 X86_MATCH_VFM(INTEL_HASWELL_X, 
&idle_cpu_hsx), 1701 X86_MATCH_VFM(INTEL_HASWELL_L, &idle_cpu_hsw), 1702 X86_MATCH_VFM(INTEL_HASWELL_G, &idle_cpu_hsw), 1703 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D, &idle_cpu_avn), 1704 X86_MATCH_VFM(INTEL_BROADWELL, &idle_cpu_bdw), 1705 X86_MATCH_VFM(INTEL_BROADWELL_G, &idle_cpu_bdw), 1706 X86_MATCH_VFM(INTEL_BROADWELL_X, &idle_cpu_bdx), 1707 X86_MATCH_VFM(INTEL_BROADWELL_D, &idle_cpu_bdx), 1708 X86_MATCH_VFM(INTEL_SKYLAKE_L, &idle_cpu_skl), 1709 X86_MATCH_VFM(INTEL_SKYLAKE, &idle_cpu_skl), 1710 X86_MATCH_VFM(INTEL_KABYLAKE_L, &idle_cpu_skl), 1711 X86_MATCH_VFM(INTEL_KABYLAKE, &idle_cpu_skl), 1712 X86_MATCH_VFM(INTEL_SKYLAKE_X, &idle_cpu_skx), 1713 X86_MATCH_VFM(INTEL_ICELAKE_X, &idle_cpu_icx), 1714 X86_MATCH_VFM(INTEL_ICELAKE_D, &idle_cpu_icx), 1715 X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl), 1716 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l), 1717 X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l), 1718 X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl), 1719 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt), 1720 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr), 1721 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr), 1722 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &idle_cpu_gnr), 1723 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &idle_cpu_gnrd), 1724 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &idle_cpu_knl), 1725 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &idle_cpu_knl), 1726 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &idle_cpu_bxt), 1727 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1728 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &idle_cpu_dnv), 1729 X86_MATCH_VFM(INTEL_ATOM_TREMONT, &idle_cpu_tmt), 1730 X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &idle_cpu_tmt), 1731 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr), 1732 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr), 1733 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf), 1734 X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &idle_cpu_srf), 1735 {} 1736 }; 1737 1738 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1739 
X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL), 1740 {} 1741 }; 1742 1743 static bool __init intel_idle_max_cstate_reached(int cstate) 1744 { 1745 if (cstate + 1 > max_cstate) { 1746 pr_info("max_cstate %d reached\n", max_cstate); 1747 return true; 1748 } 1749 return false; 1750 } 1751 1752 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1753 { 1754 unsigned long eax = flg2MWAIT(state->flags); 1755 1756 if (boot_cpu_has(X86_FEATURE_ARAT)) 1757 return false; 1758 1759 /* 1760 * Switch over to one-shot tick broadcast if the target C-state 1761 * is deeper than C1. 1762 */ 1763 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1764 } 1765 1766 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1767 #include <acpi/processor.h> 1768 1769 static bool no_acpi __read_mostly; 1770 module_param(no_acpi, bool, 0444); 1771 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1772 1773 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1774 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1775 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1776 1777 static bool no_native __read_mostly; /* No effect if no_acpi is set. */ 1778 module_param_named(no_native, no_native, bool, 0444); 1779 MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states"); 1780 1781 static struct acpi_processor_power acpi_state_table __initdata; 1782 1783 /** 1784 * intel_idle_cst_usable - Check if the _CST information can be used. 1785 * 1786 * Check if all of the C-states listed by _CST in the max_cstate range are 1787 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
1788 */ 1789 static bool __init intel_idle_cst_usable(void) 1790 { 1791 int cstate, limit; 1792 1793 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1794 acpi_state_table.count); 1795 1796 for (cstate = 1; cstate < limit; cstate++) { 1797 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1798 1799 if (cx->entry_method != ACPI_CSTATE_FFH) 1800 return false; 1801 } 1802 1803 return true; 1804 } 1805 1806 static bool __init intel_idle_acpi_cst_extract(void) 1807 { 1808 unsigned int cpu; 1809 1810 if (no_acpi) { 1811 pr_debug("Not allowed to use ACPI _CST\n"); 1812 return false; 1813 } 1814 1815 for_each_possible_cpu(cpu) { 1816 struct acpi_processor *pr = per_cpu(processors, cpu); 1817 1818 if (!pr) 1819 continue; 1820 1821 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1822 continue; 1823 1824 acpi_state_table.count++; 1825 1826 if (!intel_idle_cst_usable()) 1827 continue; 1828 1829 if (!acpi_processor_claim_cst_control()) 1830 break; 1831 1832 return true; 1833 } 1834 1835 acpi_state_table.count = 0; 1836 pr_debug("ACPI _CST not found or not usable\n"); 1837 return false; 1838 } 1839 1840 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1841 { 1842 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1843 1844 /* 1845 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1846 * the interesting states are ACPI_CSTATE_FFH. 
1847 */ 1848 for (cstate = 1; cstate < limit; cstate++) { 1849 struct acpi_processor_cx *cx; 1850 struct cpuidle_state *state; 1851 1852 if (intel_idle_max_cstate_reached(cstate - 1)) 1853 break; 1854 1855 cx = &acpi_state_table.states[cstate]; 1856 1857 state = &drv->states[drv->state_count++]; 1858 1859 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1860 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1861 state->exit_latency = cx->latency; 1862 /* 1863 * For C1-type C-states use the same number for both the exit 1864 * latency and target residency, because that is the case for 1865 * C1 in the majority of the static C-states tables above. 1866 * For the other types of C-states, however, set the target 1867 * residency to 3 times the exit latency which should lead to 1868 * a reasonable balance between energy-efficiency and 1869 * performance in the majority of interesting cases. 1870 */ 1871 state->target_residency = cx->latency; 1872 if (cx->type > ACPI_STATE_C1) 1873 state->target_residency *= 3; 1874 1875 state->flags = MWAIT2flg(cx->address); 1876 if (cx->type > ACPI_STATE_C2) 1877 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1878 1879 if (disabled_states_mask & BIT(cstate)) 1880 state->flags |= CPUIDLE_FLAG_OFF; 1881 1882 if (intel_idle_state_needs_timer_stop(state)) 1883 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1884 1885 if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1886 mark_tsc_unstable("TSC halts in idle"); 1887 1888 state->enter = intel_idle; 1889 state->enter_dead = intel_idle_enter_dead; 1890 state->enter_s2idle = intel_idle_s2idle; 1891 } 1892 } 1893 1894 static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) 1895 { 1896 int cstate, limit; 1897 1898 /* 1899 * If there are no _CST C-states, do not disable any C-states by 1900 * default. 
1901 */ 1902 if (!acpi_state_table.count) 1903 return false; 1904 1905 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1906 /* 1907 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1908 * the interesting states are ACPI_CSTATE_FFH. 1909 */ 1910 for (cstate = 1; cstate < limit; cstate++) { 1911 u32 acpi_hint = acpi_state_table.states[cstate].address; 1912 u32 table_hint = mwait_hint; 1913 1914 if (flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH) { 1915 acpi_hint &= ~MWAIT_SUBSTATE_MASK; 1916 table_hint &= ~MWAIT_SUBSTATE_MASK; 1917 } 1918 1919 if (acpi_hint == table_hint) 1920 return false; 1921 } 1922 return true; 1923 } 1924 1925 static inline bool ignore_native(void) 1926 { 1927 return no_native && !no_acpi; 1928 } 1929 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1930 #define force_use_acpi (false) 1931 1932 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1933 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1934 static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) 1935 { 1936 return false; 1937 } 1938 static inline bool ignore_native(void) { return false; } 1939 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1940 1941 /** 1942 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1943 * 1944 * Tune IVT multi-socket targets. 1945 * Assumption: num_sockets == (max_package_num + 1). 
1946 */ 1947 static void __init ivt_idle_state_table_update(void) 1948 { 1949 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1950 int cpu, package_num, num_sockets = 1; 1951 1952 for_each_online_cpu(cpu) { 1953 package_num = topology_physical_package_id(cpu); 1954 if (package_num + 1 > num_sockets) { 1955 num_sockets = package_num + 1; 1956 1957 if (num_sockets > 4) { 1958 cpuidle_state_table = ivt_cstates_8s; 1959 return; 1960 } 1961 } 1962 } 1963 1964 if (num_sockets > 2) 1965 cpuidle_state_table = ivt_cstates_4s; 1966 1967 /* else, 1 and 2 socket systems use default ivt_cstates */ 1968 } 1969 1970 /** 1971 * irtl_2_usec - IRTL to microseconds conversion. 1972 * @irtl: IRTL MSR value. 1973 * 1974 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 1975 */ 1976 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1977 { 1978 static const unsigned int irtl_ns_units[] __initconst = { 1979 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1980 }; 1981 unsigned long long ns; 1982 1983 if (!irtl) 1984 return 0; 1985 1986 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1987 1988 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1989 } 1990 1991 /** 1992 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1993 * 1994 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1995 * definitive maximum latency and use the same value for target_residency. 
1996 */ 1997 static void __init bxt_idle_state_table_update(void) 1998 { 1999 unsigned long long msr; 2000 unsigned int usec; 2001 2002 rdmsrq(MSR_PKGC6_IRTL, msr); 2003 usec = irtl_2_usec(msr); 2004 if (usec) { 2005 bxt_cstates[2].exit_latency = usec; 2006 bxt_cstates[2].target_residency = usec; 2007 } 2008 2009 rdmsrq(MSR_PKGC7_IRTL, msr); 2010 usec = irtl_2_usec(msr); 2011 if (usec) { 2012 bxt_cstates[3].exit_latency = usec; 2013 bxt_cstates[3].target_residency = usec; 2014 } 2015 2016 rdmsrq(MSR_PKGC8_IRTL, msr); 2017 usec = irtl_2_usec(msr); 2018 if (usec) { 2019 bxt_cstates[4].exit_latency = usec; 2020 bxt_cstates[4].target_residency = usec; 2021 } 2022 2023 rdmsrq(MSR_PKGC9_IRTL, msr); 2024 usec = irtl_2_usec(msr); 2025 if (usec) { 2026 bxt_cstates[5].exit_latency = usec; 2027 bxt_cstates[5].target_residency = usec; 2028 } 2029 2030 rdmsrq(MSR_PKGC10_IRTL, msr); 2031 usec = irtl_2_usec(msr); 2032 if (usec) { 2033 bxt_cstates[6].exit_latency = usec; 2034 bxt_cstates[6].target_residency = usec; 2035 } 2036 2037 } 2038 2039 /** 2040 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 2041 * 2042 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
2043 */ 2044 static void __init sklh_idle_state_table_update(void) 2045 { 2046 unsigned long long msr; 2047 unsigned int eax, ebx, ecx, edx; 2048 2049 2050 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 2051 if (max_cstate <= 7) 2052 return; 2053 2054 /* if PC10 not present in CPUID.MWAIT.EDX */ 2055 if ((mwait_substates & (0xF << 28)) == 0) 2056 return; 2057 2058 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr); 2059 2060 /* PC10 is not enabled in PKG C-state limit */ 2061 if ((msr & 0xF) != 8) 2062 return; 2063 2064 ecx = 0; 2065 cpuid(7, &eax, &ebx, &ecx, &edx); 2066 2067 /* if SGX is present */ 2068 if (ebx & (1 << 2)) { 2069 2070 rdmsrq(MSR_IA32_FEAT_CTL, msr); 2071 2072 /* if SGX is enabled */ 2073 if (msr & (1 << 18)) 2074 return; 2075 } 2076 2077 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 2078 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 2079 } 2080 2081 /** 2082 * skx_is_pc6_disabled() - Check if PC6 is disabled in BIOS. 2083 * 2084 * Return: %true if PC6 is disabled, %false otherwise. 2085 */ 2086 static bool __init skx_is_pc6_disabled(void) 2087 { 2088 u64 msr; 2089 2090 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr); 2091 2092 /* 2093 * 000b: C0/C1 (no package C-state support) 2094 * 001b: C2 2095 * 010b: C6 (non-retention) 2096 * 011b: C6 (retention) 2097 * 111b: No Package C state limits. 2098 */ 2099 return (msr & SKX_PKG_CST_LIMIT_MASK) < SKX_PKG_CST_LIMIT_PC6; 2100 } 2101 2102 /** 2103 * skx_idle_state_table_update - Adjust the SKX/CLX idle states table. 2104 * 2105 * Adjust Sky Lake or Cascade Lake Xeon idle states if PC6 is disabled in BIOS. 2106 * Use the CC6 + PC0 latency and 3 times of that latency for target_residency. 2107 * This is consistent with how the intel_idle driver uses _CST to set the 2108 * target_residency. 
2109 */ 2110 static void __init skx_idle_state_table_update(void) 2111 { 2112 if (skx_is_pc6_disabled()) { 2113 skx_cstates[2].exit_latency = 92; 2114 skx_cstates[2].target_residency = 276; 2115 } 2116 } 2117 2118 /** 2119 * spr_idle_state_table_update - Adjust Sapphire Rapids Xeon idle states table. 2120 * 2121 * By default, the C6 state assumes the worst-case scenario of package C6. 2122 * However, if PC6 is disabled in BIOS, update the numbers to match core C6. 2123 */ 2124 static void __init spr_idle_state_table_update(void) 2125 { 2126 if (skx_is_pc6_disabled()) { 2127 spr_cstates[2].exit_latency = 190; 2128 spr_cstates[2].target_residency = 600; 2129 } 2130 } 2131 2132 /** 2133 * drop_pc6_redundant_cstates() - Drop C-states redundant when PC6 is disabled. 2134 * @states: Idle states table to modify. 2135 * 2136 * When PC6 is disabled in BIOS, C-states that exist solely to enable PC6 2137 * entry (such as C6P or C6SP) become identical to shallower C-states like 2138 * C6, and are therefore redundant. Should be called only on systems with 2139 * multiple C6 flavors. 2140 */ 2141 static void __init drop_pc6_redundant_cstates(struct cpuidle_state *states) 2142 { 2143 int count; 2144 2145 if (!skx_is_pc6_disabled()) 2146 /* PC6 is not disabled, nothing to do */ 2147 return; 2148 2149 for (count = 0; states[count].enter; count++) 2150 continue; 2151 2152 if (count < 2) { 2153 pr_debug("Too few idle states to drop PC6-redundant states\n"); 2154 return; 2155 } 2156 2157 /* 2158 * Sanity check: At this point all platforms with multiple C6 flavors 2159 * use the CPUIDLE_FLAG_PARTIAL_HINT_MATCH flag. And the last state in 2160 * the table is the one that becomes redundant when PC6 is disabled. 
2161 */ 2162 if (!(states[count - 1].flags & CPUIDLE_FLAG_PARTIAL_HINT_MATCH)) { 2163 pr_debug("Can't drop PC6-redundant states: unexpected flags\n"); 2164 return; 2165 } 2166 2167 /* 2168 * On all current platforms with multiple C6 flavors, there is only one 2169 * C-state that becomes redundant when PC6 is disabled. This state is 2170 * the last one in the table. Drop it by marking it with 2171 * CPUIDLE_FLAG_UNUSABLE so that cpuidle excludes it when registering 2172 * idle states. 2173 */ 2174 pr_info("Dropping idle state %s because PC6 is disabled\n", 2175 states[count - 1].name); 2176 states[count - 1].flags |= CPUIDLE_FLAG_UNUSABLE; 2177 } 2178 2179 /** 2180 * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion. 2181 */ 2182 static void __init byt_cht_auto_demotion_disable(void) 2183 { 2184 wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 2185 wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 2186 } 2187 2188 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 2189 { 2190 unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) & 2191 MWAIT_CSTATE_MASK; 2192 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 2193 MWAIT_SUBSTATE_MASK; 2194 2195 /* Ignore the C-state if there are NO sub-states in CPUID for it. */ 2196 if (num_substates == 0) 2197 return false; 2198 2199 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 2200 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 2201 2202 return true; 2203 } 2204 2205 static void state_update_enter_method(struct cpuidle_state *state, int cstate) 2206 { 2207 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { 2208 /* 2209 * Combining with XSTATE with IBRS or IRQ_ENABLE flags 2210 * is not currently supported but this driver. 
2211 */ 2212 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 2213 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 2214 state->enter = intel_idle_xstate; 2215 return; 2216 } 2217 2218 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 2219 ((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) { 2220 /* 2221 * IBRS mitigation requires that C-states are entered 2222 * with interrupts disabled. 2223 */ 2224 if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)) 2225 state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE; 2226 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 2227 state->enter = intel_idle_ibrs; 2228 return; 2229 } 2230 2231 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 2232 state->enter = intel_idle_irq; 2233 return; 2234 } 2235 2236 if (force_irq_on) { 2237 pr_info("forced intel_idle_irq for state %d\n", cstate); 2238 state->enter = intel_idle_irq; 2239 } 2240 } 2241 2242 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 2243 { 2244 int cstate; 2245 2246 switch (boot_cpu_data.x86_vfm) { 2247 case INTEL_IVYBRIDGE_X: 2248 ivt_idle_state_table_update(); 2249 break; 2250 case INTEL_ATOM_GOLDMONT: 2251 case INTEL_ATOM_GOLDMONT_PLUS: 2252 bxt_idle_state_table_update(); 2253 break; 2254 case INTEL_SKYLAKE: 2255 sklh_idle_state_table_update(); 2256 break; 2257 case INTEL_SKYLAKE_X: 2258 skx_idle_state_table_update(); 2259 break; 2260 case INTEL_SAPPHIRERAPIDS_X: 2261 case INTEL_EMERALDRAPIDS_X: 2262 spr_idle_state_table_update(); 2263 break; 2264 case INTEL_ATOM_SILVERMONT: 2265 case INTEL_ATOM_AIRMONT: 2266 byt_cht_auto_demotion_disable(); 2267 break; 2268 case INTEL_GRANITERAPIDS_D: 2269 case INTEL_GRANITERAPIDS_X: 2270 case INTEL_ATOM_CRESTMONT_X: 2271 case INTEL_ATOM_DARKMONT_X: 2272 drop_pc6_redundant_cstates(cpuidle_state_table); 2273 break; 2274 } 2275 2276 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 2277 struct cpuidle_state *state; 2278 unsigned int mwait_hint; 2279 2280 if (intel_idle_max_cstate_reached(cstate)) 
2281 break; 2282 2283 if (!cpuidle_state_table[cstate].enter && 2284 !cpuidle_state_table[cstate].enter_s2idle) 2285 break; 2286 2287 if (!cpuidle_state_table[cstate].enter_dead) 2288 cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead; 2289 2290 /* If marked as unusable, skip this state. */ 2291 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 2292 pr_debug("state %s is disabled\n", 2293 cpuidle_state_table[cstate].name); 2294 continue; 2295 } 2296 2297 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 2298 if (!intel_idle_verify_cstate(mwait_hint)) 2299 continue; 2300 2301 /* Structure copy. */ 2302 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 2303 state = &drv->states[drv->state_count]; 2304 2305 state_update_enter_method(state, cstate); 2306 2307 2308 if ((disabled_states_mask & BIT(drv->state_count)) || 2309 ((icpu->use_acpi || force_use_acpi) && 2310 intel_idle_off_by_default(state->flags, mwait_hint) && 2311 !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 2312 state->flags |= CPUIDLE_FLAG_OFF; 2313 2314 if (intel_idle_state_needs_timer_stop(state)) 2315 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 2316 2317 drv->state_count++; 2318 } 2319 } 2320 2321 /** 2322 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 2323 * @drv: cpuidle driver structure to initialize. 
2324 */ 2325 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 2326 { 2327 cpuidle_poll_state_init(drv); 2328 2329 if (disabled_states_mask & BIT(0)) 2330 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 2331 2332 drv->state_count = 1; 2333 2334 if (icpu && icpu->state_table) 2335 intel_idle_init_cstates_icpu(drv); 2336 else 2337 intel_idle_init_cstates_acpi(drv); 2338 } 2339 2340 static void auto_demotion_disable(void) 2341 { 2342 unsigned long long msr_bits; 2343 2344 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 2345 msr_bits &= ~auto_demotion_disable_flags; 2346 wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 2347 } 2348 2349 static void c1e_promotion_enable(void) 2350 { 2351 unsigned long long msr_bits; 2352 2353 rdmsrq(MSR_IA32_POWER_CTL, msr_bits); 2354 msr_bits |= 0x2; 2355 wrmsrq(MSR_IA32_POWER_CTL, msr_bits); 2356 } 2357 2358 static void c1e_promotion_disable(void) 2359 { 2360 unsigned long long msr_bits; 2361 2362 rdmsrq(MSR_IA32_POWER_CTL, msr_bits); 2363 msr_bits &= ~0x2; 2364 wrmsrq(MSR_IA32_POWER_CTL, msr_bits); 2365 } 2366 2367 /** 2368 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 2369 * @cpu: CPU to initialize. 2370 * 2371 * Register a cpuidle device object for @cpu and update its MSRs in accordance 2372 * with the processor model flags. 
2373 */ 2374 static int intel_idle_cpu_init(unsigned int cpu) 2375 { 2376 struct cpuidle_device *dev; 2377 2378 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2379 dev->cpu = cpu; 2380 2381 if (cpuidle_register_device(dev)) { 2382 pr_debug("cpuidle_register_device %d failed!\n", cpu); 2383 return -EIO; 2384 } 2385 2386 if (auto_demotion_disable_flags) 2387 auto_demotion_disable(); 2388 2389 if (c1e_promotion == C1E_PROMOTION_ENABLE) 2390 c1e_promotion_enable(); 2391 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 2392 c1e_promotion_disable(); 2393 2394 return 0; 2395 } 2396 2397 static int intel_idle_cpu_online(unsigned int cpu) 2398 { 2399 struct cpuidle_device *dev; 2400 2401 if (!boot_cpu_has(X86_FEATURE_ARAT)) 2402 tick_broadcast_enable(); 2403 2404 /* 2405 * Some systems can hotplug a cpu at runtime after 2406 * the kernel has booted, we have to initialize the 2407 * driver in this case 2408 */ 2409 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2410 if (!dev->registered) 2411 return intel_idle_cpu_init(cpu); 2412 2413 return 0; 2414 } 2415 2416 /** 2417 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 2418 */ 2419 static void __init intel_idle_cpuidle_devices_uninit(void) 2420 { 2421 int i; 2422 2423 for_each_online_cpu(i) 2424 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2425 } 2426 2427 static void intel_c1_demotion_toggle(void *enable) 2428 { 2429 unsigned long long msr_val; 2430 2431 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val); 2432 /* 2433 * Enable/disable C1 undemotion along with C1 demotion, as this is the 2434 * most sensible configuration in general. 
2435 */ 2436 if (enable) 2437 msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE; 2438 else 2439 msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE); 2440 wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val); 2441 } 2442 2443 static ssize_t intel_c1_demotion_store(struct device *dev, 2444 struct device_attribute *attr, 2445 const char *buf, size_t count) 2446 { 2447 bool enable; 2448 int err; 2449 2450 err = kstrtobool(buf, &enable); 2451 if (err) 2452 return err; 2453 2454 mutex_lock(&c1_demotion_mutex); 2455 /* Enable/disable C1 demotion on all CPUs */ 2456 on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1); 2457 mutex_unlock(&c1_demotion_mutex); 2458 2459 return count; 2460 } 2461 2462 static ssize_t intel_c1_demotion_show(struct device *dev, 2463 struct device_attribute *attr, char *buf) 2464 { 2465 unsigned long long msr_val; 2466 2467 /* 2468 * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other 2469 * configuration would be a BIOS bug. 2470 */ 2471 rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_val); 2472 return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE)); 2473 } 2474 static DEVICE_ATTR_RW(intel_c1_demotion); 2475 2476 static int __init intel_idle_sysfs_init(void) 2477 { 2478 int err; 2479 2480 if (!c1_demotion_supported) 2481 return 0; 2482 2483 sysfs_root = bus_get_dev_root(&cpu_subsys); 2484 if (!sysfs_root) 2485 return 0; 2486 2487 err = sysfs_add_file_to_group(&sysfs_root->kobj, 2488 &dev_attr_intel_c1_demotion.attr, 2489 "cpuidle"); 2490 if (err) { 2491 put_device(sysfs_root); 2492 return err; 2493 } 2494 2495 return 0; 2496 } 2497 2498 static void __init intel_idle_sysfs_uninit(void) 2499 { 2500 if (!sysfs_root) 2501 return; 2502 2503 sysfs_remove_file_from_group(&sysfs_root->kobj, 2504 &dev_attr_intel_c1_demotion.attr, 2505 "cpuidle"); 2506 put_device(sysfs_root); 2507 } 2508 2509 /** 2510 * get_cmdline_field - Get the current field from a cmdline string. 
2511 * @args: The cmdline string to get the current field from. 2512 * @field: Pointer to the current field upon return. 2513 * @sep: The fields separator character. 2514 * 2515 * Examples: 2516 * Input: args="C1:1:1,C1E:2:10", sep=':' 2517 * Output: field="C1", return "1:1,C1E:2:10" 2518 * Input: args="C1:1:1,C1E:2:10", sep=',' 2519 * Output: field="C1:1:1", return "C1E:2:10" 2520 * Ipnut: args="::", sep=':' 2521 * Output: field="", return ":" 2522 * 2523 * Return: The continuation of the cmdline string after the field or NULL. 2524 */ 2525 static char *get_cmdline_field(char *args, char **field, char sep) 2526 { 2527 unsigned int i; 2528 2529 for (i = 0; args[i] && !isspace(args[i]); i++) { 2530 if (args[i] == sep) 2531 break; 2532 } 2533 2534 *field = args; 2535 2536 if (args[i] != sep) 2537 return NULL; 2538 2539 args[i] = '\0'; 2540 return args + i + 1; 2541 } 2542 2543 /** 2544 * validate_cmdline_cstate - Validate a C-state from cmdline. 2545 * @state: The C-state to validate. 2546 * @prev_state: The previous C-state in the table or NULL. 2547 * 2548 * Return: 0 if the C-state is valid or -EINVAL otherwise. 2549 */ 2550 static int validate_cmdline_cstate(struct cpuidle_state *state, 2551 struct cpuidle_state *prev_state) 2552 { 2553 if (state->exit_latency == 0) 2554 /* Exit latency 0 can only be used for the POLL state */ 2555 return -EINVAL; 2556 2557 if (state->exit_latency > MAX_CMDLINE_LATENCY_US) 2558 return -EINVAL; 2559 2560 if (state->target_residency > MAX_CMDLINE_RESIDENCY_US) 2561 return -EINVAL; 2562 2563 if (state->target_residency < state->exit_latency) 2564 return -EINVAL; 2565 2566 if (!prev_state) 2567 return 0; 2568 2569 if (state->exit_latency <= prev_state->exit_latency) 2570 return -EINVAL; 2571 2572 if (state->target_residency <= prev_state->target_residency) 2573 return -EINVAL; 2574 2575 return 0; 2576 } 2577 2578 /** 2579 * cmdline_table_adjust - Adjust the C-states table with data from cmdline. 
2580 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 2581 * 2582 * Adjust the C-states table with data from the 'intel_idle.table' module 2583 * parameter (if specified). 2584 */ 2585 static void __init cmdline_table_adjust(struct cpuidle_driver *drv) 2586 { 2587 char *args = cmdline_table_str; 2588 struct cpuidle_state *state; 2589 int i; 2590 2591 if (args[0] == '\0') 2592 /* The 'intel_idle.table' module parameter was not specified */ 2593 return; 2594 2595 /* Create a copy of the C-states table */ 2596 for (i = 0; i < drv->state_count; i++) 2597 cmdline_states[i] = drv->states[i]; 2598 2599 /* 2600 * Adjust the C-states table copy with data from the 'intel_idle.table' 2601 * module parameter. 2602 */ 2603 while (args) { 2604 char *fields, *name, *val; 2605 2606 /* 2607 * Get the next C-state definition, which is expected to be 2608 * '<name>:<latency_us>:<target_residency_us>'. Treat "empty" 2609 * fields as unchanged. For example, 2610 * '<name>::<target_residency_us>' leaves the latency unchanged. 
2611 */ 2612 args = get_cmdline_field(args, &fields, ','); 2613 2614 /* name */ 2615 fields = get_cmdline_field(fields, &name, ':'); 2616 if (!fields) 2617 goto error; 2618 2619 if (!strcmp(name, "POLL")) { 2620 pr_err("Cannot adjust POLL\n"); 2621 continue; 2622 } 2623 2624 /* Find the C-state by its name */ 2625 state = NULL; 2626 for (i = 0; i < drv->state_count; i++) { 2627 if (!strcmp(name, drv->states[i].name)) { 2628 state = &cmdline_states[i]; 2629 break; 2630 } 2631 } 2632 2633 if (!state) { 2634 pr_err("C-state '%s' was not found\n", name); 2635 continue; 2636 } 2637 2638 /* Latency */ 2639 fields = get_cmdline_field(fields, &val, ':'); 2640 if (!fields) 2641 goto error; 2642 2643 if (*val) { 2644 if (kstrtouint(val, 0, &state->exit_latency)) 2645 goto error; 2646 } 2647 2648 /* Target residency */ 2649 fields = get_cmdline_field(fields, &val, ':'); 2650 2651 if (*val) { 2652 if (kstrtouint(val, 0, &state->target_residency)) 2653 goto error; 2654 } 2655 2656 /* 2657 * Allow for 3 more fields, but ignore them. Helps to make 2658 * possible future extensions of the cmdline format backward 2659 * compatible. 
2660 */ 2661 for (i = 0; fields && i < 3; i++) { 2662 fields = get_cmdline_field(fields, &val, ':'); 2663 if (!fields) 2664 break; 2665 } 2666 2667 if (fields) { 2668 pr_err("Too many fields for C-state '%s'\n", state->name); 2669 goto error; 2670 } 2671 2672 pr_info("C-state from cmdline: name=%s, latency=%u, residency=%u\n", 2673 state->name, state->exit_latency, state->target_residency); 2674 } 2675 2676 /* Validate the adjusted C-states, start with index 1 to skip POLL */ 2677 for (i = 1; i < drv->state_count; i++) { 2678 struct cpuidle_state *prev_state; 2679 2680 state = &cmdline_states[i]; 2681 prev_state = &cmdline_states[i - 1]; 2682 2683 if (validate_cmdline_cstate(state, prev_state)) { 2684 pr_err("C-state '%s' validation failed\n", state->name); 2685 goto error; 2686 } 2687 } 2688 2689 /* Copy the adjusted C-states table back */ 2690 for (i = 1; i < drv->state_count; i++) 2691 drv->states[i] = cmdline_states[i]; 2692 2693 pr_info("Adjusted C-states with data from 'intel_idle.table'\n"); 2694 return; 2695 2696 error: 2697 pr_info("Failed to adjust C-states with data from 'intel_idle.table'\n"); 2698 } 2699 2700 static int __init intel_idle_init(void) 2701 { 2702 const struct x86_cpu_id *id; 2703 unsigned int eax, ebx, ecx; 2704 int retval; 2705 2706 /* Do not load intel_idle at all for now if idle= is passed */ 2707 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2708 return -ENODEV; 2709 2710 if (max_cstate == 0) { 2711 pr_debug("disabled\n"); 2712 return -EPERM; 2713 } 2714 2715 id = x86_match_cpu(intel_idle_ids); 2716 if (id) { 2717 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2718 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2719 return -ENODEV; 2720 } 2721 } else { 2722 id = x86_match_cpu(intel_mwait_ids); 2723 if (!id) 2724 return -ENODEV; 2725 } 2726 2727 cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates); 2728 2729 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2730 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2731 !mwait_substates) 2732 
return -ENODEV; 2733 2734 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2735 2736 icpu = (const struct idle_cpu *)id->driver_data; 2737 if (icpu && ignore_native()) { 2738 pr_debug("ignoring native CPU idle states\n"); 2739 icpu = NULL; 2740 } 2741 if (icpu) { 2742 if (icpu->state_table) 2743 cpuidle_state_table = icpu->state_table; 2744 else if (!intel_idle_acpi_cst_extract()) 2745 return -ENODEV; 2746 2747 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 2748 if (icpu->disable_promotion_to_c1e) 2749 c1e_promotion = C1E_PROMOTION_DISABLE; 2750 if (icpu->c1_demotion_supported) 2751 c1_demotion_supported = true; 2752 if (icpu->use_acpi || force_use_acpi) 2753 intel_idle_acpi_cst_extract(); 2754 } else if (!intel_idle_acpi_cst_extract()) { 2755 return -ENODEV; 2756 } 2757 2758 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2759 if (!intel_idle_cpuidle_devices) 2760 return -ENOMEM; 2761 2762 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2763 cmdline_table_adjust(&intel_idle_driver); 2764 2765 retval = intel_idle_sysfs_init(); 2766 if (retval) 2767 pr_warn("failed to initialized sysfs"); 2768 2769 retval = cpuidle_register_driver(&intel_idle_driver); 2770 if (retval) { 2771 struct cpuidle_driver *drv = cpuidle_get_driver(); 2772 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2773 drv ? drv->name : "none"); 2774 goto init_driver_fail; 2775 } 2776 2777 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2778 intel_idle_cpu_online, NULL); 2779 if (retval < 0) 2780 goto hp_setup_fail; 2781 2782 pr_debug("Local APIC timer is reliable in %s\n", 2783 boot_cpu_has(X86_FEATURE_ARAT) ? 
"all C-states" : "C1"); 2784 2785 arch_cpu_rescan_dead_smt_siblings(); 2786 2787 return 0; 2788 2789 hp_setup_fail: 2790 intel_idle_cpuidle_devices_uninit(); 2791 cpuidle_unregister_driver(&intel_idle_driver); 2792 init_driver_fail: 2793 intel_idle_sysfs_uninit(); 2794 free_percpu(intel_idle_cpuidle_devices); 2795 return retval; 2796 2797 } 2798 subsys_initcall_sync(intel_idle_init); 2799 2800 /* 2801 * We are not really modular, but we used to support that. Meaning we also 2802 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2803 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2804 * is the easiest way (currently) to continue doing that. 2805 */ 2806 module_param(max_cstate, int, 0444); 2807 /* 2808 * The positions of the bits that are set in this number are the indices of the 2809 * idle states to be disabled by default (as reflected by the names of the 2810 * corresponding idle state directories in sysfs, "state0", "state1" ... 2811 * "state<i>" ..., where <i> is the index of the given state). 2812 */ 2813 module_param_named(states_off, disabled_states_mask, uint, 0444); 2814 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2815 /* 2816 * Debugging option that forces the driver to enter all C-states with 2817 * interrupts enabled. Does not apply to C-states with 2818 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags. 2819 */ 2820 module_param(force_irq_on, bool, 0444); 2821 /* 2822 * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on and 2823 * CPUIDLE_FLAG_IRQ_ENABLE isn't set. 2824 */ 2825 module_param(ibrs_off, bool, 0444); 2826 MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle"); 2827 2828 /* 2829 * Define the C-states table from a user input string. Expected format is 2830 * 'name:latency:residency', where: 2831 * - name: The C-state name. 2832 * - latency: The C-state exit latency in us. 2833 * - residency: The C-state target residency in us. 
2834 * 2835 * Multiple C-states can be defined by separating them with commas: 2836 * 'name1:latency1:residency1,name2:latency2:residency2' 2837 * 2838 * Example: intel_idle.table=C1:1:1,C1E:5:10,C6:100:600 2839 * 2840 * To leave latency or residency unchanged, use an empty field, for example: 2841 * 'C1:1:1,C1E::10' - leaves C1E latency unchanged. 2842 */ 2843 module_param_string(table, cmdline_table_str, MAX_CMDLINE_TABLE_LEN, 0444); 2844 MODULE_PARM_DESC(table, "Build the C-states table from a user input string"); 2845