// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 *	to avoid complications with the lapic timer workaround.
 *	Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/notifier.h> 51 #include <linux/cpu.h> 52 #include <linux/moduleparam.h> 53 #include <asm/cpu_device_id.h> 54 #include <asm/intel-family.h> 55 #include <asm/mwait.h> 56 #include <asm/msr.h> 57 58 #define INTEL_IDLE_VERSION "0.5.1" 59 60 static struct cpuidle_driver intel_idle_driver = { 61 .name = "intel_idle", 62 .owner = THIS_MODULE, 63 }; 64 /* intel_idle.max_cstate=0 disables driver */ 65 static int max_cstate = CPUIDLE_STATE_MAX - 1; 66 static unsigned int disabled_states_mask; 67 68 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 69 70 static unsigned long auto_demotion_disable_flags; 71 static bool disable_promotion_to_c1e; 72 73 struct idle_cpu { 74 struct cpuidle_state *state_table; 75 76 /* 77 * Hardware C-state auto-demotion may not always be optimal. 78 * Indicate which enable bits to clear here. 79 */ 80 unsigned long auto_demotion_disable_flags; 81 bool byt_auto_demotion_disable_flag; 82 bool disable_promotion_to_c1e; 83 bool use_acpi; 84 }; 85 86 static const struct idle_cpu *icpu __initdata; 87 static struct cpuidle_state *cpuidle_state_table __initdata; 88 89 static unsigned int mwait_substates __initdata; 90 91 /* 92 * Enable this state by default even if the ACPI _CST does not list it. 93 */ 94 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 95 96 /* 97 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 98 * the C-state (top nibble) and sub-state (bottom nibble) 99 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 100 * 101 * We store the hint at the top of our "flags" for each state. 
102 */ 103 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 104 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 105 106 /** 107 * intel_idle - Ask the processor to enter the given idle state. 108 * @dev: cpuidle device of the target CPU. 109 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 110 * @index: Target idle state index. 111 * 112 * Use the MWAIT instruction to notify the processor that the CPU represented by 113 * @dev is idle and it can try to enter the idle state corresponding to @index. 114 * 115 * If the local APIC timer is not known to be reliable in the target idle state, 116 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 117 * 118 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to 119 * flushing user TLBs. 120 * 121 * Must be called under local_irq_disable(). 122 */ 123 static __cpuidle int intel_idle(struct cpuidle_device *dev, 124 struct cpuidle_driver *drv, int index) 125 { 126 struct cpuidle_state *state = &drv->states[index]; 127 unsigned long eax = flg2MWAIT(state->flags); 128 unsigned long ecx = 1; /* break on interrupt flag */ 129 130 mwait_idle_with_hints(eax, ecx); 131 132 return index; 133 } 134 135 /** 136 * intel_idle_s2idle - Ask the processor to enter the given idle state. 137 * @dev: cpuidle device of the target CPU. 138 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 139 * @index: Target idle state index. 140 * 141 * Use the MWAIT instruction to notify the processor that the CPU represented by 142 * @dev is idle and it can try to enter the idle state corresponding to @index. 143 * 144 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 145 * scheduler tick and suspended scheduler clock on the target CPU. 
146 */ 147 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 148 struct cpuidle_driver *drv, int index) 149 { 150 unsigned long eax = flg2MWAIT(drv->states[index].flags); 151 unsigned long ecx = 1; /* break on interrupt flag */ 152 153 mwait_idle_with_hints(eax, ecx); 154 155 return 0; 156 } 157 158 /* 159 * States are indexed by the cstate number, 160 * which is also the index into the MWAIT hint array. 161 * Thus C0 is a dummy. 162 */ 163 static struct cpuidle_state nehalem_cstates[] __initdata = { 164 { 165 .name = "C1", 166 .desc = "MWAIT 0x00", 167 .flags = MWAIT2flg(0x00), 168 .exit_latency = 3, 169 .target_residency = 6, 170 .enter = &intel_idle, 171 .enter_s2idle = intel_idle_s2idle, }, 172 { 173 .name = "C1E", 174 .desc = "MWAIT 0x01", 175 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 176 .exit_latency = 10, 177 .target_residency = 20, 178 .enter = &intel_idle, 179 .enter_s2idle = intel_idle_s2idle, }, 180 { 181 .name = "C3", 182 .desc = "MWAIT 0x10", 183 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 184 .exit_latency = 20, 185 .target_residency = 80, 186 .enter = &intel_idle, 187 .enter_s2idle = intel_idle_s2idle, }, 188 { 189 .name = "C6", 190 .desc = "MWAIT 0x20", 191 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 192 .exit_latency = 200, 193 .target_residency = 800, 194 .enter = &intel_idle, 195 .enter_s2idle = intel_idle_s2idle, }, 196 { 197 .enter = NULL } 198 }; 199 200 static struct cpuidle_state snb_cstates[] __initdata = { 201 { 202 .name = "C1", 203 .desc = "MWAIT 0x00", 204 .flags = MWAIT2flg(0x00), 205 .exit_latency = 2, 206 .target_residency = 2, 207 .enter = &intel_idle, 208 .enter_s2idle = intel_idle_s2idle, }, 209 { 210 .name = "C1E", 211 .desc = "MWAIT 0x01", 212 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 213 .exit_latency = 10, 214 .target_residency = 20, 215 .enter = &intel_idle, 216 .enter_s2idle = intel_idle_s2idle, }, 217 { 218 .name = "C3", 219 .desc = "MWAIT 0x10", 220 .flags = 
MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 221 .exit_latency = 80, 222 .target_residency = 211, 223 .enter = &intel_idle, 224 .enter_s2idle = intel_idle_s2idle, }, 225 { 226 .name = "C6", 227 .desc = "MWAIT 0x20", 228 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 229 .exit_latency = 104, 230 .target_residency = 345, 231 .enter = &intel_idle, 232 .enter_s2idle = intel_idle_s2idle, }, 233 { 234 .name = "C7", 235 .desc = "MWAIT 0x30", 236 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 237 .exit_latency = 109, 238 .target_residency = 345, 239 .enter = &intel_idle, 240 .enter_s2idle = intel_idle_s2idle, }, 241 { 242 .enter = NULL } 243 }; 244 245 static struct cpuidle_state byt_cstates[] __initdata = { 246 { 247 .name = "C1", 248 .desc = "MWAIT 0x00", 249 .flags = MWAIT2flg(0x00), 250 .exit_latency = 1, 251 .target_residency = 1, 252 .enter = &intel_idle, 253 .enter_s2idle = intel_idle_s2idle, }, 254 { 255 .name = "C6N", 256 .desc = "MWAIT 0x58", 257 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 258 .exit_latency = 300, 259 .target_residency = 275, 260 .enter = &intel_idle, 261 .enter_s2idle = intel_idle_s2idle, }, 262 { 263 .name = "C6S", 264 .desc = "MWAIT 0x52", 265 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 266 .exit_latency = 500, 267 .target_residency = 560, 268 .enter = &intel_idle, 269 .enter_s2idle = intel_idle_s2idle, }, 270 { 271 .name = "C7", 272 .desc = "MWAIT 0x60", 273 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 274 .exit_latency = 1200, 275 .target_residency = 4000, 276 .enter = &intel_idle, 277 .enter_s2idle = intel_idle_s2idle, }, 278 { 279 .name = "C7S", 280 .desc = "MWAIT 0x64", 281 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 282 .exit_latency = 10000, 283 .target_residency = 20000, 284 .enter = &intel_idle, 285 .enter_s2idle = intel_idle_s2idle, }, 286 { 287 .enter = NULL } 288 }; 289 290 static struct cpuidle_state cht_cstates[] __initdata = { 291 { 292 .name = "C1", 293 .desc = "MWAIT 0x00", 294 
.flags = MWAIT2flg(0x00), 295 .exit_latency = 1, 296 .target_residency = 1, 297 .enter = &intel_idle, 298 .enter_s2idle = intel_idle_s2idle, }, 299 { 300 .name = "C6N", 301 .desc = "MWAIT 0x58", 302 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 303 .exit_latency = 80, 304 .target_residency = 275, 305 .enter = &intel_idle, 306 .enter_s2idle = intel_idle_s2idle, }, 307 { 308 .name = "C6S", 309 .desc = "MWAIT 0x52", 310 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 311 .exit_latency = 200, 312 .target_residency = 560, 313 .enter = &intel_idle, 314 .enter_s2idle = intel_idle_s2idle, }, 315 { 316 .name = "C7", 317 .desc = "MWAIT 0x60", 318 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 319 .exit_latency = 1200, 320 .target_residency = 4000, 321 .enter = &intel_idle, 322 .enter_s2idle = intel_idle_s2idle, }, 323 { 324 .name = "C7S", 325 .desc = "MWAIT 0x64", 326 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 327 .exit_latency = 10000, 328 .target_residency = 20000, 329 .enter = &intel_idle, 330 .enter_s2idle = intel_idle_s2idle, }, 331 { 332 .enter = NULL } 333 }; 334 335 static struct cpuidle_state ivb_cstates[] __initdata = { 336 { 337 .name = "C1", 338 .desc = "MWAIT 0x00", 339 .flags = MWAIT2flg(0x00), 340 .exit_latency = 1, 341 .target_residency = 1, 342 .enter = &intel_idle, 343 .enter_s2idle = intel_idle_s2idle, }, 344 { 345 .name = "C1E", 346 .desc = "MWAIT 0x01", 347 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 348 .exit_latency = 10, 349 .target_residency = 20, 350 .enter = &intel_idle, 351 .enter_s2idle = intel_idle_s2idle, }, 352 { 353 .name = "C3", 354 .desc = "MWAIT 0x10", 355 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 356 .exit_latency = 59, 357 .target_residency = 156, 358 .enter = &intel_idle, 359 .enter_s2idle = intel_idle_s2idle, }, 360 { 361 .name = "C6", 362 .desc = "MWAIT 0x20", 363 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 364 .exit_latency = 80, 365 .target_residency = 300, 366 .enter = 
&intel_idle, 367 .enter_s2idle = intel_idle_s2idle, }, 368 { 369 .name = "C7", 370 .desc = "MWAIT 0x30", 371 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 372 .exit_latency = 87, 373 .target_residency = 300, 374 .enter = &intel_idle, 375 .enter_s2idle = intel_idle_s2idle, }, 376 { 377 .enter = NULL } 378 }; 379 380 static struct cpuidle_state ivt_cstates[] __initdata = { 381 { 382 .name = "C1", 383 .desc = "MWAIT 0x00", 384 .flags = MWAIT2flg(0x00), 385 .exit_latency = 1, 386 .target_residency = 1, 387 .enter = &intel_idle, 388 .enter_s2idle = intel_idle_s2idle, }, 389 { 390 .name = "C1E", 391 .desc = "MWAIT 0x01", 392 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 393 .exit_latency = 10, 394 .target_residency = 80, 395 .enter = &intel_idle, 396 .enter_s2idle = intel_idle_s2idle, }, 397 { 398 .name = "C3", 399 .desc = "MWAIT 0x10", 400 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 401 .exit_latency = 59, 402 .target_residency = 156, 403 .enter = &intel_idle, 404 .enter_s2idle = intel_idle_s2idle, }, 405 { 406 .name = "C6", 407 .desc = "MWAIT 0x20", 408 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 409 .exit_latency = 82, 410 .target_residency = 300, 411 .enter = &intel_idle, 412 .enter_s2idle = intel_idle_s2idle, }, 413 { 414 .enter = NULL } 415 }; 416 417 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 418 { 419 .name = "C1", 420 .desc = "MWAIT 0x00", 421 .flags = MWAIT2flg(0x00), 422 .exit_latency = 1, 423 .target_residency = 1, 424 .enter = &intel_idle, 425 .enter_s2idle = intel_idle_s2idle, }, 426 { 427 .name = "C1E", 428 .desc = "MWAIT 0x01", 429 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 430 .exit_latency = 10, 431 .target_residency = 250, 432 .enter = &intel_idle, 433 .enter_s2idle = intel_idle_s2idle, }, 434 { 435 .name = "C3", 436 .desc = "MWAIT 0x10", 437 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 438 .exit_latency = 59, 439 .target_residency = 300, 440 .enter = &intel_idle, 441 .enter_s2idle 
= intel_idle_s2idle, }, 442 { 443 .name = "C6", 444 .desc = "MWAIT 0x20", 445 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 446 .exit_latency = 84, 447 .target_residency = 400, 448 .enter = &intel_idle, 449 .enter_s2idle = intel_idle_s2idle, }, 450 { 451 .enter = NULL } 452 }; 453 454 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 455 { 456 .name = "C1", 457 .desc = "MWAIT 0x00", 458 .flags = MWAIT2flg(0x00), 459 .exit_latency = 1, 460 .target_residency = 1, 461 .enter = &intel_idle, 462 .enter_s2idle = intel_idle_s2idle, }, 463 { 464 .name = "C1E", 465 .desc = "MWAIT 0x01", 466 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 467 .exit_latency = 10, 468 .target_residency = 500, 469 .enter = &intel_idle, 470 .enter_s2idle = intel_idle_s2idle, }, 471 { 472 .name = "C3", 473 .desc = "MWAIT 0x10", 474 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 475 .exit_latency = 59, 476 .target_residency = 600, 477 .enter = &intel_idle, 478 .enter_s2idle = intel_idle_s2idle, }, 479 { 480 .name = "C6", 481 .desc = "MWAIT 0x20", 482 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 483 .exit_latency = 88, 484 .target_residency = 700, 485 .enter = &intel_idle, 486 .enter_s2idle = intel_idle_s2idle, }, 487 { 488 .enter = NULL } 489 }; 490 491 static struct cpuidle_state hsw_cstates[] __initdata = { 492 { 493 .name = "C1", 494 .desc = "MWAIT 0x00", 495 .flags = MWAIT2flg(0x00), 496 .exit_latency = 2, 497 .target_residency = 2, 498 .enter = &intel_idle, 499 .enter_s2idle = intel_idle_s2idle, }, 500 { 501 .name = "C1E", 502 .desc = "MWAIT 0x01", 503 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 504 .exit_latency = 10, 505 .target_residency = 20, 506 .enter = &intel_idle, 507 .enter_s2idle = intel_idle_s2idle, }, 508 { 509 .name = "C3", 510 .desc = "MWAIT 0x10", 511 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 512 .exit_latency = 33, 513 .target_residency = 100, 514 .enter = &intel_idle, 515 .enter_s2idle = intel_idle_s2idle, }, 516 { 
517 .name = "C6", 518 .desc = "MWAIT 0x20", 519 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 520 .exit_latency = 133, 521 .target_residency = 400, 522 .enter = &intel_idle, 523 .enter_s2idle = intel_idle_s2idle, }, 524 { 525 .name = "C7s", 526 .desc = "MWAIT 0x32", 527 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 528 .exit_latency = 166, 529 .target_residency = 500, 530 .enter = &intel_idle, 531 .enter_s2idle = intel_idle_s2idle, }, 532 { 533 .name = "C8", 534 .desc = "MWAIT 0x40", 535 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 536 .exit_latency = 300, 537 .target_residency = 900, 538 .enter = &intel_idle, 539 .enter_s2idle = intel_idle_s2idle, }, 540 { 541 .name = "C9", 542 .desc = "MWAIT 0x50", 543 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 544 .exit_latency = 600, 545 .target_residency = 1800, 546 .enter = &intel_idle, 547 .enter_s2idle = intel_idle_s2idle, }, 548 { 549 .name = "C10", 550 .desc = "MWAIT 0x60", 551 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 552 .exit_latency = 2600, 553 .target_residency = 7700, 554 .enter = &intel_idle, 555 .enter_s2idle = intel_idle_s2idle, }, 556 { 557 .enter = NULL } 558 }; 559 static struct cpuidle_state bdw_cstates[] __initdata = { 560 { 561 .name = "C1", 562 .desc = "MWAIT 0x00", 563 .flags = MWAIT2flg(0x00), 564 .exit_latency = 2, 565 .target_residency = 2, 566 .enter = &intel_idle, 567 .enter_s2idle = intel_idle_s2idle, }, 568 { 569 .name = "C1E", 570 .desc = "MWAIT 0x01", 571 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 572 .exit_latency = 10, 573 .target_residency = 20, 574 .enter = &intel_idle, 575 .enter_s2idle = intel_idle_s2idle, }, 576 { 577 .name = "C3", 578 .desc = "MWAIT 0x10", 579 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 580 .exit_latency = 40, 581 .target_residency = 100, 582 .enter = &intel_idle, 583 .enter_s2idle = intel_idle_s2idle, }, 584 { 585 .name = "C6", 586 .desc = "MWAIT 0x20", 587 .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, 588 .exit_latency = 133, 589 .target_residency = 400, 590 .enter = &intel_idle, 591 .enter_s2idle = intel_idle_s2idle, }, 592 { 593 .name = "C7s", 594 .desc = "MWAIT 0x32", 595 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 596 .exit_latency = 166, 597 .target_residency = 500, 598 .enter = &intel_idle, 599 .enter_s2idle = intel_idle_s2idle, }, 600 { 601 .name = "C8", 602 .desc = "MWAIT 0x40", 603 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 604 .exit_latency = 300, 605 .target_residency = 900, 606 .enter = &intel_idle, 607 .enter_s2idle = intel_idle_s2idle, }, 608 { 609 .name = "C9", 610 .desc = "MWAIT 0x50", 611 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 612 .exit_latency = 600, 613 .target_residency = 1800, 614 .enter = &intel_idle, 615 .enter_s2idle = intel_idle_s2idle, }, 616 { 617 .name = "C10", 618 .desc = "MWAIT 0x60", 619 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 620 .exit_latency = 2600, 621 .target_residency = 7700, 622 .enter = &intel_idle, 623 .enter_s2idle = intel_idle_s2idle, }, 624 { 625 .enter = NULL } 626 }; 627 628 static struct cpuidle_state skl_cstates[] __initdata = { 629 { 630 .name = "C1", 631 .desc = "MWAIT 0x00", 632 .flags = MWAIT2flg(0x00), 633 .exit_latency = 2, 634 .target_residency = 2, 635 .enter = &intel_idle, 636 .enter_s2idle = intel_idle_s2idle, }, 637 { 638 .name = "C1E", 639 .desc = "MWAIT 0x01", 640 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 641 .exit_latency = 10, 642 .target_residency = 20, 643 .enter = &intel_idle, 644 .enter_s2idle = intel_idle_s2idle, }, 645 { 646 .name = "C3", 647 .desc = "MWAIT 0x10", 648 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 649 .exit_latency = 70, 650 .target_residency = 100, 651 .enter = &intel_idle, 652 .enter_s2idle = intel_idle_s2idle, }, 653 { 654 .name = "C6", 655 .desc = "MWAIT 0x20", 656 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 657 .exit_latency = 85, 658 .target_residency = 200, 659 .enter = 
&intel_idle, 660 .enter_s2idle = intel_idle_s2idle, }, 661 { 662 .name = "C7s", 663 .desc = "MWAIT 0x33", 664 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 665 .exit_latency = 124, 666 .target_residency = 800, 667 .enter = &intel_idle, 668 .enter_s2idle = intel_idle_s2idle, }, 669 { 670 .name = "C8", 671 .desc = "MWAIT 0x40", 672 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 673 .exit_latency = 200, 674 .target_residency = 800, 675 .enter = &intel_idle, 676 .enter_s2idle = intel_idle_s2idle, }, 677 { 678 .name = "C9", 679 .desc = "MWAIT 0x50", 680 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 681 .exit_latency = 480, 682 .target_residency = 5000, 683 .enter = &intel_idle, 684 .enter_s2idle = intel_idle_s2idle, }, 685 { 686 .name = "C10", 687 .desc = "MWAIT 0x60", 688 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 689 .exit_latency = 890, 690 .target_residency = 5000, 691 .enter = &intel_idle, 692 .enter_s2idle = intel_idle_s2idle, }, 693 { 694 .enter = NULL } 695 }; 696 697 static struct cpuidle_state skx_cstates[] __initdata = { 698 { 699 .name = "C1", 700 .desc = "MWAIT 0x00", 701 .flags = MWAIT2flg(0x00), 702 .exit_latency = 2, 703 .target_residency = 2, 704 .enter = &intel_idle, 705 .enter_s2idle = intel_idle_s2idle, }, 706 { 707 .name = "C1E", 708 .desc = "MWAIT 0x01", 709 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 710 .exit_latency = 10, 711 .target_residency = 20, 712 .enter = &intel_idle, 713 .enter_s2idle = intel_idle_s2idle, }, 714 { 715 .name = "C6", 716 .desc = "MWAIT 0x20", 717 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 718 .exit_latency = 133, 719 .target_residency = 600, 720 .enter = &intel_idle, 721 .enter_s2idle = intel_idle_s2idle, }, 722 { 723 .enter = NULL } 724 }; 725 726 static struct cpuidle_state icx_cstates[] __initdata = { 727 { 728 .name = "C1", 729 .desc = "MWAIT 0x00", 730 .flags = MWAIT2flg(0x00), 731 .exit_latency = 1, 732 .target_residency = 1, 733 .enter = &intel_idle, 734 
.enter_s2idle = intel_idle_s2idle, }, 735 { 736 .name = "C1E", 737 .desc = "MWAIT 0x01", 738 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 739 .exit_latency = 4, 740 .target_residency = 4, 741 .enter = &intel_idle, 742 .enter_s2idle = intel_idle_s2idle, }, 743 { 744 .name = "C6", 745 .desc = "MWAIT 0x20", 746 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 747 .exit_latency = 170, 748 .target_residency = 600, 749 .enter = &intel_idle, 750 .enter_s2idle = intel_idle_s2idle, }, 751 { 752 .enter = NULL } 753 }; 754 755 static struct cpuidle_state atom_cstates[] __initdata = { 756 { 757 .name = "C1E", 758 .desc = "MWAIT 0x00", 759 .flags = MWAIT2flg(0x00), 760 .exit_latency = 10, 761 .target_residency = 20, 762 .enter = &intel_idle, 763 .enter_s2idle = intel_idle_s2idle, }, 764 { 765 .name = "C2", 766 .desc = "MWAIT 0x10", 767 .flags = MWAIT2flg(0x10), 768 .exit_latency = 20, 769 .target_residency = 80, 770 .enter = &intel_idle, 771 .enter_s2idle = intel_idle_s2idle, }, 772 { 773 .name = "C4", 774 .desc = "MWAIT 0x30", 775 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 776 .exit_latency = 100, 777 .target_residency = 400, 778 .enter = &intel_idle, 779 .enter_s2idle = intel_idle_s2idle, }, 780 { 781 .name = "C6", 782 .desc = "MWAIT 0x52", 783 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 784 .exit_latency = 140, 785 .target_residency = 560, 786 .enter = &intel_idle, 787 .enter_s2idle = intel_idle_s2idle, }, 788 { 789 .enter = NULL } 790 }; 791 static struct cpuidle_state tangier_cstates[] __initdata = { 792 { 793 .name = "C1", 794 .desc = "MWAIT 0x00", 795 .flags = MWAIT2flg(0x00), 796 .exit_latency = 1, 797 .target_residency = 4, 798 .enter = &intel_idle, 799 .enter_s2idle = intel_idle_s2idle, }, 800 { 801 .name = "C4", 802 .desc = "MWAIT 0x30", 803 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 804 .exit_latency = 100, 805 .target_residency = 400, 806 .enter = &intel_idle, 807 .enter_s2idle = intel_idle_s2idle, }, 808 { 809 .name = 
"C6", 810 .desc = "MWAIT 0x52", 811 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 812 .exit_latency = 140, 813 .target_residency = 560, 814 .enter = &intel_idle, 815 .enter_s2idle = intel_idle_s2idle, }, 816 { 817 .name = "C7", 818 .desc = "MWAIT 0x60", 819 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 820 .exit_latency = 1200, 821 .target_residency = 4000, 822 .enter = &intel_idle, 823 .enter_s2idle = intel_idle_s2idle, }, 824 { 825 .name = "C9", 826 .desc = "MWAIT 0x64", 827 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 828 .exit_latency = 10000, 829 .target_residency = 20000, 830 .enter = &intel_idle, 831 .enter_s2idle = intel_idle_s2idle, }, 832 { 833 .enter = NULL } 834 }; 835 static struct cpuidle_state avn_cstates[] __initdata = { 836 { 837 .name = "C1", 838 .desc = "MWAIT 0x00", 839 .flags = MWAIT2flg(0x00), 840 .exit_latency = 2, 841 .target_residency = 2, 842 .enter = &intel_idle, 843 .enter_s2idle = intel_idle_s2idle, }, 844 { 845 .name = "C6", 846 .desc = "MWAIT 0x51", 847 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 848 .exit_latency = 15, 849 .target_residency = 45, 850 .enter = &intel_idle, 851 .enter_s2idle = intel_idle_s2idle, }, 852 { 853 .enter = NULL } 854 }; 855 static struct cpuidle_state knl_cstates[] __initdata = { 856 { 857 .name = "C1", 858 .desc = "MWAIT 0x00", 859 .flags = MWAIT2flg(0x00), 860 .exit_latency = 1, 861 .target_residency = 2, 862 .enter = &intel_idle, 863 .enter_s2idle = intel_idle_s2idle }, 864 { 865 .name = "C6", 866 .desc = "MWAIT 0x10", 867 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 868 .exit_latency = 120, 869 .target_residency = 500, 870 .enter = &intel_idle, 871 .enter_s2idle = intel_idle_s2idle }, 872 { 873 .enter = NULL } 874 }; 875 876 static struct cpuidle_state bxt_cstates[] __initdata = { 877 { 878 .name = "C1", 879 .desc = "MWAIT 0x00", 880 .flags = MWAIT2flg(0x00), 881 .exit_latency = 2, 882 .target_residency = 2, 883 .enter = &intel_idle, 884 .enter_s2idle = 
intel_idle_s2idle, }, 885 { 886 .name = "C1E", 887 .desc = "MWAIT 0x01", 888 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 889 .exit_latency = 10, 890 .target_residency = 20, 891 .enter = &intel_idle, 892 .enter_s2idle = intel_idle_s2idle, }, 893 { 894 .name = "C6", 895 .desc = "MWAIT 0x20", 896 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 897 .exit_latency = 133, 898 .target_residency = 133, 899 .enter = &intel_idle, 900 .enter_s2idle = intel_idle_s2idle, }, 901 { 902 .name = "C7s", 903 .desc = "MWAIT 0x31", 904 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 905 .exit_latency = 155, 906 .target_residency = 155, 907 .enter = &intel_idle, 908 .enter_s2idle = intel_idle_s2idle, }, 909 { 910 .name = "C8", 911 .desc = "MWAIT 0x40", 912 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 913 .exit_latency = 1000, 914 .target_residency = 1000, 915 .enter = &intel_idle, 916 .enter_s2idle = intel_idle_s2idle, }, 917 { 918 .name = "C9", 919 .desc = "MWAIT 0x50", 920 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 921 .exit_latency = 2000, 922 .target_residency = 2000, 923 .enter = &intel_idle, 924 .enter_s2idle = intel_idle_s2idle, }, 925 { 926 .name = "C10", 927 .desc = "MWAIT 0x60", 928 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 929 .exit_latency = 10000, 930 .target_residency = 10000, 931 .enter = &intel_idle, 932 .enter_s2idle = intel_idle_s2idle, }, 933 { 934 .enter = NULL } 935 }; 936 937 static struct cpuidle_state dnv_cstates[] __initdata = { 938 { 939 .name = "C1", 940 .desc = "MWAIT 0x00", 941 .flags = MWAIT2flg(0x00), 942 .exit_latency = 2, 943 .target_residency = 2, 944 .enter = &intel_idle, 945 .enter_s2idle = intel_idle_s2idle, }, 946 { 947 .name = "C1E", 948 .desc = "MWAIT 0x01", 949 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 950 .exit_latency = 10, 951 .target_residency = 20, 952 .enter = &intel_idle, 953 .enter_s2idle = intel_idle_s2idle, }, 954 { 955 .name = "C6", 956 .desc = "MWAIT 0x20", 957 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 958 .exit_latency = 50, 959 .target_residency = 500, 960 .enter = &intel_idle, 961 .enter_s2idle = intel_idle_s2idle, }, 962 { 963 .enter = NULL } 964 }; 965 966 /* 967 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 968 * C6, and this is indicated in the CPUID mwait leaf. 969 */ 970 static struct cpuidle_state snr_cstates[] __initdata = { 971 { 972 .name = "C1", 973 .desc = "MWAIT 0x00", 974 .flags = MWAIT2flg(0x00), 975 .exit_latency = 2, 976 .target_residency = 2, 977 .enter = &intel_idle, 978 .enter_s2idle = intel_idle_s2idle, }, 979 { 980 .name = "C1E", 981 .desc = "MWAIT 0x01", 982 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 983 .exit_latency = 15, 984 .target_residency = 25, 985 .enter = &intel_idle, 986 .enter_s2idle = intel_idle_s2idle, }, 987 { 988 .name = "C6", 989 .desc = "MWAIT 0x20", 990 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 991 .exit_latency = 130, 992 .target_residency = 500, 993 .enter = &intel_idle, 994 .enter_s2idle = intel_idle_s2idle, }, 995 { 996 .enter = NULL } 997 }; 998 999 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1000 .state_table = nehalem_cstates, 1001 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1002 .disable_promotion_to_c1e = true, 1003 }; 1004 1005 static const struct idle_cpu idle_cpu_nhx __initconst = { 1006 .state_table = nehalem_cstates, 1007 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1008 .disable_promotion_to_c1e = true, 1009 .use_acpi = true, 1010 }; 1011 1012 static const struct idle_cpu idle_cpu_atom __initconst = { 1013 .state_table = atom_cstates, 1014 }; 1015 1016 static const struct idle_cpu idle_cpu_tangier __initconst = { 1017 .state_table = tangier_cstates, 1018 }; 1019 1020 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1021 .state_table = atom_cstates, 1022 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1023 }; 1024 
1025 static const struct idle_cpu idle_cpu_snb __initconst = { 1026 .state_table = snb_cstates, 1027 .disable_promotion_to_c1e = true, 1028 }; 1029 1030 static const struct idle_cpu idle_cpu_snx __initconst = { 1031 .state_table = snb_cstates, 1032 .disable_promotion_to_c1e = true, 1033 .use_acpi = true, 1034 }; 1035 1036 static const struct idle_cpu idle_cpu_byt __initconst = { 1037 .state_table = byt_cstates, 1038 .disable_promotion_to_c1e = true, 1039 .byt_auto_demotion_disable_flag = true, 1040 }; 1041 1042 static const struct idle_cpu idle_cpu_cht __initconst = { 1043 .state_table = cht_cstates, 1044 .disable_promotion_to_c1e = true, 1045 .byt_auto_demotion_disable_flag = true, 1046 }; 1047 1048 static const struct idle_cpu idle_cpu_ivb __initconst = { 1049 .state_table = ivb_cstates, 1050 .disable_promotion_to_c1e = true, 1051 }; 1052 1053 static const struct idle_cpu idle_cpu_ivt __initconst = { 1054 .state_table = ivt_cstates, 1055 .disable_promotion_to_c1e = true, 1056 .use_acpi = true, 1057 }; 1058 1059 static const struct idle_cpu idle_cpu_hsw __initconst = { 1060 .state_table = hsw_cstates, 1061 .disable_promotion_to_c1e = true, 1062 }; 1063 1064 static const struct idle_cpu idle_cpu_hsx __initconst = { 1065 .state_table = hsw_cstates, 1066 .disable_promotion_to_c1e = true, 1067 .use_acpi = true, 1068 }; 1069 1070 static const struct idle_cpu idle_cpu_bdw __initconst = { 1071 .state_table = bdw_cstates, 1072 .disable_promotion_to_c1e = true, 1073 }; 1074 1075 static const struct idle_cpu idle_cpu_bdx __initconst = { 1076 .state_table = bdw_cstates, 1077 .disable_promotion_to_c1e = true, 1078 .use_acpi = true, 1079 }; 1080 1081 static const struct idle_cpu idle_cpu_skl __initconst = { 1082 .state_table = skl_cstates, 1083 .disable_promotion_to_c1e = true, 1084 }; 1085 1086 static const struct idle_cpu idle_cpu_skx __initconst = { 1087 .state_table = skx_cstates, 1088 .disable_promotion_to_c1e = true, 1089 .use_acpi = true, 1090 }; 1091 1092 static const 
struct idle_cpu idle_cpu_icx __initconst = { 1093 .state_table = icx_cstates, 1094 .disable_promotion_to_c1e = true, 1095 .use_acpi = true, 1096 }; 1097 1098 static const struct idle_cpu idle_cpu_avn __initconst = { 1099 .state_table = avn_cstates, 1100 .disable_promotion_to_c1e = true, 1101 .use_acpi = true, 1102 }; 1103 1104 static const struct idle_cpu idle_cpu_knl __initconst = { 1105 .state_table = knl_cstates, 1106 .use_acpi = true, 1107 }; 1108 1109 static const struct idle_cpu idle_cpu_bxt __initconst = { 1110 .state_table = bxt_cstates, 1111 .disable_promotion_to_c1e = true, 1112 }; 1113 1114 static const struct idle_cpu idle_cpu_dnv __initconst = { 1115 .state_table = dnv_cstates, 1116 .disable_promotion_to_c1e = true, 1117 .use_acpi = true, 1118 }; 1119 1120 static const struct idle_cpu idle_cpu_snr __initconst = { 1121 .state_table = snr_cstates, 1122 .disable_promotion_to_c1e = true, 1123 .use_acpi = true, 1124 }; 1125 1126 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1127 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1128 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1129 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1130 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1131 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1132 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1133 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1134 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1135 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1136 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1137 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1138 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1139 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1140 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1141 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1142 
X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1143 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1144 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1145 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1146 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1147 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1148 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1149 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1150 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1151 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1152 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1153 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1154 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1155 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1156 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1157 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1158 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1159 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1160 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1161 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1162 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1163 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1164 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1165 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1166 {} 1167 }; 1168 1169 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1170 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1171 {} 1172 }; 1173 1174 static bool __init intel_idle_max_cstate_reached(int cstate) 1175 { 1176 if (cstate + 1 > max_cstate) { 1177 pr_info("max_cstate %d reached\n", max_cstate); 1178 return true; 1179 } 1180 return false; 1181 } 1182 1183 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1184 { 1185 unsigned long eax = flg2MWAIT(state->flags); 
1186 1187 if (boot_cpu_has(X86_FEATURE_ARAT)) 1188 return false; 1189 1190 /* 1191 * Switch over to one-shot tick broadcast if the target C-state 1192 * is deeper than C1. 1193 */ 1194 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1195 } 1196 1197 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1198 #include <acpi/processor.h> 1199 1200 static bool no_acpi __read_mostly; 1201 module_param(no_acpi, bool, 0444); 1202 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1203 1204 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1205 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1206 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1207 1208 static struct acpi_processor_power acpi_state_table __initdata; 1209 1210 /** 1211 * intel_idle_cst_usable - Check if the _CST information can be used. 1212 * 1213 * Check if all of the C-states listed by _CST in the max_cstate range are 1214 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
1215 */ 1216 static bool __init intel_idle_cst_usable(void) 1217 { 1218 int cstate, limit; 1219 1220 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1221 acpi_state_table.count); 1222 1223 for (cstate = 1; cstate < limit; cstate++) { 1224 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1225 1226 if (cx->entry_method != ACPI_CSTATE_FFH) 1227 return false; 1228 } 1229 1230 return true; 1231 } 1232 1233 static bool __init intel_idle_acpi_cst_extract(void) 1234 { 1235 unsigned int cpu; 1236 1237 if (no_acpi) { 1238 pr_debug("Not allowed to use ACPI _CST\n"); 1239 return false; 1240 } 1241 1242 for_each_possible_cpu(cpu) { 1243 struct acpi_processor *pr = per_cpu(processors, cpu); 1244 1245 if (!pr) 1246 continue; 1247 1248 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1249 continue; 1250 1251 acpi_state_table.count++; 1252 1253 if (!intel_idle_cst_usable()) 1254 continue; 1255 1256 if (!acpi_processor_claim_cst_control()) 1257 break; 1258 1259 return true; 1260 } 1261 1262 acpi_state_table.count = 0; 1263 pr_debug("ACPI _CST not found or not usable\n"); 1264 return false; 1265 } 1266 1267 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1268 { 1269 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1270 1271 /* 1272 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1273 * the interesting states are ACPI_CSTATE_FFH. 
1274 */ 1275 for (cstate = 1; cstate < limit; cstate++) { 1276 struct acpi_processor_cx *cx; 1277 struct cpuidle_state *state; 1278 1279 if (intel_idle_max_cstate_reached(cstate - 1)) 1280 break; 1281 1282 cx = &acpi_state_table.states[cstate]; 1283 1284 state = &drv->states[drv->state_count++]; 1285 1286 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1287 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1288 state->exit_latency = cx->latency; 1289 /* 1290 * For C1-type C-states use the same number for both the exit 1291 * latency and target residency, because that is the case for 1292 * C1 in the majority of the static C-states tables above. 1293 * For the other types of C-states, however, set the target 1294 * residency to 3 times the exit latency which should lead to 1295 * a reasonable balance between energy-efficiency and 1296 * performance in the majority of interesting cases. 1297 */ 1298 state->target_residency = cx->latency; 1299 if (cx->type > ACPI_STATE_C1) 1300 state->target_residency *= 3; 1301 1302 state->flags = MWAIT2flg(cx->address); 1303 if (cx->type > ACPI_STATE_C2) 1304 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1305 1306 if (disabled_states_mask & BIT(cstate)) 1307 state->flags |= CPUIDLE_FLAG_OFF; 1308 1309 if (intel_idle_state_needs_timer_stop(state)) 1310 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1311 1312 state->enter = intel_idle; 1313 state->enter_s2idle = intel_idle_s2idle; 1314 } 1315 } 1316 1317 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1318 { 1319 int cstate, limit; 1320 1321 /* 1322 * If there are no _CST C-states, do not disable any C-states by 1323 * default. 1324 */ 1325 if (!acpi_state_table.count) 1326 return false; 1327 1328 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1329 /* 1330 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1331 * the interesting states are ACPI_CSTATE_FFH. 
1332 */ 1333 for (cstate = 1; cstate < limit; cstate++) { 1334 if (acpi_state_table.states[cstate].address == mwait_hint) 1335 return false; 1336 } 1337 return true; 1338 } 1339 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1340 #define force_use_acpi (false) 1341 1342 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1343 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1344 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1345 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1346 1347 /** 1348 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1349 * 1350 * Tune IVT multi-socket targets. 1351 * Assumption: num_sockets == (max_package_num + 1). 1352 */ 1353 static void __init ivt_idle_state_table_update(void) 1354 { 1355 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1356 int cpu, package_num, num_sockets = 1; 1357 1358 for_each_online_cpu(cpu) { 1359 package_num = topology_physical_package_id(cpu); 1360 if (package_num + 1 > num_sockets) { 1361 num_sockets = package_num + 1; 1362 1363 if (num_sockets > 4) { 1364 cpuidle_state_table = ivt_cstates_8s; 1365 return; 1366 } 1367 } 1368 } 1369 1370 if (num_sockets > 2) 1371 cpuidle_state_table = ivt_cstates_4s; 1372 1373 /* else, 1 and 2 socket systems use default ivt_cstates */ 1374 } 1375 1376 /** 1377 * irtl_2_usec - IRTL to microseconds conversion. 1378 * @irtl: IRTL MSR value. 1379 * 1380 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1381 */ 1382 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1383 { 1384 static const unsigned int irtl_ns_units[] __initconst = { 1385 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1386 }; 1387 unsigned long long ns; 1388 1389 if (!irtl) 1390 return 0; 1391 1392 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1393 1394 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1395 } 1396 1397 /** 1398 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1399 * 1400 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1401 * definitive maximum latency and use the same value for target_residency. 1402 */ 1403 static void __init bxt_idle_state_table_update(void) 1404 { 1405 unsigned long long msr; 1406 unsigned int usec; 1407 1408 rdmsrl(MSR_PKGC6_IRTL, msr); 1409 usec = irtl_2_usec(msr); 1410 if (usec) { 1411 bxt_cstates[2].exit_latency = usec; 1412 bxt_cstates[2].target_residency = usec; 1413 } 1414 1415 rdmsrl(MSR_PKGC7_IRTL, msr); 1416 usec = irtl_2_usec(msr); 1417 if (usec) { 1418 bxt_cstates[3].exit_latency = usec; 1419 bxt_cstates[3].target_residency = usec; 1420 } 1421 1422 rdmsrl(MSR_PKGC8_IRTL, msr); 1423 usec = irtl_2_usec(msr); 1424 if (usec) { 1425 bxt_cstates[4].exit_latency = usec; 1426 bxt_cstates[4].target_residency = usec; 1427 } 1428 1429 rdmsrl(MSR_PKGC9_IRTL, msr); 1430 usec = irtl_2_usec(msr); 1431 if (usec) { 1432 bxt_cstates[5].exit_latency = usec; 1433 bxt_cstates[5].target_residency = usec; 1434 } 1435 1436 rdmsrl(MSR_PKGC10_IRTL, msr); 1437 usec = irtl_2_usec(msr); 1438 if (usec) { 1439 bxt_cstates[6].exit_latency = usec; 1440 bxt_cstates[6].target_residency = usec; 1441 } 1442 1443 } 1444 1445 /** 1446 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1447 * 1448 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1449 */ 1450 static void __init sklh_idle_state_table_update(void) 1451 { 1452 unsigned long long msr; 1453 unsigned int eax, ebx, ecx, edx; 1454 1455 1456 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1457 if (max_cstate <= 7) 1458 return; 1459 1460 /* if PC10 not present in CPUID.MWAIT.EDX */ 1461 if ((mwait_substates & (0xF << 28)) == 0) 1462 return; 1463 1464 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1465 1466 /* PC10 is not enabled in PKG C-state limit */ 1467 if ((msr & 0xF) != 8) 1468 return; 1469 1470 ecx = 0; 1471 cpuid(7, &eax, &ebx, &ecx, &edx); 1472 1473 /* if SGX is present */ 1474 if (ebx & (1 << 2)) { 1475 1476 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1477 1478 /* if SGX is enabled */ 1479 if (msr & (1 << 18)) 1480 return; 1481 } 1482 1483 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1484 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1485 } 1486 1487 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1488 { 1489 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1490 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1491 MWAIT_SUBSTATE_MASK; 1492 1493 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1494 if (num_substates == 0) 1495 return false; 1496 1497 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1498 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1499 1500 return true; 1501 } 1502 1503 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1504 { 1505 int cstate; 1506 1507 switch (boot_cpu_data.x86_model) { 1508 case INTEL_FAM6_IVYBRIDGE_X: 1509 ivt_idle_state_table_update(); 1510 break; 1511 case INTEL_FAM6_ATOM_GOLDMONT: 1512 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1513 bxt_idle_state_table_update(); 1514 break; 1515 case INTEL_FAM6_SKYLAKE: 1516 sklh_idle_state_table_update(); 1517 break; 1518 } 1519 1520 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1521 unsigned int mwait_hint; 1522 1523 if (intel_idle_max_cstate_reached(cstate)) 1524 break; 1525 1526 if (!cpuidle_state_table[cstate].enter && 1527 !cpuidle_state_table[cstate].enter_s2idle) 1528 break; 1529 1530 /* If marked as unusable, skip this state. */ 1531 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1532 pr_debug("state %s is disabled\n", 1533 cpuidle_state_table[cstate].name); 1534 continue; 1535 } 1536 1537 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1538 if (!intel_idle_verify_cstate(mwait_hint)) 1539 continue; 1540 1541 /* Structure copy. 
*/ 1542 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1543 1544 if ((disabled_states_mask & BIT(drv->state_count)) || 1545 ((icpu->use_acpi || force_use_acpi) && 1546 intel_idle_off_by_default(mwait_hint) && 1547 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1548 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1549 1550 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1551 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1552 1553 drv->state_count++; 1554 } 1555 1556 if (icpu->byt_auto_demotion_disable_flag) { 1557 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1558 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1559 } 1560 } 1561 1562 /** 1563 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1564 * @drv: cpuidle driver structure to initialize. 1565 */ 1566 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1567 { 1568 cpuidle_poll_state_init(drv); 1569 1570 if (disabled_states_mask & BIT(0)) 1571 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1572 1573 drv->state_count = 1; 1574 1575 if (icpu) 1576 intel_idle_init_cstates_icpu(drv); 1577 else 1578 intel_idle_init_cstates_acpi(drv); 1579 } 1580 1581 static void auto_demotion_disable(void) 1582 { 1583 unsigned long long msr_bits; 1584 1585 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1586 msr_bits &= ~auto_demotion_disable_flags; 1587 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1588 } 1589 1590 static void c1e_promotion_disable(void) 1591 { 1592 unsigned long long msr_bits; 1593 1594 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1595 msr_bits &= ~0x2; 1596 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1597 } 1598 1599 /** 1600 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1601 * @cpu: CPU to initialize. 1602 * 1603 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1604 * with the processor model flags. 
1605 */ 1606 static int intel_idle_cpu_init(unsigned int cpu) 1607 { 1608 struct cpuidle_device *dev; 1609 1610 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1611 dev->cpu = cpu; 1612 1613 if (cpuidle_register_device(dev)) { 1614 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1615 return -EIO; 1616 } 1617 1618 if (auto_demotion_disable_flags) 1619 auto_demotion_disable(); 1620 1621 if (disable_promotion_to_c1e) 1622 c1e_promotion_disable(); 1623 1624 return 0; 1625 } 1626 1627 static int intel_idle_cpu_online(unsigned int cpu) 1628 { 1629 struct cpuidle_device *dev; 1630 1631 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1632 tick_broadcast_enable(); 1633 1634 /* 1635 * Some systems can hotplug a cpu at runtime after 1636 * the kernel has booted, we have to initialize the 1637 * driver in this case 1638 */ 1639 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1640 if (!dev->registered) 1641 return intel_idle_cpu_init(cpu); 1642 1643 return 0; 1644 } 1645 1646 /** 1647 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
1648 */ 1649 static void __init intel_idle_cpuidle_devices_uninit(void) 1650 { 1651 int i; 1652 1653 for_each_online_cpu(i) 1654 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1655 } 1656 1657 static int __init intel_idle_init(void) 1658 { 1659 const struct x86_cpu_id *id; 1660 unsigned int eax, ebx, ecx; 1661 int retval; 1662 1663 /* Do not load intel_idle at all for now if idle= is passed */ 1664 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1665 return -ENODEV; 1666 1667 if (max_cstate == 0) { 1668 pr_debug("disabled\n"); 1669 return -EPERM; 1670 } 1671 1672 id = x86_match_cpu(intel_idle_ids); 1673 if (id) { 1674 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1675 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1676 return -ENODEV; 1677 } 1678 } else { 1679 id = x86_match_cpu(intel_mwait_ids); 1680 if (!id) 1681 return -ENODEV; 1682 } 1683 1684 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1685 return -ENODEV; 1686 1687 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1688 1689 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1690 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1691 !mwait_substates) 1692 return -ENODEV; 1693 1694 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1695 1696 icpu = (const struct idle_cpu *)id->driver_data; 1697 if (icpu) { 1698 cpuidle_state_table = icpu->state_table; 1699 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 1700 disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; 1701 if (icpu->use_acpi || force_use_acpi) 1702 intel_idle_acpi_cst_extract(); 1703 } else if (!intel_idle_acpi_cst_extract()) { 1704 return -ENODEV; 1705 } 1706 1707 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1708 boot_cpu_data.x86_model); 1709 1710 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1711 if (!intel_idle_cpuidle_devices) 1712 return -ENOMEM; 1713 1714 intel_idle_cpuidle_driver_init(&intel_idle_driver); 1715 1716 retval = cpuidle_register_driver(&intel_idle_driver); 
1717 if (retval) { 1718 struct cpuidle_driver *drv = cpuidle_get_driver(); 1719 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1720 drv ? drv->name : "none"); 1721 goto init_driver_fail; 1722 } 1723 1724 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1725 intel_idle_cpu_online, NULL); 1726 if (retval < 0) 1727 goto hp_setup_fail; 1728 1729 pr_debug("Local APIC timer is reliable in %s\n", 1730 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 1731 1732 return 0; 1733 1734 hp_setup_fail: 1735 intel_idle_cpuidle_devices_uninit(); 1736 cpuidle_unregister_driver(&intel_idle_driver); 1737 init_driver_fail: 1738 free_percpu(intel_idle_cpuidle_devices); 1739 return retval; 1740 1741 } 1742 device_initcall(intel_idle_init); 1743 1744 /* 1745 * We are not really modular, but we used to support that. Meaning we also 1746 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1747 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1748 * is the easiest way (currently) to continue doing that. 1749 */ 1750 module_param(max_cstate, int, 0444); 1751 /* 1752 * The positions of the bits that are set in this number are the indices of the 1753 * idle states to be disabled by default (as reflected by the names of the 1754 * corresponding idle state directories in sysfs, "state0", "state1" ... 1755 * "state<i>" ..., where <i> is the index of the given state). 1756 */ 1757 module_param_named(states_off, disabled_states_mask, uint, 0444); 1758 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 1759