// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
#define DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.4.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;

static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
static unsigned int lapic_timer_reliable_states = (1 << 1);	/* Default to only C1 */

struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
};

static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
		      struct cpuidle_driver *drv, int index);
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index);
static struct cpuidle_state *cpuidle_state_table;

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
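
/*
 * As an illustration of the encoding above: the C6 entries below use
 * MWAIT hint 0x20, so MWAIT2flg(0x20) stores 0x20000000 in .flags and
 * flg2MWAIT() recovers 0x20 at entry time.  Bits below bit 24 remain
 * free for cpuidle flags such as CPUIDLE_FLAG_TLB_FLUSHED.
 *
 * In the state tables that follow, .exit_latency and .target_residency
 * are in microseconds, entries are listed from shallowest to deepest,
 * and a terminating entry with .enter == NULL marks the end of each
 * table (checked by intel_idle_cpuidle_driver_init()).
 */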

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
static struct cpuidle_state nehalem_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state snb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state byt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
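
/*
 * For the Bay Trail table above and the Cherry Trail table below,
 * idle_cpu_byt/idle_cpu_cht set byt_auto_demotion_disable_flag, which
 * makes intel_idle_cpuidle_driver_init() write zero to
 * MSR_CC6_DEMOTION_POLICY_CONFIG and MSR_MC6_DEMOTION_POLICY_CONFIG.
 */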

static struct cpuidle_state cht_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
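
/*
 * IVT (Ivy Town) ships three tables: ivt_cstates above for 1-2 socket
 * systems, and the _4s/_8s variants below for 3-4 sockets and for more
 * than 4 sockets; ivt_idle_state_table_update() picks one at init time.
 * Only the exit latencies and target residencies of the deeper states
 * differ between them.
 */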

static struct cpuidle_state ivt_cstates_4s[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates_8s[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state hsw_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state bdw_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
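
/*
 * Note that on SKL-H, sklh_idle_state_table_update() may mark the C8
 * and C9 entries of skl_cstates below as disabled when package C10 is
 * enabled and SGX is not; see that function for the exact conditions.
 */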

static struct cpuidle_state skl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state skx_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state atom_cstates[] = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state tangier_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state avn_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state knl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

static struct cpuidle_state bxt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state dnv_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
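
/*
 * Worked example for the enter path below, for a state using MWAIT
 * hint 0x20 (the C6 entries above): intel_idle() computes
 *
 *	cstate = ((0x20 >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1 = 3
 *
 * so on CPUs without X86_FEATURE_ARAT it tests bit 3 of
 * lapic_timer_reliable_states and, if that bit is clear, arms the
 * broadcast timer around mwait_idle_with_hints(0x20, 1); ECX bit 0
 * requests "break on interrupt" even when interrupts are masked.
 */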

/**
 * intel_idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: index of cpuidle state
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned int cstate;
	bool uninitialized_var(tick);
	int cpu = smp_processor_id();

	/*
	 * leave_mm() to avoid costly and often unnecessary wakeups
	 * for flushing the user TLB's associated with the active mm.
	 */
	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(cpu);

	if (!static_cpu_has(X86_FEATURE_ARAT)) {
		cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
				MWAIT_CSTATE_MASK) + 1;
		tick = false;
		if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
			tick = true;
			tick_broadcast_enter();
		}
	}

	mwait_idle_with_hints(eax, ecx);

	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
		tick_broadcast_exit();

	return index;
}

/**
 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: state index
 */
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	unsigned long eax = flg2MWAIT(drv->states[index].flags);

	mwait_idle_with_hints(eax, ecx);
}

static void __setup_broadcast_timer(bool on)
{
	if (on)
		tick_broadcast_enable();
	else
		tick_broadcast_disable();
}

static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~(icpu->auto_demotion_disable_flags);
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
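
/*
 * The two helpers above adjust model-specific MSRs: auto_demotion_disable()
 * clears the auto-demotion enable bits listed in
 * icpu->auto_demotion_disable_flags within MSR_PKG_CST_CONFIG_CONTROL, and
 * c1e_promotion_disable() clears bit 1 of MSR_IA32_POWER_CTL, which per the
 * SDM is the "C1E Enable" bit that lets hardware promote C1 requests to C1E.
 */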

static const struct idle_cpu idle_cpu_nehalem = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_atom = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_byt = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsw = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdw = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skl = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_avn = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_knl = {
	.state_table = knl_cstates,
};

static const struct idle_cpu idle_cpu_bxt = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft),
	INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb),
	INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb),
	INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier),
	INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht),
	INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb),
	INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt),
	INTEL_CPU_FAM6(HASWELL_CORE, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_ULT, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_GT3E, idle_cpu_hsw),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_X, idle_cpu_avn),
	INTEL_CPU_FAM6(BROADWELL_CORE, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_GT3E, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_XEON_D, idle_cpu_bdw),
	INTEL_CPU_FAM6(SKYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx),
	INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl),
	INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl),
	INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_X, idle_cpu_dnv),
	INTEL_CPU_FAM6(ATOM_TREMONT_X, idle_cpu_dnv),
	{}
};
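
/*
 * Hooking up a new model follows the same pattern: define a cpuidle_state
 * table, wrap it in an idle_cpu descriptor and add a match entry above.
 * A purely illustrative sketch (the names below do not exist in this file):
 *
 *	static const struct idle_cpu idle_cpu_newsoc = {
 *		.state_table = newsoc_cstates,
 *		.disable_promotion_to_c1e = true,
 *	};
 *
 *	INTEL_CPU_FAM6(NEWSOC, idle_cpu_newsoc),
 */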

/*
 * intel_idle_probe()
 */
static int __init intel_idle_probe(void)
{
	unsigned int eax, ebx, ecx;
	const struct x86_cpu_id *id;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (!id) {
		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
		    boot_cpu_data.x86 == 6)
			pr_debug("does not run on family %d model %d\n",
				 boot_cpu_data.x86, boot_cpu_data.x86_model);
		return -ENODEV;
	}

	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
		pr_debug("Please enable MWAIT in BIOS SETUP\n");
		return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	cpuidle_state_table = icpu->state_table;

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	return 0;
}

/*
 * intel_idle_cpuidle_devices_uninit()
 * Unregisters the cpuidle devices.
 */
static void intel_idle_cpuidle_devices_uninit(void)
{
	int i;
	struct cpuidle_device *dev;

	for_each_online_cpu(i) {
		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
		cpuidle_unregister_device(dev);
	}
}

/*
 * ivt_idle_state_table_update(void)
 *
 * Tune IVT multi-socket targets
 * Assumption: num_sockets == (max_package_num + 1)
 */
static void ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

static unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
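
/*
 * Example: an IRTL MSR value of 0x864 has time-unit field 2 (1024 ns)
 * in bits 12:10 and multiplier 0x64 (100) in bits 9:0, so irtl_2_usec()
 * returns 100 * 1024 / 1000 = 102 usec.
 */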

/*
 * bxt_idle_state_table_update(void)
 *
 * On BXT, we trust the IRTL to show the definitive maximum latency
 * We use the same value for target_residency.
 */
static void bxt_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int usec;

	rdmsrl(MSR_PKGC6_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[2].exit_latency = usec;
		bxt_cstates[2].target_residency = usec;
	}

	rdmsrl(MSR_PKGC7_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[3].exit_latency = usec;
		bxt_cstates[3].target_residency = usec;
	}

	rdmsrl(MSR_PKGC8_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[4].exit_latency = usec;
		bxt_cstates[4].target_residency = usec;
	}

	rdmsrl(MSR_PKGC9_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[5].exit_latency = usec;
		bxt_cstates[5].target_residency = usec;
	}

	rdmsrl(MSR_PKGC10_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[6].exit_latency = usec;
		bxt_cstates[6].target_residency = usec;
	}

}

/*
 * sklh_idle_state_table_update(void)
 *
 * On SKL-H (model 0x5e) disable C8 and C9 if:
 * C10 is enabled and SGX disabled
 */
static void sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].disabled = 1;	/* C8-SKL */
	skl_cstates[6].disabled = 1;	/* C9-SKL */
}

/*
 * intel_idle_state_table_update()
 *
 * Update the default state_table for this CPU-id
 */

static void intel_idle_state_table_update(void)
{
	switch (boot_cpu_data.x86_model) {

	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_DESKTOP:
		sklh_idle_state_table_update();
		break;
	}
}
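
/*
 * In the initialization loop below, the number of MWAIT sub-states
 * advertised for each state comes from CPUID.MWAIT (leaf 5) EDX, cached
 * in mwait_substates by intel_idle_probe().  For example, a state with
 * hint 0x20 has mwait_cstate == 2, so its count is the nibble at bits
 * 15:12 of mwait_substates; if that nibble is zero the state is skipped.
 */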

/*
 * intel_idle_cpuidle_driver_init()
 * allocate, initialize cpuidle_states
 */
static void __init intel_idle_cpuidle_driver_init(void)
{
	int cstate;
	struct cpuidle_driver *drv = &intel_idle_driver;

	intel_idle_state_table_update();

	cpuidle_poll_state_init(drv);
	drv->state_count = 1;

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		int num_substates, mwait_hint, mwait_cstate;

		if ((cpuidle_state_table[cstate].enter == NULL) &&
		    (cpuidle_state_table[cstate].enter_s2idle == NULL))
			break;

		if (cstate + 1 > max_cstate) {
			pr_info("max_cstate %d reached\n", max_cstate);
			break;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);

		/* number of sub-states for this state in CPUID.MWAIT */
		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
					& MWAIT_SUBSTATE_MASK;

		/* if NO sub-states for this state in CPUID, skip it */
		if (num_substates == 0)
			continue;

		/* if state marked as disabled, skip it */
		if (cpuidle_state_table[cstate].disabled != 0) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}


		if (((mwait_cstate + 1) > 2) &&
			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			mark_tsc_unstable("TSC halts in idle"
					" states deeper than C2");

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[cstate];

		drv->state_count += 1;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}


/*
 * intel_idle_cpu_init()
 * allocate, initialize, register cpuidle_devices
 * @cpu: cpu/core to initialize
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (icpu->auto_demotion_disable_flags)
		auto_demotion_disable();

	if (icpu->disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}

static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
		__setup_broadcast_timer(true);

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted; we have to initialize the
	 * driver in this case.
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}
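
/*
 * intel_idle_init() below registers intel_idle_cpu_online() as a dynamic
 * CPU hotplug startup callback, so it runs on every CPU that is already
 * online when the driver loads and again on each CPU onlined later; the
 * per-CPU cpuidle device is registered lazily from there.
 */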

static int __init intel_idle_init(void)
{
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	retval = intel_idle_probe();
	if (retval)
		return retval;

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (intel_idle_cpuidle_devices == NULL)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init();
	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("lapic_timer_reliable_states 0x%x\n",
		 lapic_timer_reliable_states);

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);