/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>	/* need_resched() */
#include <linux/latency.h>
#include <linux/clockchips.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>

#define ACPI_PROCESSOR_COMPONENT	0x01000000
#define ACPI_PROCESSOR_CLASS		"processor"
#define _COMPONENT			ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER	"power"
#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
static void (*pm_idle_save) (void) __read_mostly;
module_param(max_cstate, uint, 0644);

static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 *  100 HZ: 0x0000000F:  4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
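/*
 * Worked example of the initializer above: with HZ=250 it evaluates to
 * (1U << (250 / 25)) - 1 = (1U << 10) - 1 = 0x3FF, i.e. a 10-jiffy history
 * window of 10/250 s = 40 ms, matching the 40 ms windows listed for the
 * HZ=800 and HZ=100 cases.  The mask is consulted later (as
 * promotion.threshold.bm / demotion.threshold.bm) to decide whether recent
 * bus-master activity should block promotion into C3.
 */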
module_param(bm_history, uint, 0644);
/* --------------------------------------------------------------------------
                              Power Management
   -------------------------------------------------------------------------- */

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

	max_cstate = (long)id->driver_data;

	return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void *)1},
	{ set_max_cstate, "Medion 41700", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
	 (void *)2},
	{},
};

static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
	if (t2 >= t1)
		return (t2 - t1);
	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
	else
		return ((0xFFFFFFFF - t1) + t2);
}
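/*
 * The ACPI PM timer runs at PM_TIMER_FREQUENCY (~3.58 MHz) and, unless the
 * FADT advertises a 32-bit timer, wraps at 24 bits.  As an illustration of
 * the wrap-around branch above: t1 = 0x00FFFF00 and t2 = 0x00000100 yield
 * (0x00FFFFFF - 0x00FFFF00) + 0x00000100 = 0x1FF ticks, roughly 143 us.
 */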

static void
acpi_processor_power_activate(struct acpi_processor *pr,
			      struct acpi_processor_cx *new)
{
	struct acpi_processor_cx *old;

	if (!pr || !new)
		return;

	old = pr->power.state;

	if (old)
		old->promotion.count = 0;
	new->demotion.count = 0;

	/* Cleanup from old state. */
	if (old) {
		switch (old->type) {
		case ACPI_STATE_C3:
			/* Disable bus master reload */
			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
			break;
		}
	}

	/* Prepare to use new state. */
	switch (new->type) {
	case ACPI_STATE_C3:
		/* Enable bus master reload */
		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		break;
	}

	pr->power.state = new;

	return;
}

static void acpi_safe_halt(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched())
		safe_halt();
	current_thread_info()->status |= TS_POLLING;
}

static atomic_t c3_cpu_count;

/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
{
	if (cstate->space_id == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cstate);
	} else {
		int unused;
		/* IO port based C-state */
		inb(cstate->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
}

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cx)
{
	struct acpi_processor_power *pwr = &pr->power;
	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

	/*
	 * Check, if one of the previous states already marked the lapic
	 * unstable
	 */
	if (pwr->timer_broadcast_on_state < state)
		return;

	if (cx->type >= type)
		pr->power.timer_broadcast_on_state = state;
}

static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS
	unsigned long reason;

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
#else
	cpumask_t mask = cpumask_of_cpu(pr->id);

	if (pr->power.timer_broadcast_on_state < INT_MAX)
		on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
	else
		on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
#endif
}

/* Power(C) State timer broadcast control */
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS

	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

		reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
#endif
}

#else

static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate) { }
static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}

#endif

static void acpi_processor_idle(void)
{
	struct acpi_processor *pr = NULL;
	struct acpi_processor_cx *cx = NULL;
	struct acpi_processor_cx *next_state = NULL;
	int sleep_ticks = 0;
	u32 t1, t2 = 0;

	pr = processors[smp_processor_id()];
	if (!pr)
		return;

	/*
	 * Interrupts must be disabled during bus mastering calculations and
	 * for C2/C3 transitions.
	 */
	local_irq_disable();

	/*
	 * Check whether we truly need to go idle, or should
	 * reschedule:
	 */
	if (unlikely(need_resched())) {
		local_irq_enable();
		return;
	}

	cx = pr->power.state;
	if (!cx) {
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();
		return;
	}

	/*
	 * Check BM Activity
	 * -----------------
	 * Check for bus mastering activity (if required), record, and check
	 * for demotion.
	 */
	if (pr->flags.bm_check) {
		u32 bm_status = 0;
		unsigned long diff = jiffies - pr->power.bm_check_timestamp;

		if (diff > 31)
			diff = 31;

		pr->power.bm_activity <<= diff;

		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
		if (bm_status) {
			pr->power.bm_activity |= 0x1;
			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
		}
		/*
		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
		 * the true state of bus mastering activity; forcing us to
		 * manually check the BMIDEA bit of each IDE channel.
		 */
		else if (errata.piix4.bmisx) {
			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
				pr->power.bm_activity |= 0x1;
		}

		pr->power.bm_check_timestamp = jiffies;

		/*
		 * If bus mastering is or was active this jiffy, demote
		 * to avoid a faulty transition.  Note that the processor
		 * won't enter a low-power state during this call (to this
		 * function) but should upon the next.
		 *
		 * TBD: A better policy might be to fallback to the demotion
		 *      state (use it for this quantum only) instead of
		 *      demoting -- and rely on duration as our sole demotion
		 *      qualification.  This may, however, introduce DMA
		 *      issues (e.g. floppy DMA transfer overrun/underrun).
		 */
		if ((pr->power.bm_activity & 0x1) &&
		    cx->demotion.threshold.bm) {
			local_irq_enable();
			next_state = cx->demotion.state;
			goto end;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
	 * detection phase, to work cleanly with logical CPU hotplug.
	 */
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		cx = &pr->power.states[ACPI_STATE_C1];
#endif

	/*
	 * Sleep:
	 * ------
	 * Invoke the current Cx state to put the processor to sleep.
	 */
	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();
		if (need_resched()) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return;
		}
	}

	switch (cx->type) {

	case ACPI_STATE_C1:
		/*
		 * Invoke C1.
		 * Use the appropriate idle routine, the one that would
		 * be used without acpi C-states.
		 */
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();

		/*
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 */
		sleep_ticks = 0xFFFFFFFF;
		break;
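		/*
		 * Note on the 0xFFFFFFFF above: sleep_ticks is a signed int
		 * while the promotion/demotion threshold ticks it is compared
		 * with below are (assuming the usual u32 fields in
		 * struct acpi_processor_cx_policy) unsigned, so the compare is
		 * done as unsigned and a C1 "sleep" always counts as long.
		 * Promotion out of C1 is therefore governed purely by
		 * promotion.threshold.count (10 in the default policy below).
		 */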

	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C2 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C2, so notify users */
		mark_tsc_unstable("possible TSC halt in C2");
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;
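		/*
		 * The residency bookkeeping in the C2 and C3 paths is in
		 * PM timer ticks (~3.58 MHz): ticks_elapsed() gives the raw
		 * time between the two PM timer reads, from which the state's
		 * own worst-case latency (latency_ticks) and a small fixed
		 * entry overhead (C2_OVERHEAD/C3_OVERHEAD, ~1 us) are
		 * subtracted, so only the useful time asleep is credited
		 * toward the promotion/demotion decisions further down.
		 */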

	case ACPI_STATE_C3:

		if (pr->flags.bm_check) {
			if (atomic_inc_return(&c3_cpu_count) ==
			    num_online_cpus()) {
				/*
				 * All CPUs are trying to go to C3
				 * Disable bus master arbitration
				 */
				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			}
		} else {
			/* SMP with no shared cache... Invalidate cache */
			ACPI_FLUSH_CPU_CACHE();
		}

		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		if (pr->flags.bm_check) {
			/* Enable bus master arbitration */
			atomic_dec(&c3_cpu_count);
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		}

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C3, so notify users */
		mark_tsc_unstable("TSC halts in C3");
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	default:
		local_irq_enable();
		return;
	}
	cx->usage++;
	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
		cx->time += sleep_ticks;

	next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
	/* Don't do promotion/demotion */
	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
		next_state = cx;
		goto end;
	}
#endif

	/*
	 * Promotion?
	 * ----------
	 * Track the number of longs (time asleep is greater than threshold)
	 * and promote when the count threshold is reached.  Note that bus
	 * mastering activity may prevent promotions.
	 * Do not promote above max_cstate.
	 */
	if (cx->promotion.state &&
	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
		if (sleep_ticks > cx->promotion.threshold.ticks &&
		    cx->promotion.state->latency <= system_latency_constraint()) {
			cx->promotion.count++;
			cx->demotion.count = 0;
			if (cx->promotion.count >=
			    cx->promotion.threshold.count) {
				if (pr->flags.bm_check) {
					if (!(pr->power.bm_activity &
					      cx->promotion.threshold.bm)) {
						next_state = cx->promotion.state;
						goto end;
					}
				} else {
					next_state = cx->promotion.state;
					goto end;
				}
			}
		}
	}

	/*
	 * Demotion?
	 * ---------
	 * Track the number of shorts (time asleep is less than time threshold)
	 * and demote when the usage threshold is reached.
	 */
	if (cx->demotion.state) {
		if (sleep_ticks < cx->demotion.threshold.ticks) {
			cx->demotion.count++;
			cx->promotion.count = 0;
			if (cx->demotion.count >= cx->demotion.threshold.count) {
				next_state = cx->demotion.state;
				goto end;
			}
		}
	}

      end:
	/*
	 * Demote if current state exceeds max_cstate
	 * or if the latency of the current state is unacceptable
	 */
	if ((pr->power.state - pr->power.states) > max_cstate ||
	    pr->power.state->latency > system_latency_constraint()) {
		if (cx->demotion.state)
			next_state = cx->demotion.state;
	}

	/*
	 * New Cx State?
	 * -------------
	 * If we're going to start using a new Cx state we must clean up
	 * from the previous and prepare to use the new.
	 */
	if (next_state != pr->power.state)
		acpi_processor_power_activate(pr, next_state);
}

static int acpi_processor_set_power_policy(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int state_is_set = 0;
	struct acpi_processor_cx *lower = NULL;
	struct acpi_processor_cx *higher = NULL;
	struct acpi_processor_cx *cx;


	if (!pr)
		return -EINVAL;

	/*
	 * This function sets the default Cx state policy (OS idle handler).
	 * Our scheme is to promote quickly to C2 but more conservatively
	 * to C3.  We're favoring C2 for its characteristics of low latency
	 * (quick response), good power savings, and ability to allow bus
	 * mastering activity.  Note that the Cx state policy is completely
	 * customizable and can be altered dynamically.
	 */

	/* startup state */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (!state_is_set)
			pr->power.state = cx;
		state_is_set++;
		break;
	}

	if (!state_is_set)
		return -ENODEV;

	/* demotion */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (lower) {
			cx->demotion.state = lower;
			cx->demotion.threshold.ticks = cx->latency_ticks;
			cx->demotion.threshold.count = 1;
			if (cx->type == ACPI_STATE_C3)
				cx->demotion.threshold.bm = bm_history;
		}

		lower = cx;
	}

	/* promotion */
	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (higher) {
			cx->promotion.state = higher;
			cx->promotion.threshold.ticks = cx->latency_ticks;
			if (cx->type >= ACPI_STATE_C2)
				cx->promotion.threshold.count = 4;
			else
				cx->promotion.threshold.count = 10;
			if (higher->type == ACPI_STATE_C3)
				cx->promotion.threshold.bm = bm_history;
		}

		higher = cx;
	}

	return 0;
}
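/*
 * With the defaults above, a system exposing C1/C2/C3 behaves roughly as
 * follows: C1 promotes to C2 after 10 sufficiently long idle periods, C2
 * promotes to C3 after 4 idle periods longer than C2's latency_ticks and
 * only while no bus-master activity is recorded in the bm_history window,
 * and a single idle period shorter than the current state's latency_ticks
 * is enough to demote one level (demotion.threshold.count is 1) -- all of
 * this subject to max_cstate and the system latency constraint.
 */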

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr)
		return -EINVAL;

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;
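	/*
	 * Each remaining _CST element is expected to be a 4-entry package of
	 * the form (per the ACPI specification):
	 *   { Register (Buffer holding a Register() descriptor for the entry
	 *     method), Type (Integer: 1=C1, 2=C2, 3=C3),
	 *     Latency (Integer: worst-case exit latency in microseconds),
	 *     Power (Integer: average power consumption in milliwatts) }
	 * Malformed entries are simply skipped by the loop below.
	 */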

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.space_id = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
			    (pr->id, &cx, reg) == 0) {
				cx.space_id = ACPI_CSTATE_FFH;
			} else if (cx.type != ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * That is, we retain space_id of SYSTEM_IO for
				 * halt based C1.
				 * Otherwise, ignore this info and continue.
				 */
				continue;
			}
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

      end:
	kfree(buffer.pointer);

	return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

	if (!cx->address)
		return;

	/*
	 * C2 latency must be less than or equal to 100
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * Otherwise we've met all of our C2 requirements.
	 * Normalize the C2 latency to expedite policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}
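/*
 * latency_ticks is the state's exit latency converted from microseconds to
 * PM timer ticks by US_TO_PM_TIMER_TICKS().  With PM_TIMER_FREQUENCY of
 * 3579545 Hz, a 100 us C2 latency, for example, becomes
 * (100 * 3579) / 1000 = 357 ticks; this is the value the idle loop later
 * subtracts from (and compares against) the measured residency.
 */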

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag;


	if (!cx->address)
		return;

	/*
	 * C3 latency must be less than or equal to 1000
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (!bm_check_flag) {
		/* Determine whether bm_check is needed based on CPU */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
	} else {
		pr->flags.bm_check = bm_check_flag;
	}

	if (pr->flags.bm_check) {
		/* bus mastering control is necessary */
		if (!pr->flags.bm_control) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "C3 support requires bus mastering control\n"));
			return;
		}
	} else {
		/*
		 * WBINVD should be set in the FADT for the C3 state to be
		 * supported when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

	pr->power.timer_broadcast_on_state = INT_MAX;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
			acpi_processor_power_verify_c2(cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;
		}

		if (cx->valid)
			working++;
	}

	acpi_propagate_timer_broadcast(pr);

	return (working);
}
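/*
 * After verification, timer_broadcast_on_state holds the index of the
 * shallowest valid C-state whose entry may stop the local APIC timer
 * (INT_MAX if none does).  acpi_propagate_timer_broadcast() then turns
 * broadcast on for this CPU (or, without CONFIG_GENERIC_CLOCKEVENTS,
 * switches it to the IPI-driven timer), and acpi_state_timer_broadcast()
 * in the idle loop enters/leaves broadcast mode whenever a state at or
 * beyond that index is used.
 */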

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

	/*
	 * Set Default Policy
	 * ------------------
	 * Now that we know which states are supported, set the default
	 * policy.  Note that this policy can be changed dynamically
	 * (e.g. encourage deeper sleeps to conserve battery life when
	 * not on AC).
	 */
	result = acpi_processor_set_power_policy(pr);
	if (result)
		return result;

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int result = 0;


	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/* Fall back to the default idle loop */
	pm_idle = pm_idle_save;
	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */

	pr->flags.power = 0;
	result = acpi_processor_get_power_info(pr);
	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
		pm_idle = acpi_processor_idle;

	return result;
}

/* proc interface */

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
	struct acpi_processor *pr = seq->private;
	unsigned int i;


	if (!pr)
		goto end;

	seq_printf(seq, "active state: C%zd\n"
		   "max_cstate: C%d\n"
		   "bus master activity: %08x\n"
		   "maximum allowed latency: %d usec\n",
		   pr->power.state ? pr->power.state - pr->power.states : 0,
		   max_cstate, (unsigned)pr->power.bm_activity,
		   system_latency_constraint());

	seq_puts(seq, "states:\n");

	for (i = 1; i <= pr->power.count; i++) {
		seq_printf(seq, " %cC%d: ",
			   (&pr->power.states[i] ==
			    pr->power.state ? '*' : ' '), i);

		if (!pr->power.states[i].valid) {
			seq_puts(seq, "<not supported>\n");
			continue;
		}

		switch (pr->power.states[i].type) {
		case ACPI_STATE_C1:
			seq_printf(seq, "type[C1] ");
			break;
		case ACPI_STATE_C2:
			seq_printf(seq, "type[C2] ");
			break;
		case ACPI_STATE_C3:
			seq_printf(seq, "type[C3] ");
			break;
		default:
			seq_printf(seq, "type[--] ");
			break;
		}

		if (pr->power.states[i].promotion.state)
			seq_printf(seq, "promotion[C%zd] ",
				   (pr->power.states[i].promotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "promotion[--] ");

		if (pr->power.states[i].demotion.state)
			seq_printf(seq, "demotion[C%zd] ",
				   (pr->power.states[i].demotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "demotion[--] ");

		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
			   pr->power.states[i].latency,
			   pr->power.states[i].usage,
			   (unsigned long long)pr->power.states[i].time);
	}

      end:
	return 0;
}
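/*
 * For illustration only (values and exact spacing are system dependent),
 * the resulting "power" file under the processor's /proc/acpi directory
 * looks roughly like:
 *
 *   active state: C2
 *   max_cstate: C8
 *   bus master activity: 00000000
 *   maximum allowed latency: 2000 usec
 *   states:
 *      C1: type[C1] promotion[C2] demotion[--] latency[000] usage[00000042] duration[00000000000000000000]
 *     *C2: type[C2] promotion[C3] demotion[C1] latency[001] usage[00015000] duration[00000000000004358212]
 *      C3: type[C3] promotion[--] demotion[C2] latency[085] usage[00000000] duration[00000000000000000000]
 */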

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, acpi_processor_power_seq_show,
			   PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
	.open = acpi_processor_power_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

#ifdef CONFIG_SMP
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.  This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
					 unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 0, 1);
	return NOTIFY_OK;
}

static struct notifier_block acpi_processor_latency_notifier = {
	.notifier_call = acpi_processor_latency_notify,
};
#endif

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	acpi_status status = 0;
	static int first_run;
	struct proc_dir_entry *entry = NULL;
	unsigned int i;


	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
#ifdef CONFIG_SMP
		register_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	if (!pr)
		return -EINVAL;

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be used on
	 * platforms that only support C1.
	 */
	if ((pr->flags.power) && (!boot_option_idle_override)) {
		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
		for (i = 1; i <= pr->power.count; i++)
			if (pr->power.states[i].valid)
				printk(" C%d[C%d]", i,
				       pr->power.states[i].type);
		printk(")\n");

		if (pr->id == 0) {
			pm_idle_save = pm_idle;
			pm_idle = acpi_processor_idle;
		}
	}

	/* 'power' [R] */
	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  S_IRUGO, acpi_device_dir(device));
	if (!entry)
		return -EIO;
	else {
		entry->proc_fops = &acpi_processor_power_fops;
		entry->data = acpi_driver_data(device);
		entry->owner = THIS_MODULE;
	}

	pr->flags.power_setup_done = 1;

	return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
			      struct acpi_device *device)
{

	pr->flags.power_setup_done = 0;

	if (acpi_device_dir(device))
		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  acpi_device_dir(device));

	/* Unregister the idle handler when processor #0 is removed. */
	if (pr->id == 0) {
		pm_idle = pm_idle_save;

		/*
		 * We are about to unload the current idle thread pm callback
		 * (pm_idle).  Wait for all processors to update cached/local
		 * copies of pm_idle before proceeding.
		 */
		cpu_idle_wait();
#ifdef CONFIG_SMP
		unregister_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	return 0;
}
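/*
 * Runtime tuning knobs defined in this file (a quick reference, assuming the
 * code is built as, or into, the usual "processor" module): max_cstate and
 * bm_history are writable module parameters, so they can be set at boot time
 * (e.g. processor.max_cstate=1, as the DMI quirk message above suggests) or
 * changed later via /sys/module/processor/parameters/; nocst (mode 0000) can
 * only be set at load/boot time and makes the driver ignore _CST and fall
 * back to FADT/P_BLK information.
 */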