/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 * Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>	/* need_resched() */
#include <linux/latency.h>
#include <linux/clockchips.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>

#define ACPI_PROCESSOR_COMPONENT	0x01000000
#define ACPI_PROCESSOR_CLASS		"processor"
#define _COMPONENT			ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER	"power"
#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */

static void (*pm_idle_save) (void) __read_mostly;

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0644);

static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 *  100 HZ: 0x0000000F:  4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
module_param(bm_history, uint, 0644);
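
/*
 * Illustrative sketch, not compiled into the driver: how much bus-master
 * history the default mask above covers for an assumed HZ=250 build.
 * The helper name and the HZ value are hypothetical, for illustration only.
 */
#if 0
static unsigned int example_bm_history_bits(void)
{
	unsigned int hz = 250;	/* assumed CONFIG_HZ, for the example */

	/* HZ=250: (1U << (250 / 25)) - 1 = 0x3FF -> 10 jiffies = 40ms */
	unsigned int mask = (hz >= 800) ? 0xFFFFFFFF : ((1U << (hz / 25)) - 1);

	return hweight32(mask);	/* number of jiffies of history kept */
}
#endif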

/* --------------------------------------------------------------------------
                              Power Management
   -------------------------------------------------------------------------- */

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

	max_cstate = (long)id->driver_data;

	return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void *)1},
	{ set_max_cstate, "Medion 41700", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
	 (void *)2},
	{},
};

static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
	if (t2 >= t1)
		return (t2 - t1);
	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
	else
		return ((0xFFFFFFFF - t1) + t2);
}
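
/*
 * Illustrative sketch, not compiled into the driver: what ticks_elapsed()
 * reports when a 24-bit ACPI PM timer wraps between two reads.  The t1/t2
 * values are made up for the example.
 */
#if 0
static void example_pm_timer_wrap(void)
{
	u32 t1 = 0x00FFFFF0;	/* read just before the 24-bit counter wraps */
	u32 t2 = 0x00000010;	/* read shortly after the wrap */

	/* ((0x00FFFFFF - t1) + t2) & 0x00FFFFFF == 0x1F, not a huge bogus delta */
	printk(KERN_DEBUG "elapsed: 0x%x PM timer ticks\n", ticks_elapsed(t1, t2));
}
#endif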
LTD"), 172 DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, 173 (void *)2}, 174 {}, 175 }; 176 177 static inline u32 ticks_elapsed(u32 t1, u32 t2) 178 { 179 if (t2 >= t1) 180 return (t2 - t1); 181 else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) 182 return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); 183 else 184 return ((0xFFFFFFFF - t1) + t2); 185 } 186 187 static void 188 acpi_processor_power_activate(struct acpi_processor *pr, 189 struct acpi_processor_cx *new) 190 { 191 struct acpi_processor_cx *old; 192 193 if (!pr || !new) 194 return; 195 196 old = pr->power.state; 197 198 if (old) 199 old->promotion.count = 0; 200 new->demotion.count = 0; 201 202 /* Cleanup from old state. */ 203 if (old) { 204 switch (old->type) { 205 case ACPI_STATE_C3: 206 /* Disable bus master reload */ 207 if (new->type != ACPI_STATE_C3 && pr->flags.bm_check) 208 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 209 break; 210 } 211 } 212 213 /* Prepare to use new state. */ 214 switch (new->type) { 215 case ACPI_STATE_C3: 216 /* Enable bus master reload */ 217 if (old->type != ACPI_STATE_C3 && pr->flags.bm_check) 218 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); 219 break; 220 } 221 222 pr->power.state = new; 223 224 return; 225 } 226 227 static void acpi_safe_halt(void) 228 { 229 current_thread_info()->status &= ~TS_POLLING; 230 /* 231 * TS_POLLING-cleared state must be visible before we 232 * test NEED_RESCHED: 233 */ 234 smp_mb(); 235 if (!need_resched()) 236 safe_halt(); 237 current_thread_info()->status |= TS_POLLING; 238 } 239 240 static atomic_t c3_cpu_count; 241 242 /* Common C-state entry for C2, C3, .. */ 243 static void acpi_cstate_enter(struct acpi_processor_cx *cstate) 244 { 245 if (cstate->space_id == ACPI_CSTATE_FFH) { 246 /* Call into architectural FFH based C-state */ 247 acpi_processor_ffh_cstate_enter(cstate); 248 } else { 249 int unused; 250 /* IO port based C-state */ 251 inb(cstate->address); 252 /* Dummy wait op - must do something useless after P_LVL2 read 253 because chipsets cannot guarantee that STPCLK# signal 254 gets asserted in time to freeze execution properly. */ 255 unused = inl(acpi_gbl_FADT.xpm_timer_block.address); 256 } 257 } 258 259 #ifdef ARCH_APICTIMER_STOPS_ON_C3 260 261 /* 262 * Some BIOS implementations switch to C3 in the published C2 state. 263 * This seems to be a common problem on AMD boxen, but other vendors 264 * are affected too. We pick the most conservative approach: we assume 265 * that the local APIC stops in both C2 and C3. 266 */ 267 static void acpi_timer_check_state(int state, struct acpi_processor *pr, 268 struct acpi_processor_cx *cx) 269 { 270 struct acpi_processor_power *pwr = &pr->power; 271 272 /* 273 * Check, if one of the previous states already marked the lapic 274 * unstable 275 */ 276 if (pwr->timer_broadcast_on_state < state) 277 return; 278 279 if (cx->type >= ACPI_STATE_C2) 280 pr->power.timer_broadcast_on_state = state; 281 } 282 283 static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) 284 { 285 #ifdef CONFIG_GENERIC_CLOCKEVENTS 286 unsigned long reason; 287 288 reason = pr->power.timer_broadcast_on_state < INT_MAX ? 

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
#else
	cpumask_t mask = cpumask_of_cpu(pr->id);

	if (pr->power.timer_broadcast_on_state < INT_MAX)
		on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
	else
		on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
#endif
}

/* Power(C) State timer broadcast control */
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
#ifdef CONFIG_GENERIC_CLOCKEVENTS

	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

		reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
#endif
}

#else

static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate) { }
static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}

#endif

static void acpi_processor_idle(void)
{
	struct acpi_processor *pr = NULL;
	struct acpi_processor_cx *cx = NULL;
	struct acpi_processor_cx *next_state = NULL;
	int sleep_ticks = 0;
	u32 t1, t2 = 0;

	pr = processors[smp_processor_id()];
	if (!pr)
		return;

	/*
	 * Interrupts must be disabled during bus mastering calculations and
	 * for C2/C3 transitions.
	 */
	local_irq_disable();

	/*
	 * Check whether we truly need to go idle, or should
	 * reschedule:
	 */
	if (unlikely(need_resched())) {
		local_irq_enable();
		return;
	}

	cx = pr->power.state;
	if (!cx) {
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();
		return;
	}

	/*
	 * Check BM Activity
	 * -----------------
	 * Check for bus mastering activity (if required), record, and check
	 * for demotion.
	 */
	if (pr->flags.bm_check) {
		u32 bm_status = 0;
		unsigned long diff = jiffies - pr->power.bm_check_timestamp;

		if (diff > 31)
			diff = 31;

		pr->power.bm_activity <<= diff;

		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
		if (bm_status) {
			pr->power.bm_activity |= 0x1;
			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
		}
		/*
		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
		 * the true state of bus mastering activity; forcing us to
		 * manually check the BMIDEA bit of each IDE channel.
		 */
		else if (errata.piix4.bmisx) {
			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
				pr->power.bm_activity |= 0x1;
		}

		pr->power.bm_check_timestamp = jiffies;

		/*
		 * If bus mastering is or was active this jiffy, demote
		 * to avoid a faulty transition.  Note that the processor
		 * won't enter a low-power state during this call (to this
		 * function) but should upon the next.
		 *
		 * TBD: A better policy might be to fallback to the demotion
		 *      state (use it for this quantum only) instead of
		 *      demoting -- and rely on duration as our sole demotion
		 *      qualification.  This may, however, introduce DMA
		 *      issues (e.g. floppy DMA transfer overrun/underrun).
		 */
		if ((pr->power.bm_activity & 0x1) &&
		    cx->demotion.threshold.bm) {
			local_irq_enable();
			next_state = cx->demotion.state;
			goto end;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
	 * detection phase, to work cleanly with logical CPU hotplug.
	 */
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		cx = &pr->power.states[ACPI_STATE_C1];
#endif

	/*
	 * Sleep:
	 * ------
	 * Invoke the current Cx state to put the processor to sleep.
	 */
	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();
		if (need_resched()) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return;
		}
	}

	switch (cx->type) {

	case ACPI_STATE_C1:
		/*
		 * Invoke C1.
		 * Use the appropriate idle routine, the one that would
		 * be used without acpi C-states.
		 */
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();

		/*
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 */
		sleep_ticks = 0xFFFFFFFF;
		break;

	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C2 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C2, so notify users */
		mark_tsc_unstable();
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	case ACPI_STATE_C3:

		if (pr->flags.bm_check) {
			if (atomic_inc_return(&c3_cpu_count) ==
			    num_online_cpus()) {
				/*
				 * All CPUs are trying to go to C3
				 * Disable bus master arbitration
				 */
				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			}
		} else {
			/* SMP with no shared cache... Invalidate cache */
			ACPI_FLUSH_CPU_CACHE();
		}

		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		if (pr->flags.bm_check) {
			/* Enable bus master arbitration */
			atomic_dec(&c3_cpu_count);
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		}

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C3, so notify users */
		mark_tsc_unstable();
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	default:
		local_irq_enable();
		return;
	}
	cx->usage++;
	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
		cx->time += sleep_ticks;

	next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
	/* Don't do promotion/demotion */
	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
		next_state = cx;
		goto end;
	}
#endif

	/*
	 * Promotion?
	 * ----------
	 * Track the number of longs (time asleep is greater than threshold)
	 * and promote when the count threshold is reached.  Note that bus
	 * mastering activity may prevent promotions.
	 * Do not promote above max_cstate.
	 */
	if (cx->promotion.state &&
	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
		if (sleep_ticks > cx->promotion.threshold.ticks &&
		    cx->promotion.state->latency <= system_latency_constraint()) {
			cx->promotion.count++;
			cx->demotion.count = 0;
			if (cx->promotion.count >=
			    cx->promotion.threshold.count) {
				if (pr->flags.bm_check) {
					if (!
					    (pr->power.bm_activity & cx->
					     promotion.threshold.bm)) {
						next_state =
						    cx->promotion.state;
						goto end;
					}
				} else {
					next_state = cx->promotion.state;
					goto end;
				}
			}
		}
	}

	/*
	 * Demotion?
	 * ---------
	 * Track the number of shorts (time asleep is less than time threshold)
	 * and demote when the usage threshold is reached.
	 */
	if (cx->demotion.state) {
		if (sleep_ticks < cx->demotion.threshold.ticks) {
			cx->demotion.count++;
			cx->promotion.count = 0;
			if (cx->demotion.count >= cx->demotion.threshold.count) {
				next_state = cx->demotion.state;
				goto end;
			}
		}
	}

      end:
	/*
	 * Demote if current state exceeds max_cstate
	 * or if the latency of the current state is unacceptable
	 */
	if ((pr->power.state - pr->power.states) > max_cstate ||
	    pr->power.state->latency > system_latency_constraint()) {
		if (cx->demotion.state)
			next_state = cx->demotion.state;
	}

	/*
	 * New Cx State?
	 * -------------
	 * If we're going to start using a new Cx state we must clean up
	 * from the previous and prepare to use the new.
	 */
	if (next_state != pr->power.state)
		acpi_processor_power_activate(pr, next_state);
}
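
/*
 * Illustrative sketch, not compiled into the driver: the promotion counting
 * rule used above, shown in isolation.  The helper is hypothetical; the real
 * thresholds come from acpi_processor_set_power_policy() below.
 */
#if 0
static int example_should_promote(struct acpi_processor_cx *cx, int sleep_ticks)
{
	/* a "long" residency bumps the counter, a "short" one resets it */
	if (sleep_ticks > cx->promotion.threshold.ticks)
		cx->promotion.count++;
	else
		cx->promotion.count = 0;

	/* only promote once enough consecutive long residencies were seen */
	return cx->promotion.count >= cx->promotion.threshold.count;
}
#endif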

static int acpi_processor_set_power_policy(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int state_is_set = 0;
	struct acpi_processor_cx *lower = NULL;
	struct acpi_processor_cx *higher = NULL;
	struct acpi_processor_cx *cx;

	if (!pr)
		return -EINVAL;

	/*
	 * This function sets the default Cx state policy (OS idle handler).
	 * Our scheme is to promote quickly to C2 but more conservatively
	 * to C3.  We're favoring C2 for its characteristics of low latency
	 * (quick response), good power savings, and ability to allow bus
	 * mastering activity.  Note that the Cx state policy is completely
	 * customizable and can be altered dynamically.
	 */

	/* startup state */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (!state_is_set)
			pr->power.state = cx;
		state_is_set++;
		break;
	}

	if (!state_is_set)
		return -ENODEV;

	/* demotion */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (lower) {
			cx->demotion.state = lower;
			cx->demotion.threshold.ticks = cx->latency_ticks;
			cx->demotion.threshold.count = 1;
			if (cx->type == ACPI_STATE_C3)
				cx->demotion.threshold.bm = bm_history;
		}

		lower = cx;
	}

	/* promotion */
	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (higher) {
			cx->promotion.state = higher;
			cx->promotion.threshold.ticks = cx->latency_ticks;
			if (cx->type >= ACPI_STATE_C2)
				cx->promotion.threshold.count = 4;
			else
				cx->promotion.threshold.count = 10;
			if (higher->type == ACPI_STATE_C3)
				cx->promotion.threshold.bm = bm_history;
		}

		higher = cx;
	}

	return 0;
}

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr)
		return -EINVAL;

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}
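
/*
 * For reference (ACPI-spec P_BLK layout, which is where the +4/+5 offsets
 * used above come from): the processor block is 6 bytes of I/O space -
 *
 *	pblk + 0: P_CNT  (32-bit performance/throttling control)
 *	pblk + 4: P_LVL2 (an 8-bit read enters C2)
 *	pblk + 5: P_LVL3 (an 8-bit read enters C3)
 */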

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;

	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.space_id = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
					(pr->id, &cx, reg) == 0) {
				cx.space_id = ACPI_CSTATE_FFH;
			} else if (cx.type != ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * That is, we retain space_id of SYSTEM_IO for
				 * halt based C1.
				 * Otherwise, ignore this info and continue.
				 */
				continue;
			}
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

      end:
	kfree(buffer.pointer);

	return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

	if (!cx->address)
		return;

	/*
	 * C2 latency must be less than or equal to 100
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * Otherwise we've met all of our C2 requirements.
	 * Normalize the C2 latency to expedite policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}
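
/*
 * Illustrative numbers only: with the 3.579545 MHz ACPI PM timer,
 * US_TO_PM_TIMER_TICKS(100) = (100 * (PM_TIMER_FREQUENCY / 1000)) / 1000
 *                           = (100 * 3579) / 1000 = 357 ticks,
 * i.e. the latency_ticks value a worst-case (100us) C2 state ends up with.
 */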

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag;

	if (!cx->address)
		return;

	/*
	 * C3 latency must be less than or equal to 1000
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (!bm_check_flag) {
		/* Determine whether bm_check is needed based on CPU */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
	} else {
		pr->flags.bm_check = bm_check_flag;
	}

	if (pr->flags.bm_check) {
		/* bus mastering control is necessary */
		if (!pr->flags.bm_control) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "C3 support requires bus mastering control\n"));
			return;
		}
	} else {
		/*
		 * WBINVD must be set in the FADT for C3 to be supported
		 * when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

	pr->power.timer_broadcast_on_state = INT_MAX;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
			acpi_processor_power_verify_c2(cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;
		}

		if (cx->valid)
			working++;
	}

	acpi_propagate_timer_broadcast(pr);

	return (working);
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;

	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

	/*
	 * Set Default Policy
	 * ------------------
	 * Now that we know which states are supported, set the default
	 * policy.  Note that this policy can be changed dynamically
	 * (e.g. encourage deeper sleeps to conserve battery life when
	 * not on AC).
	 */
	result = acpi_processor_set_power_policy(pr);
	if (result)
		return result;

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int result = 0;

	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/* Fall back to the default idle loop */
	pm_idle = pm_idle_save;
	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */

	pr->flags.power = 0;
	result = acpi_processor_get_power_info(pr);
	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
		pm_idle = acpi_processor_idle;

	return result;
}

/* proc interface */

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
	struct acpi_processor *pr = seq->private;
	unsigned int i;

	if (!pr)
		goto end;

	seq_printf(seq, "active state: C%zd\n"
		   "max_cstate: C%d\n"
		   "bus master activity: %08x\n"
		   "maximum allowed latency: %d usec\n",
		   pr->power.state ? pr->power.state - pr->power.states : 0,
		   max_cstate, (unsigned)pr->power.bm_activity,
		   system_latency_constraint());

	seq_puts(seq, "states:\n");

	for (i = 1; i <= pr->power.count; i++) {
		seq_printf(seq, " %cC%d: ",
			   (&pr->power.states[i] ==
			    pr->power.state ? '*' : ' '), i);

		if (!pr->power.states[i].valid) {
			seq_puts(seq, "<not supported>\n");
			continue;
		}

		switch (pr->power.states[i].type) {
		case ACPI_STATE_C1:
			seq_printf(seq, "type[C1] ");
			break;
		case ACPI_STATE_C2:
			seq_printf(seq, "type[C2] ");
			break;
		case ACPI_STATE_C3:
			seq_printf(seq, "type[C3] ");
			break;
		default:
			seq_printf(seq, "type[--] ");
			break;
		}

		if (pr->power.states[i].promotion.state)
			seq_printf(seq, "promotion[C%zd] ",
				   (pr->power.states[i].promotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "promotion[--] ");

		if (pr->power.states[i].demotion.state)
			seq_printf(seq, "demotion[C%zd] ",
				   (pr->power.states[i].demotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "demotion[--] ");

		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
			   pr->power.states[i].latency,
			   pr->power.states[i].usage,
			   (unsigned long long)pr->power.states[i].time);
	}

      end:
	return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, acpi_processor_power_seq_show,
			   PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
	.open = acpi_processor_power_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

#ifdef CONFIG_SMP
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.
 * This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
					 unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 0, 1);
	return NOTIFY_OK;
}

static struct notifier_block acpi_processor_latency_notifier = {
	.notifier_call = acpi_processor_latency_notify,
};
#endif

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	acpi_status status = 0;
	static int first_run;
	struct proc_dir_entry *entry = NULL;
	unsigned int i;

	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
#ifdef CONFIG_SMP
		register_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	if (!pr)
		return -EINVAL;

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be used on
	 * platforms that only support C1.
	 */
	if ((pr->flags.power) && (!boot_option_idle_override)) {
		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
		for (i = 1; i <= pr->power.count; i++)
			if (pr->power.states[i].valid)
				printk(" C%d[C%d]", i,
				       pr->power.states[i].type);
		printk(")\n");

		if (pr->id == 0) {
			pm_idle_save = pm_idle;
			pm_idle = acpi_processor_idle;
		}
	}

	/* 'power' [R] */
	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  S_IRUGO, acpi_device_dir(device));
	if (!entry)
		return -EIO;
	else {
		entry->proc_fops = &acpi_processor_power_fops;
		entry->data = acpi_driver_data(device);
		entry->owner = THIS_MODULE;
	}

	pr->flags.power_setup_done = 1;

	return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
			      struct acpi_device *device)
{

	pr->flags.power_setup_done = 0;

	if (acpi_device_dir(device))
		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  acpi_device_dir(device));

	/* Unregister the idle handler when processor #0 is removed. */
	if (pr->id == 0) {
		pm_idle = pm_idle_save;

		/*
		 * We are about to unload the current idle thread pm callback
		 * (pm_idle), Wait for all processors to update cached/local
		 * copies of pm_idle before proceeding.
		 */
		cpu_idle_wait();
#ifdef CONFIG_SMP
		unregister_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	return 0;
}