/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>	/* need_resched() */
#include <linux/latency.h>

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>

#define ACPI_PROCESSOR_COMPONENT	0x01000000
#define ACPI_PROCESSOR_CLASS		"processor"
#define ACPI_PROCESSOR_DRIVER_NAME	"ACPI Processor Driver"
#define _COMPONENT			ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("acpi_processor")
#define ACPI_PROCESSOR_FILE_POWER	"power"
#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
static void (*pm_idle_save) (void) __read_mostly;
module_param(max_cstate, uint, 0644);

static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 *  100 HZ: 0x0000000F:  4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
module_param(bm_history, uint, 0644);
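
/*
 * The ACPI PM timer runs at PM_TIMER_FREQUENCY (3.579545 MHz), so one
 * microsecond is roughly 3.58 timer ticks.  For example:
 *
 *	US_TO_PM_TIMER_TICKS(100) = (100 * 3579) / 1000 = 357 ticks
 *
 * Likewise, assuming HZ = 250 as an example, the bm_history default above
 * works out to (1U << (250 / 25)) - 1 = 0x3FF, i.e. 10 jiffies = 40ms of
 * bus-master history.
 */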

/* --------------------------------------------------------------------------
                              Power Management
   -------------------------------------------------------------------------- */

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

	max_cstate = (long)id->driver_data;

	return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void *)1},
	{ set_max_cstate, "IBM ThinkPad R40e", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
	  DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void *)1},
	{ set_max_cstate, "Medion 41700", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
	 (void *)2},
	{},
};
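
/*
 * ticks_elapsed() below handles PM timer wraparound.  The FADT flag
 * ACPI_FADT_32BIT_TIMER tells us whether the platform timer is 32 bits
 * wide; otherwise it is only 24 bits.  As an illustration, on a 24-bit
 * timer a reading of t1 = 0x00FFFFF0 followed by t2 = 0x00000010 yields
 * ((0x00FFFFFF - 0x00FFFFF0) + 0x00000010) & 0x00FFFFFF = 0x1F ticks.
 */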
static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
	if (t2 >= t1)
		return (t2 - t1);
	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
	else
		return ((0xFFFFFFFF - t1) + t2);
}

static void
acpi_processor_power_activate(struct acpi_processor *pr,
			      struct acpi_processor_cx *new)
{
	struct acpi_processor_cx *old;

	if (!pr || !new)
		return;

	old = pr->power.state;

	if (old)
		old->promotion.count = 0;
	new->demotion.count = 0;

	/* Cleanup from old state. */
	if (old) {
		switch (old->type) {
		case ACPI_STATE_C3:
			/* Disable bus master reload */
			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
			break;
		}
	}

	/* Prepare to use new state. */
	switch (new->type) {
	case ACPI_STATE_C3:
		/* Enable bus master reload */
		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		break;
	}

	pr->power.state = new;

	return;
}

static void acpi_safe_halt(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched())
		safe_halt();
	current_thread_info()->status |= TS_POLLING;
}

static atomic_t c3_cpu_count;

/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
{
	if (cstate->space_id == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cstate);
	} else {
		int unused;
		/* IO port based C-state */
		inb(cstate->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
}
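
/*
 * acpi_processor_idle() is installed as pm_idle.  In outline it: records
 * recent bus-master (DMA) activity, picks the current Cx state (possibly
 * capped to C1 on SMP systems without _CST support), enters it, measures
 * the residency with the PM timer, and finally runs the promotion/demotion
 * policy to choose the state used on the next idle entry.
 */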
static void acpi_processor_idle(void)
{
	struct acpi_processor *pr = NULL;
	struct acpi_processor_cx *cx = NULL;
	struct acpi_processor_cx *next_state = NULL;
	int sleep_ticks = 0;
	u32 t1, t2 = 0;

	pr = processors[smp_processor_id()];
	if (!pr)
		return;

	/*
	 * Interrupts must be disabled during bus mastering calculations and
	 * for C2/C3 transitions.
	 */
	local_irq_disable();

	/*
	 * Check whether we truly need to go idle, or should
	 * reschedule:
	 */
	if (unlikely(need_resched())) {
		local_irq_enable();
		return;
	}

	cx = pr->power.state;
	if (!cx) {
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();
		return;
	}

	/*
	 * Check BM Activity
	 * -----------------
	 * Check for bus mastering activity (if required), record, and check
	 * for demotion.
	 */
	if (pr->flags.bm_check) {
		u32 bm_status = 0;
		unsigned long diff = jiffies - pr->power.bm_check_timestamp;

		if (diff > 31)
			diff = 31;

		pr->power.bm_activity <<= diff;

		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
		if (bm_status) {
			pr->power.bm_activity |= 0x1;
			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
		}
		/*
		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
		 * the true state of bus mastering activity; forcing us to
		 * manually check the BMIDEA bit of each IDE channel.
		 */
		else if (errata.piix4.bmisx) {
			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
				pr->power.bm_activity |= 0x1;
		}

		pr->power.bm_check_timestamp = jiffies;

		/*
		 * If bus mastering is or was active this jiffy, demote
		 * to avoid a faulty transition.  Note that the processor
		 * won't enter a low-power state during this call (to this
		 * function) but should upon the next.
		 *
		 * TBD: A better policy might be to fallback to the demotion
		 *      state (use it for this quantum only) instead of
		 *      demoting -- and rely on duration as our sole demotion
		 *      qualification.  This may, however, introduce DMA
		 *      issues (e.g. floppy DMA transfer overrun/underrun).
		 */
		if ((pr->power.bm_activity & 0x1) &&
		    cx->demotion.threshold.bm) {
			local_irq_enable();
			next_state = cx->demotion.state;
			goto end;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.  We do it here instead of doing it at _CST/P_LVL
	 * detection phase, to work cleanly with logical CPU hotplug.
	 */
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		cx = &pr->power.states[ACPI_STATE_C1];
#endif

	/*
	 * Sleep:
	 * ------
	 * Invoke the current Cx state to put the processor to sleep.
	 */
	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();
		if (need_resched()) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return;
		}
	}

	switch (cx->type) {

	case ACPI_STATE_C1:
		/*
		 * Invoke C1.
		 * Use the appropriate idle routine, the one that would
		 * be used without acpi C-states.
		 */
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();

		/*
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 */
		sleep_ticks = 0xFFFFFFFF;
		break;

	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C2 */
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C2, so notify users */
		mark_tsc_unstable();
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
		break;

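	/*
	 * Example of the accounting above: if the PM timer advanced by 1790
	 * ticks (~500us at about 3.58 ticks/us) and the C2 worst-case latency
	 * is 100us (latency_ticks ~= 357), then
	 * sleep_ticks ~= 1790 - 357 - C2_OVERHEAD = 1429 ticks (~399us).
	 */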
	case ACPI_STATE_C3:

		if (pr->flags.bm_check) {
			if (atomic_inc_return(&c3_cpu_count) ==
			    num_online_cpus()) {
				/*
				 * All CPUs are trying to go to C3
				 * Disable bus master arbitration
				 */
				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			}
		} else {
			/* SMP with no shared cache... Invalidate cache */
			ACPI_FLUSH_CPU_CACHE();
		}

		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		if (pr->flags.bm_check) {
			/* Enable bus master arbitration */
			atomic_dec(&c3_cpu_count);
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		}

#ifdef CONFIG_GENERIC_TIME
		/* TSC halts in C3, so notify users */
		mark_tsc_unstable();
#endif
		/* Re-enable interrupts */
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
		break;

	default:
		local_irq_enable();
		return;
	}
	cx->usage++;
	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
		cx->time += sleep_ticks;

	next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
	/* Don't do promotion/demotion */
	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
		next_state = cx;
		goto end;
	}
#endif
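
	/*
	 * With the default policy set up in acpi_processor_set_power_policy()
	 * below, promotion out of C1 takes 10 consecutive "long" sleeps,
	 * promotion out of C2 (or deeper) takes 4, while a single "short"
	 * sleep (shorter than the state's own latency_ticks) is enough to
	 * demote.  Promotion into C3 additionally requires a quiet
	 * bm_activity history.
	 */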

	/*
	 * Promotion?
	 * ----------
	 * Track the number of longs (time asleep is greater than threshold)
	 * and promote when the count threshold is reached.  Note that bus
	 * mastering activity may prevent promotions.
	 * Do not promote above max_cstate.
	 */
	if (cx->promotion.state &&
	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
		if (sleep_ticks > cx->promotion.threshold.ticks &&
		    cx->promotion.state->latency <= system_latency_constraint()) {
			cx->promotion.count++;
			cx->demotion.count = 0;
			if (cx->promotion.count >=
			    cx->promotion.threshold.count) {
				if (pr->flags.bm_check) {
					if (!
					    (pr->power.bm_activity & cx->
					     promotion.threshold.bm)) {
						next_state =
						    cx->promotion.state;
						goto end;
					}
				} else {
					next_state = cx->promotion.state;
					goto end;
				}
			}
		}
	}

	/*
	 * Demotion?
	 * ---------
	 * Track the number of shorts (time asleep is less than time threshold)
	 * and demote when the usage threshold is reached.
	 */
	if (cx->demotion.state) {
		if (sleep_ticks < cx->demotion.threshold.ticks) {
			cx->demotion.count++;
			cx->promotion.count = 0;
			if (cx->demotion.count >= cx->demotion.threshold.count) {
				next_state = cx->demotion.state;
				goto end;
			}
		}
	}

end:
	/*
	 * Demote if current state exceeds max_cstate
	 * or if the latency of the current state is unacceptable
	 */
	if ((pr->power.state - pr->power.states) > max_cstate ||
	    pr->power.state->latency > system_latency_constraint()) {
		if (cx->demotion.state)
			next_state = cx->demotion.state;
	}

	/*
	 * New Cx State?
	 * -------------
	 * If we're going to start using a new Cx state we must clean up
	 * from the previous and prepare to use the new.
	 */
	if (next_state != pr->power.state)
		acpi_processor_power_activate(pr, next_state);
}

static int acpi_processor_set_power_policy(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int state_is_set = 0;
	struct acpi_processor_cx *lower = NULL;
	struct acpi_processor_cx *higher = NULL;
	struct acpi_processor_cx *cx;


	if (!pr)
		return -EINVAL;

	/*
	 * This function sets the default Cx state policy (OS idle handler).
	 * Our scheme is to promote quickly to C2 but more conservatively
	 * to C3.  We're favoring C2 for its characteristics of low latency
	 * (quick response), good power savings, and ability to allow bus
	 * mastering activity.  Note that the Cx state policy is completely
	 * customizable and can be altered dynamically.
	 */

	/* startup state */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (!state_is_set)
			pr->power.state = cx;
		state_is_set++;
		break;
	}

	if (!state_is_set)
		return -ENODEV;

	/* demotion */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (lower) {
			cx->demotion.state = lower;
			cx->demotion.threshold.ticks = cx->latency_ticks;
			cx->demotion.threshold.count = 1;
			if (cx->type == ACPI_STATE_C3)
				cx->demotion.threshold.bm = bm_history;
		}

		lower = cx;
	}

	/* promotion */
	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (higher) {
			cx->promotion.state = higher;
			cx->promotion.threshold.ticks = cx->latency_ticks;
			if (cx->type >= ACPI_STATE_C2)
				cx->promotion.threshold.count = 4;
			else
				cx->promotion.threshold.count = 10;
			if (higher->type == ACPI_STATE_C3)
				cx->promotion.threshold.bm = bm_history;
		}

		higher = cx;
	}

	return 0;
}
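
/*
 * When no _CST is available the C2/C3 entry ports are derived from the
 * processor's P_BLK: per the ACPI spec the 6-byte P_BLK holds P_CNT at
 * offset 0, followed by the one-byte P_LVL2 and P_LVL3 registers at
 * offsets 4 and 5 -- hence the pr->pblk + 4 / + 5 below.
 */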
static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr)
		return -EINVAL;

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;
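
	/*
	 * Each remaining _CST element is itself a 4-element package:
	 * { Register (Buffer: generic address of the entry method),
	 *   Type (Integer: 1=C1, 2=C2, 3=C3),
	 *   Latency (Integer: worst-case exit latency in microseconds),
	 *   Power (Integer: average power in milliwatts) },
	 * which is exactly what the loop below unpacks, element by element.
	 */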

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.space_id = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
			    (pr->id, &cx, reg) == 0) {
				cx.space_id = ACPI_CSTATE_FFH;
			} else if (cx.type != ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * That is, we retain space_id of SYSTEM_IO for
				 * halt based C1.
				 * Otherwise, ignore this info and continue.
				 */
				continue;
			}
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

end:
	kfree(buffer.pointer);

	return status;
}
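
/*
 * The verify helpers below accept a state only if its declared worst-case
 * latency is within the ACPI limits (ACPI_PROCESSOR_MAX_C2_LATENCY and
 * ACPI_PROCESSOR_MAX_C3_LATENCY, i.e. 100us for C2 and 1000us for C3) and,
 * if so, pre-convert that latency to PM timer ticks so the idle loop does
 * not have to do it on every entry.
 */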
static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

	if (!cx->address)
		return;

	/*
	 * C2 latency must be less than or equal to 100
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * Otherwise we've met all of our C2 requirements.
	 * Normalize the C2 latency to expedite policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag;


	if (!cx->address)
		return;

	/*
	 * C3 latency must be less than or equal to 1000
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (!bm_check_flag) {
		/* Determine whether bm_check is needed based on CPU */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
	} else {
		pr->flags.bm_check = bm_check_flag;
	}

	if (pr->flags.bm_check) {
		/* bus mastering control is necessary */
		if (!pr->flags.bm_control) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "C3 support requires bus mastering control\n"));
			return;
		}
	} else {
		/*
		 * WBINVD should be set in fadt, for C3 state to be
		 * supported when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

	return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

#ifdef ARCH_APICTIMER_STOPS_ON_C3
	int timer_broadcast = 0;
	cpumask_t mask = cpumask_of_cpu(pr->id);
	on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
#endif

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
			acpi_processor_power_verify_c2(cx);
#ifdef ARCH_APICTIMER_STOPS_ON_C3
			/* Some AMD systems fake C3 as C2, but still
			   have timer troubles */
			if (cx->valid &&
			    boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
				timer_broadcast++;
#endif
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
#ifdef ARCH_APICTIMER_STOPS_ON_C3
			if (cx->valid)
				timer_broadcast++;
#endif
			break;
		}

		if (cx->valid)
			working++;
	}

#ifdef ARCH_APICTIMER_STOPS_ON_C3
	if (timer_broadcast)
		on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
#endif

	return (working);
}
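
/*
 * C-state discovery order: prefer the _CST object, fall back to the
 * legacy FADT/P_BLK description if _CST is absent, and finally make sure
 * at least a default C1 state exists before the states are verified and
 * the default policy is built.
 */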
static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

	/*
	 * Set Default Policy
	 * ------------------
	 * Now that we know which states are supported, set the default
	 * policy.  Note that this policy can be changed dynamically
	 * (e.g. encourage deeper sleeps to conserve battery life when
	 * not on AC).
	 */
	result = acpi_processor_set_power_policy(pr);
	if (result)
		return result;

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int result = 0;


	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/* Fall back to the default idle loop */
	pm_idle = pm_idle_save;
	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */

	pr->flags.power = 0;
	result = acpi_processor_get_power_info(pr);
	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
		pm_idle = acpi_processor_idle;

	return result;
}

/* proc interface */
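
/*
 * The seq_file handler below backs /proc/acpi/processor/<CPUn>/power.
 * The output (values here are purely illustrative) looks roughly like:
 *
 *	active state: C2
 *	max_cstate: C8
 *	bus master activity: 00000000
 *	maximum allowed latency: 2000000 usec
 *	states:
 *	  C1: type[C1] promotion[C2] demotion[--] latency[000] usage[00000010] duration[00000000000000000000]
 *	 *C2: type[C2] promotion[--] demotion[C1] latency[100] usage[00012345] duration[00000000000123456789]
 */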

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
	struct acpi_processor *pr = seq->private;
	unsigned int i;


	if (!pr)
		goto end;

	seq_printf(seq, "active state: C%zd\n"
		   "max_cstate: C%d\n"
		   "bus master activity: %08x\n"
		   "maximum allowed latency: %d usec\n",
		   pr->power.state ? pr->power.state - pr->power.states : 0,
		   max_cstate, (unsigned)pr->power.bm_activity,
		   system_latency_constraint());

	seq_puts(seq, "states:\n");

	for (i = 1; i <= pr->power.count; i++) {
		seq_printf(seq, " %cC%d: ",
			   (&pr->power.states[i] ==
			    pr->power.state ? '*' : ' '), i);

		if (!pr->power.states[i].valid) {
			seq_puts(seq, "<not supported>\n");
			continue;
		}

		switch (pr->power.states[i].type) {
		case ACPI_STATE_C1:
			seq_printf(seq, "type[C1] ");
			break;
		case ACPI_STATE_C2:
			seq_printf(seq, "type[C2] ");
			break;
		case ACPI_STATE_C3:
			seq_printf(seq, "type[C3] ");
			break;
		default:
			seq_printf(seq, "type[--] ");
			break;
		}

		if (pr->power.states[i].promotion.state)
			seq_printf(seq, "promotion[C%zd] ",
				   (pr->power.states[i].promotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "promotion[--] ");

		if (pr->power.states[i].demotion.state)
			seq_printf(seq, "demotion[C%zd] ",
				   (pr->power.states[i].demotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "demotion[--] ");

		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
			   pr->power.states[i].latency,
			   pr->power.states[i].usage,
			   (unsigned long long)pr->power.states[i].time);
	}

end:
	return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, acpi_processor_power_seq_show,
			   PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
	.open = acpi_processor_power_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

#ifdef CONFIG_SMP
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.  This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
					 unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 0, 1);
	return NOTIFY_OK;
}

static struct notifier_block acpi_processor_latency_notifier = {
	.notifier_call = acpi_processor_latency_notify,
};
#endif

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	acpi_status status = 0;
	static int first_run;
	struct proc_dir_entry *entry = NULL;
	unsigned int i;


	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
#ifdef CONFIG_SMP
		register_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	if (!pr)
		return -EINVAL;

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be kept on
	 * platforms that only support C1.
	 */
	if ((pr->flags.power) && (!boot_option_idle_override)) {
		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
		for (i = 1; i <= pr->power.count; i++)
			if (pr->power.states[i].valid)
				printk(" C%d[C%d]", i,
				       pr->power.states[i].type);
		printk(")\n");

		if (pr->id == 0) {
			pm_idle_save = pm_idle;
			pm_idle = acpi_processor_idle;
		}
	}

	/* 'power' [R] */
	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  S_IRUGO, acpi_device_dir(device));
	if (!entry)
		return -EIO;
	else {
		entry->proc_fops = &acpi_processor_power_fops;
		entry->data = acpi_driver_data(device);
		entry->owner = THIS_MODULE;
	}

	pr->flags.power_setup_done = 1;

	return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
			      struct acpi_device *device)
{

	pr->flags.power_setup_done = 0;

	if (acpi_device_dir(device))
		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  acpi_device_dir(device));

	/* Unregister the idle handler when processor #0 is removed. */
	if (pr->id == 0) {
		pm_idle = pm_idle_save;

		/*
		 * We are about to unload the current idle thread pm callback
		 * (pm_idle).  Wait for all processors to update cached/local
		 * copies of pm_idle before proceeding.
		 */
		cpu_idle_wait();
#ifdef CONFIG_SMP
		unregister_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	return 0;
}