xref: /linux/drivers/acpi/processor_idle.c (revision 20d0021394c1b070bf04b22c5bc8fdb437edd4c5)
1 /*
2  * processor_idle - idle state submodule to the ACPI processor driver
3  *
4  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6  *  Copyright (C) 2004       Dominik Brodowski <linux@brodo.de>
7  *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8  *  			- Added processor hotplug support
9  *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
10  *  			- Added support for C3 on SMP
11  *
12  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13  *
14  *  This program is free software; you can redistribute it and/or modify
15  *  it under the terms of the GNU General Public License as published by
16  *  the Free Software Foundation; either version 2 of the License, or (at
17  *  your option) any later version.
18  *
19  *  This program is distributed in the hope that it will be useful, but
20  *  WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  *  General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License along
25  *  with this program; if not, write to the Free Software Foundation, Inc.,
26  *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
27  *
28  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29  */
30 
31 #include <linux/kernel.h>
32 #include <linux/module.h>
33 #include <linux/init.h>
34 #include <linux/cpufreq.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/acpi.h>
38 #include <linux/dmi.h>
39 #include <linux/moduleparam.h>
40 
41 #include <asm/io.h>
42 #include <asm/uaccess.h>
43 
44 #include <acpi/acpi_bus.h>
45 #include <acpi/processor.h>
46 
47 #define ACPI_PROCESSOR_COMPONENT        0x01000000
48 #define ACPI_PROCESSOR_CLASS            "processor"
49 #define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
50 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
51 ACPI_MODULE_NAME                ("acpi_processor")
52 
53 #define ACPI_PROCESSOR_FILE_POWER	"power"
54 
55 #define US_TO_PM_TIMER_TICKS(t)		(((t) * (PM_TIMER_FREQUENCY/1000)) / 1000)
56 #define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
57 #define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
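/*
 * Worked example (illustrative, not part of the driver logic): the ACPI PM
 * timer runs at PM_TIMER_FREQUENCY = 3579545 Hz, i.e. roughly 3.579 ticks
 * per microsecond, so the conversion macro above evaluates as
 *
 *	US_TO_PM_TIMER_TICKS(100) = (100 * (3579545 / 1000)) / 1000
 *	                          = (100 * 3579) / 1000 = 357 ticks (~100 us)
 *
 * and the 4-tick C2_OVERHEAD/C3_OVERHEAD constants correspond to roughly
 * 1 us of entry/exit overhead subtracted from each measured sleep below.
 */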
58 
59 static void (*pm_idle_save)(void);
60 module_param(max_cstate, uint, 0644);
61 
62 static unsigned int nocst = 0;
63 module_param(nocst, uint, 0000);
64 
65 /*
66  * bm_history -- bit-mask with a bit per jiffy of bus-master activity
67  * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
68  * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
69  * 100 HZ: 0x0000000F: 4 jiffies = 40ms
70  * reduce history for more aggressive entry into C3
71  */
72 static unsigned int bm_history = (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
73 module_param(bm_history, uint, 0644);
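/*
 * Worked example (illustrative): evaluating the default above for a few
 * common HZ values shows how much bus-master history is kept:
 *
 *	HZ = 1000: HZ >= 800, so bm_history = 0xFFFFFFFF (32 jiffies = 32 ms)
 *	HZ = 250:  (1U << (250 / 25)) - 1 = 0x3FF       (10 jiffies = 40 ms)
 *	HZ = 100:  (1U << (100 / 25)) - 1 = 0xF         ( 4 jiffies = 40 ms)
 *
 * A shorter mask means less bus-master-idle history is required before C3
 * may be entered.
 */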
74 /* --------------------------------------------------------------------------
75                                 Power Management
76    -------------------------------------------------------------------------- */
77 
78 /*
79  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
80  * For now disable this. Probably a bug somewhere else.
81  *
82  * To skip this limit, boot/load with a large max_cstate limit.
83  */
84 static int no_c2c3(struct dmi_system_id *id)
85 {
86 	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
87 		return 0;
88 
89 	printk(KERN_NOTICE PREFIX "%s detected - C2,C3 disabled."
90 		" Override with \"processor.max_cstate=%d\"\n", id->ident,
91 	       ACPI_PROCESSOR_MAX_POWER + 1);
92 
93 	max_cstate = 1;
94 
95 	return 0;
96 }
97 
98 
99 
100 
101 static struct dmi_system_id __initdata processor_power_dmi_table[] = {
102 	{ no_c2c3, "IBM ThinkPad R40e", {
103 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
104 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }},
105 	{ no_c2c3, "Medion 41700", {
106 	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
107 	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J") }},
108 	{},
109 };
110 
111 
112 static inline u32
113 ticks_elapsed (
114 	u32			t1,
115 	u32			t2)
116 {
117 	if (t2 >= t1)
118 		return (t2 - t1);
119 	else if (!acpi_fadt.tmr_val_ext)
120 		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
121 	else
122 		return ((0xFFFFFFFF - t1) + t2);
123 }
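/*
 * Worked example (illustrative): the PM timer is 24 bits wide unless the
 * FADT advertises a 32-bit timer (tmr_val_ext).  A single 24-bit wraparound
 * between the two reads is handled above as
 *
 *	t1 = 0x00FFFFF0, t2 = 0x00000010
 *	ticks_elapsed(t1, t2) = ((0x00FFFFFF - 0x00FFFFF0) + 0x10) & 0x00FFFFFF
 *	                      = 0x1F = 31 ticks
 *
 * More than one wrap between the two reads cannot be detected.
 */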
124 
125 
126 static void
127 acpi_processor_power_activate (
128 	struct acpi_processor	*pr,
129 	struct acpi_processor_cx  *new)
130 {
131 	struct acpi_processor_cx  *old;
132 
133 	if (!pr || !new)
134 		return;
135 
136 	old = pr->power.state;
137 
138 	if (old)
139 		old->promotion.count = 0;
140  	new->demotion.count = 0;
141 
142 	/* Cleanup from old state. */
143 	if (old) {
144 		switch (old->type) {
145 		case ACPI_STATE_C3:
146 			/* Disable bus master reload */
147 			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
148 				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, ACPI_MTX_DO_NOT_LOCK);
149 			break;
150 		}
151 	}
152 
153 	/* Prepare to use new state. */
154 	switch (new->type) {
155 	case ACPI_STATE_C3:
156 		/* Enable bus master reload */
157 		if ((!old || old->type != ACPI_STATE_C3) && pr->flags.bm_check)
158 			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK);
159 		break;
160 	}
161 
162 	pr->power.state = new;
163 
164 	return;
165 }
166 
167 
168 static atomic_t 	c3_cpu_count;
169 
170 
171 static void acpi_processor_idle (void)
172 {
173 	struct acpi_processor	*pr = NULL;
174 	struct acpi_processor_cx *cx = NULL;
175 	struct acpi_processor_cx *next_state = NULL;
176 	int			sleep_ticks = 0;
177 	u32			t1, t2 = 0;
178 
179 	pr = processors[raw_smp_processor_id()];
180 	if (!pr)
181 		return;
182 
183 	/*
184 	 * Interrupts must be disabled during bus mastering calculations and
185 	 * for C2/C3 transitions.
186 	 */
187 	local_irq_disable();
188 
189 	/*
190 	 * Check whether we truly need to go idle, or should
191 	 * reschedule:
192 	 */
193 	if (unlikely(need_resched())) {
194 		local_irq_enable();
195 		return;
196 	}
197 
198 	cx = pr->power.state;
199 	if (!cx)
200 		goto easy_out;
201 
202 	/*
203 	 * Check BM Activity
204 	 * -----------------
205 	 * Check for bus mastering activity (if required), record, and check
206 	 * for demotion.
207 	 */
208 	if (pr->flags.bm_check) {
209 		u32		bm_status = 0;
210 		unsigned long	diff = jiffies - pr->power.bm_check_timestamp;
211 
212 		if (diff > 32)
213 			diff = 32;
214 
215 		while (diff) {
216 			/* if we didn't get called, assume there was busmaster activity */
217 			diff--;
218 			if (diff)
219 				pr->power.bm_activity |= 0x1;
220 			pr->power.bm_activity <<= 1;
221 		}
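		/*
		 * Illustrative note: the loop above ages the history
		 * conservatively.  With diff == 3 and an old history h, the
		 * two jiffies in which this handler did not run are recorded
		 * as "busy" and a fresh 0 bit is shifted in for the current
		 * jiffy:
		 *
		 *	h -> ((((h | 1) << 1) | 1) << 1) << 1
		 *
		 * The current jiffy's bit is then set below only if BM_STS
		 * (or the PIIX4 BMIDEA workaround) reports real activity.
		 */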
222 
223 		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS,
224 			&bm_status, ACPI_MTX_DO_NOT_LOCK);
225 		if (bm_status) {
226 			pr->power.bm_activity++;
227 			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS,
228 				1, ACPI_MTX_DO_NOT_LOCK);
229 		}
230 		/*
231 		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
232 		 * the true state of bus mastering activity, forcing us to
233 		 * manually check the BMIDEA bit of each IDE channel.
234 		 */
235 		else if (errata.piix4.bmisx) {
236 			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
237 				|| (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
238 				pr->power.bm_activity++;
239 		}
240 
241 		pr->power.bm_check_timestamp = jiffies;
242 
243 		/*
244 		 * Apply bus mastering demotion policy.  Automatically demote
245 		 * to avoid a faulty transition.  Note that the processor
246 		 * won't enter a low-power state during this call (to this
247 		 * function) but should upon the next.
248 		 *
249 		 * TBD: A better policy might be to fall back to the demotion
250 		 *      state (use it for this quantum only) instead of
251 		 *      demoting -- and rely on duration as our sole demotion
252 		 *      qualification.  This may, however, introduce DMA
253 		 *      issues (e.g. floppy DMA transfer overrun/underrun).
254 		 */
255 		if (pr->power.bm_activity & cx->demotion.threshold.bm) {
256 			local_irq_enable();
257 			next_state = cx->demotion.state;
258 			goto end;
259 		}
260 	}
261 
262 	cx->usage++;
263 
264 	/*
265 	 * Sleep:
266 	 * ------
267 	 * Invoke the current Cx state to put the processor to sleep.
268 	 */
269 	switch (cx->type) {
270 
271 	case ACPI_STATE_C1:
272 		/*
273 		 * Invoke C1.
274 		 * Use the appropriate idle routine, the one that would
275 		 * be used without acpi C-states.
276 		 */
277 		if (pm_idle_save)
278 			pm_idle_save();
279 		else
280 			safe_halt();
281 		/*
282 		 * TBD: Can't get time duration while in C1, as resumes
283 		 *      go to an ISR rather than here.  Need to instrument
284 		 *      base interrupt handler.
285 		 */
286 		sleep_ticks = 0xFFFFFFFF;
287 		break;
288 
289 	case ACPI_STATE_C2:
290 		/* Get start time (ticks) */
291 		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
292 		/* Invoke C2 */
293 		inb(cx->address);
294 		/* Dummy op - must do something useless after P_LVL2 read */
295 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
296 		/* Get end time (ticks) */
297 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
298 		/* Re-enable interrupts */
299 		local_irq_enable();
300 		/* Compute time (ticks) that we were actually asleep */
301 		sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
302 		break;
303 
304 	case ACPI_STATE_C3:
305 
306 		if (pr->flags.bm_check) {
307 			if (atomic_inc_return(&c3_cpu_count) ==
308 					num_online_cpus()) {
309 				/*
310 				 * All CPUs are trying to go to C3
311 				 * Disable bus master arbitration
312 				 */
313 				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
314 					ACPI_MTX_DO_NOT_LOCK);
315 			}
316 		} else {
317 			/* SMP with no shared cache... Invalidate cache  */
318 			ACPI_FLUSH_CPU_CACHE();
319 		}
320 
321 		/* Get start time (ticks) */
322 		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
323 		/* Invoke C3 */
324 		inb(cx->address);
325 		/* Dummy op - must do something useless after P_LVL3 read */
326 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
327 		/* Get end time (ticks) */
328 		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
329 		if (pr->flags.bm_check) {
330 			/* Enable bus master arbitration */
331 			atomic_dec(&c3_cpu_count);
332 			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK);
333 		}
334 
335 		/* Re-enable interrupts */
336 		local_irq_enable();
337 		/* Compute time (ticks) that we were actually asleep */
338 		sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
339 		break;
340 
341 	default:
342 		local_irq_enable();
343 		return;
344 	}
345 
346 	next_state = pr->power.state;
347 
348 	/*
349 	 * Promotion?
350 	 * ----------
351 	 * Track the number of long sleeps (time asleep greater than the threshold)
352 	 * and promote when the count threshold is reached.  Note that bus
353 	 * mastering activity may prevent promotions.
354 	 * Do not promote above max_cstate.
355 	 */
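	/*
	 * Illustrative note: with the default policy built by
	 * acpi_processor_set_power_policy() below, promoting from C2 to C3
	 * requires promotion.threshold.count (4) consecutive sleeps longer
	 * than C2's latency_ticks, and is vetoed while any bit of the recent
	 * bus-master history overlaps promotion.threshold.bm.
	 */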
356 	if (cx->promotion.state &&
357 	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
358 		if (sleep_ticks > cx->promotion.threshold.ticks) {
359 			cx->promotion.count++;
360  			cx->demotion.count = 0;
361 			if (cx->promotion.count >= cx->promotion.threshold.count) {
362 				if (pr->flags.bm_check) {
363 					if (!(pr->power.bm_activity & cx->promotion.threshold.bm)) {
364 						next_state = cx->promotion.state;
365 						goto end;
366 					}
367 				}
368 				else {
369 					next_state = cx->promotion.state;
370 					goto end;
371 				}
372 			}
373 		}
374 	}
375 
376 	/*
377 	 * Demotion?
378 	 * ---------
379 	 * Track the number of short sleeps (time asleep less than the time
380 	 * threshold) and demote when the count threshold is reached.
381 	 */
382 	if (cx->demotion.state) {
383 		if (sleep_ticks < cx->demotion.threshold.ticks) {
384 			cx->demotion.count++;
385 			cx->promotion.count = 0;
386 			if (cx->demotion.count >= cx->demotion.threshold.count) {
387 				next_state = cx->demotion.state;
388 				goto end;
389 			}
390 		}
391 	}
392 
393 end:
394 	/*
395 	 * Demote if current state exceeds max_cstate
396 	 */
397 	if ((pr->power.state - pr->power.states) > max_cstate) {
398 		if (cx->demotion.state)
399 			next_state = cx->demotion.state;
400 	}
401 
402 	/*
403 	 * New Cx State?
404 	 * -------------
405 	 * If we're going to start using a new Cx state we must clean up
406 	 * from the previous and prepare to use the new.
407 	 */
408 	if (next_state != pr->power.state)
409 		acpi_processor_power_activate(pr, next_state);
410 
411 	return;
412 
413  easy_out:
414 	/* do C1 instead of busy loop */
415 	if (pm_idle_save)
416 		pm_idle_save();
417 	else
418 		safe_halt();
419 	return;
420 }
421 
422 
423 static int
424 acpi_processor_set_power_policy (
425 	struct acpi_processor	*pr)
426 {
427 	unsigned int i;
428 	unsigned int state_is_set = 0;
429 	struct acpi_processor_cx *lower = NULL;
430 	struct acpi_processor_cx *higher = NULL;
431 	struct acpi_processor_cx *cx;
432 
433  	ACPI_FUNCTION_TRACE("acpi_processor_set_power_policy");
434 
435 	if (!pr)
436 		return_VALUE(-EINVAL);
437 
438 	/*
439 	 * This function sets the default Cx state policy (OS idle handler).
440 	 * Our scheme is to promote quickly to C2 but more conservatively
441 	 * to C3.  We're favoring C2  for its characteristics of low latency
442 	 * (quick response), good power savings, and ability to allow bus
443 	 * mastering activity.  Note that the Cx state policy is completely
444 	 * customizable and can be altered dynamically.
445 	 */
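	/*
	 * Illustrative note: for a processor reporting valid C1, C2 and C3
	 * states, the loops below link the states into the chain
	 *
	 *	C1 <-- demotion --- C2 <-- demotion --- C3
	 *	C1 --- promotion --> C2 --- promotion --> C3
	 *
	 * with each tick threshold taken from the state's own latency_ticks
	 * and, for transitions involving C3, a bus-master threshold taken
	 * from bm_history.
	 */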
446 
447 	/* startup state */
448 	for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
449 		cx = &pr->power.states[i];
450 		if (!cx->valid)
451 			continue;
452 
453 		if (!state_is_set)
454 			pr->power.state = cx;
455 		state_is_set++;
456 		break;
457  	}
458 
459 	if (!state_is_set)
460 		return_VALUE(-ENODEV);
461 
462 	/* demotion */
463 	for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
464 		cx = &pr->power.states[i];
465 		if (!cx->valid)
466 			continue;
467 
468 		if (lower) {
469 			cx->demotion.state = lower;
470 			cx->demotion.threshold.ticks = cx->latency_ticks;
471 			cx->demotion.threshold.count = 1;
472 			if (cx->type == ACPI_STATE_C3)
473 				cx->demotion.threshold.bm = bm_history;
474 		}
475 
476 		lower = cx;
477 	}
478 
479 	/* promotion */
480 	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
481 		cx = &pr->power.states[i];
482 		if (!cx->valid)
483 			continue;
484 
485 		if (higher) {
486 			cx->promotion.state  = higher;
487 			cx->promotion.threshold.ticks = cx->latency_ticks;
488 			if (cx->type >= ACPI_STATE_C2)
489 				cx->promotion.threshold.count = 4;
490 			else
491 				cx->promotion.threshold.count = 10;
492 			if (higher->type == ACPI_STATE_C3)
493 				cx->promotion.threshold.bm = bm_history;
494 		}
495 
496 		higher = cx;
497 	}
498 
499  	return_VALUE(0);
500 }
501 
502 
503 static int acpi_processor_get_power_info_fadt (struct acpi_processor *pr)
504 {
505 	int i;
506 
507 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_fadt");
508 
509 	if (!pr)
510 		return_VALUE(-EINVAL);
511 
512 	if (!pr->pblk)
513 		return_VALUE(-ENODEV);
514 
515 	for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++)
516 		memset(&pr->power.states[i], 0, sizeof(struct acpi_processor_cx));
517 
518 	/* if info is obtained from pblk/fadt, type equals state */
519 	pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
520 	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
521 	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
522 
523 	/* the C0 state only exists as a filler in our array,
524 	 * and all processors need to support C1 */
525 	pr->power.states[ACPI_STATE_C0].valid = 1;
526 	pr->power.states[ACPI_STATE_C1].valid = 1;
527 
528 	/* determine C2 and C3 address from pblk */
529 	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
530 	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;
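	/*
	 * Illustrative note: per the ACPI specification the processor
	 * register block (P_BLK) is a 6-byte I/O region laid out as
	 *
	 *	offset 0-3: P_CNT  (clock throttling control)
	 *	offset 4:   P_LVL2 (a read enters C2)
	 *	offset 5:   P_LVL3 (a read enters C3)
	 *
	 * hence the pblk + 4 and pblk + 5 entry addresses above.
	 */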
531 
532 	/* determine latencies from FADT */
533 	pr->power.states[ACPI_STATE_C2].latency = acpi_fadt.plvl2_lat;
534 	pr->power.states[ACPI_STATE_C3].latency = acpi_fadt.plvl3_lat;
535 
536 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
537 			  "lvl2[0x%08x] lvl3[0x%08x]\n",
538 			  pr->power.states[ACPI_STATE_C2].address,
539 			  pr->power.states[ACPI_STATE_C3].address));
540 
541 	return_VALUE(0);
542 }
543 
544 
545 static int acpi_processor_get_power_info_default_c1 (struct acpi_processor *pr)
546 {
547 	int i;
548 
549 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_default_c1");
550 
551 	for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++)
552 		memset(&pr->power.states[i], 0, sizeof(struct acpi_processor_cx));
553 
554 	/* if info is obtained from pblk/fadt, type equals state */
555 	pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
556 	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
557 	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
558 
559 	/* the C0 state only exists as a filler in our array,
560 	 * and all processors need to support C1 */
561 	pr->power.states[ACPI_STATE_C0].valid = 1;
562 	pr->power.states[ACPI_STATE_C1].valid = 1;
563 
564 	return_VALUE(0);
565 }
566 
567 
568 static int acpi_processor_get_power_info_cst (struct acpi_processor *pr)
569 {
570 	acpi_status		status = 0;
571 	acpi_integer		count;
572 	int			i;
573 	struct acpi_buffer	buffer = {ACPI_ALLOCATE_BUFFER, NULL};
574 	union acpi_object	*cst;
575 
576 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_cst");
577 
578 	if (nocst)
579 		return_VALUE(-ENODEV);
580 
581 	pr->power.count = 0;
582 	for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++)
583 		memset(&pr->power.states[i], 0, sizeof(struct acpi_processor_cx));
584 
585 	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
586 	if (ACPI_FAILURE(status)) {
587 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
588 		return_VALUE(-ENODEV);
589  	}
590 
591 	cst = (union acpi_object *) buffer.pointer;
592 
593 	/* There must be at least 2 elements */
594 	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
595 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "not enough elements in _CST\n"));
596 		status = -EFAULT;
597 		goto end;
598 	}
599 
600 	count = cst->package.elements[0].integer.value;
601 
602 	/* Validate number of power states. */
603 	if (count < 1 || count != cst->package.count - 1) {
604 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "count given by _CST is not valid\n"));
605 		status = -EFAULT;
606 		goto end;
607 	}
608 
609 	/* We support up to ACPI_PROCESSOR_MAX_POWER. */
610 	if (count > ACPI_PROCESSOR_MAX_POWER) {
611 		printk(KERN_WARNING "Limiting number of power states to max (%d)\n", ACPI_PROCESSOR_MAX_POWER);
612 		printk(KERN_WARNING "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
613 		count = ACPI_PROCESSOR_MAX_POWER;
614 	}
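	/*
	 * Illustrative note (hypothetical ASL, not from any particular BIOS):
	 * a typical _CST object evaluated above looks like
	 *
	 *	Package() {
	 *	    2,				// number of C-state entries
	 *	    Package() { ResourceTemplate() {
	 *		Register(SystemIO, 8, 0, 0x414) }, 2, 90, 100 },
	 *	    Package() { ResourceTemplate() {
	 *		Register(SystemIO, 8, 0, 0x415) }, 3, 900, 50 },
	 *	}
	 *
	 * i.e. each entry carries the entry register, the C-state type, the
	 * worst-case latency in microseconds and the typical power consumption
	 * in milliwatts, matching the fields extracted below.
	 */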
615 
616 	/* Tell driver that at least _CST is supported. */
617 	pr->flags.has_cst = 1;
618 
619 	for (i = 1; i <= count; i++) {
620 		union acpi_object *element;
621 		union acpi_object *obj;
622 		struct acpi_power_register *reg;
623 		struct acpi_processor_cx cx;
624 
625 		memset(&cx, 0, sizeof(cx));
626 
627 		element = (union acpi_object *) &(cst->package.elements[i]);
628 		if (element->type != ACPI_TYPE_PACKAGE)
629 			continue;
630 
631 		if (element->package.count != 4)
632 			continue;
633 
634 		obj = (union acpi_object *) &(element->package.elements[0]);
635 
636 		if (obj->type != ACPI_TYPE_BUFFER)
637 			continue;
638 
639 		reg = (struct acpi_power_register *) obj->buffer.pointer;
640 
641 		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
642 			(reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
643 			continue;
644 
645 		cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ?
646 			0 : reg->address;
647 
648 		/* There should be an easy way to extract an integer... */
649 		obj = (union acpi_object *) &(element->package.elements[1]);
650 		if (obj->type != ACPI_TYPE_INTEGER)
651 			continue;
652 
653 		cx.type = obj->integer.value;
654 
655 		if ((cx.type != ACPI_STATE_C1) &&
656 		    (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO))
657 			continue;
658 
659 		if ((cx.type < ACPI_STATE_C1) ||
660 		    (cx.type > ACPI_STATE_C3))
661 			continue;
662 
663 		obj = (union acpi_object *) &(element->package.elements[2]);
664 		if (obj->type != ACPI_TYPE_INTEGER)
665 			continue;
666 
667 		cx.latency = obj->integer.value;
668 
669 		obj = (union acpi_object *) &(element->package.elements[3]);
670 		if (obj->type != ACPI_TYPE_INTEGER)
671 			continue;
672 
673 		cx.power = obj->integer.value;
674 
675 		(pr->power.count)++;
676 		memcpy(&(pr->power.states[pr->power.count]), &cx, sizeof(cx));
677 	}
678 
679 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n", pr->power.count));
680 
681 	/* Validate number of power states discovered */
682 	if (pr->power.count < 2)
683 		status = -ENODEV;
684 
685 end:
686 	acpi_os_free(buffer.pointer);
687 
688 	return_VALUE(status);
689 }
690 
691 
692 static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
693 {
694 	ACPI_FUNCTION_TRACE("acpi_processor_power_verify_c2");
695 
696 	if (!cx->address)
697 		return_VOID;
698 
699 	/*
700 	 * C2 latency must be less than or equal to 100
701 	 * microseconds.
702 	 */
703 	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
704 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
705 				  "latency too large [%d]\n",
706 				  cx->latency));
707 		return_VOID;
708 	}
709 
710 	/*
711 	 * Otherwise we've met all of our C2 requirements.
712 	 * Normalize the C2 latency to expedite policy.
713 	 */
714 	cx->valid = 1;
715 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
716 
717 	return_VOID;
718 }
719 
720 
721 static void acpi_processor_power_verify_c3(
722 	struct acpi_processor *pr,
723 	struct acpi_processor_cx *cx)
724 {
725 	static int bm_check_flag;
726 
727 	ACPI_FUNCTION_TRACE("acpi_processor_power_verify_c3");
728 
729 	if (!cx->address)
730 		return_VOID;
731 
732 	/*
733 	 * C3 latency must be less than or equal to 1000
734 	 * microseconds.
735 	 */
736 	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
737 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
738 				  "latency too large [%d]\n",
739 				  cx->latency));
740 		return_VOID;
741 	}
742 
743 	/*
744 	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
745 	 * DMA transfers are used by any ISA device to avoid livelock.
746 	 * Note that we could disable Type-F DMA (as recommended by
747 	 * the erratum), but this is known to disrupt certain ISA
748 	 * devices thus we take the conservative approach.
749 	 */
750 	else if (errata.piix4.fdma) {
751 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
752 			"C3 not supported on PIIX4 with Type-F DMA\n"));
753 		return_VOID;
754 	}
755 
756 	/* All the logic here assumes flags.bm_check is the same across all CPUs */
757 	if (!bm_check_flag) {
758 		/* Determine whether bm_check is needed based on CPU  */
759 		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
760 		bm_check_flag = pr->flags.bm_check;
761 	} else {
762 		pr->flags.bm_check = bm_check_flag;
763 	}
764 
765 	if (pr->flags.bm_check) {
766 		printk("Disabling BM access before entering C3\n");
767 		/* bus mastering control is necessary */
768 		if (!pr->flags.bm_control) {
769 			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
770 			  "C3 support requires bus mastering control\n"));
771 			return_VOID;
772 		}
773 	} else {
774 		printk("Invalidating cache before entering C3\n");
775 		/*
776 		 * WBINVD should be set in the FADT for the C3 state to
777 		 * be supported when bm_check is not required.
778 		 */
779 		if (acpi_fadt.wb_invd != 1) {
780 			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
781 			  "Cache invalidation should work properly"
782 			  " for C3 to be enabled on SMP systems\n"));
783 			return_VOID;
784 		}
785 		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD,
786 				0, ACPI_MTX_DO_NOT_LOCK);
787 	}
788 
789 	/*
790 	 * Otherwise we've met all of our C3 requirements.
791 	 * Normalize the C3 latency to expedite policy.  Enable
792 	 * checking of bus mastering status (bm_check) so we can
793 	 * use this in our C3 policy.
794 	 */
795 	cx->valid = 1;
796 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
797 
798 	return_VOID;
799 }
800 
801 
802 static int acpi_processor_power_verify(struct acpi_processor *pr)
803 {
804 	unsigned int i;
805 	unsigned int working = 0;
806 
807 	for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
808 		struct acpi_processor_cx *cx = &pr->power.states[i];
809 
810 		switch (cx->type) {
811 		case ACPI_STATE_C1:
812 			cx->valid = 1;
813 			break;
814 
815 		case ACPI_STATE_C2:
816 			acpi_processor_power_verify_c2(cx);
817 			break;
818 
819 		case ACPI_STATE_C3:
820 			acpi_processor_power_verify_c3(pr, cx);
821 			break;
822 		}
823 
824 		if (cx->valid)
825 			working++;
826 	}
827 
828 	return (working);
829 }
830 
831 static int acpi_processor_get_power_info (
832 	struct acpi_processor	*pr)
833 {
834 	unsigned int i;
835 	int result;
836 
837 	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info");
838 
839 	/* NOTE: the idle thread may not be running while calling
840 	 * this function */
841 
842 	result = acpi_processor_get_power_info_cst(pr);
843 	if ((result) || (acpi_processor_power_verify(pr) < 2)) {
844 		result = acpi_processor_get_power_info_fadt(pr);
845 		if (result)
846 			result = acpi_processor_get_power_info_default_c1(pr);
847 	}
848 
849 	/*
850 	 * Set Default Policy
851 	 * ------------------
852 	 * Now that we know which states are supported, set the default
853 	 * policy.  Note that this policy can be changed dynamically
854 	 * (e.g. encourage deeper sleeps to conserve battery life when
855 	 * not on AC).
856 	 */
857 	result = acpi_processor_set_power_policy(pr);
858 	if (result)
859 		return_VALUE(result);
860 
861 	/*
862 	 * if one state of type C2 or C3 is available, mark this
863 	 * CPU as being "idle manageable"
864 	 */
865 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
866 		if (pr->power.states[i].valid) {
867 			pr->power.count = i;
868 			pr->flags.power = 1;
869 		}
870 	}
871 
872 	return_VALUE(0);
873 }
874 
875 int acpi_processor_cst_has_changed (struct acpi_processor *pr)
876 {
877  	int			result = 0;
878 
879 	ACPI_FUNCTION_TRACE("acpi_processor_cst_has_changed");
880 
881 	if (!pr)
882  		return_VALUE(-EINVAL);
883 
884 	if (nocst) {
885 		return_VALUE(-ENODEV);
886 	}
887 
888 	if (!pr->flags.power_setup_done)
889 		return_VALUE(-ENODEV);
890 
891 	/* Fall back to the default idle loop */
892 	pm_idle = pm_idle_save;
893 	synchronize_sched();  /* Relies on interrupts forcing exit from idle. */
894 
895 	pr->flags.power = 0;
896 	result = acpi_processor_get_power_info(pr);
897 	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
898 		pm_idle = acpi_processor_idle;
899 
900 	return_VALUE(result);
901 }
902 
903 /* proc interface */
904 
905 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
906 {
907 	struct acpi_processor	*pr = (struct acpi_processor *)seq->private;
908 	unsigned int		i;
909 
910 	ACPI_FUNCTION_TRACE("acpi_processor_power_seq_show");
911 
912 	if (!pr)
913 		goto end;
914 
915 	seq_printf(seq, "active state:            C%zd\n"
916 			"max_cstate:              C%d\n"
917 			"bus master activity:     %08x\n",
918 			pr->power.state ? pr->power.state - pr->power.states : 0,
919 			max_cstate,
920 			(unsigned)pr->power.bm_activity);
921 
922 	seq_puts(seq, "states:\n");
923 
924 	for (i = 1; i <= pr->power.count; i++) {
925 		seq_printf(seq, "   %cC%d:                  ",
926 			(&pr->power.states[i] == pr->power.state?'*':' '), i);
927 
928 		if (!pr->power.states[i].valid) {
929 			seq_puts(seq, "<not supported>\n");
930 			continue;
931 		}
932 
933 		switch (pr->power.states[i].type) {
934 		case ACPI_STATE_C1:
935 			seq_printf(seq, "type[C1] ");
936 			break;
937 		case ACPI_STATE_C2:
938 			seq_printf(seq, "type[C2] ");
939 			break;
940 		case ACPI_STATE_C3:
941 			seq_printf(seq, "type[C3] ");
942 			break;
943 		default:
944 			seq_printf(seq, "type[--] ");
945 			break;
946 		}
947 
948 		if (pr->power.states[i].promotion.state)
949 			seq_printf(seq, "promotion[C%zd] ",
950 				(pr->power.states[i].promotion.state -
951 				 pr->power.states));
952 		else
953 			seq_puts(seq, "promotion[--] ");
954 
955 		if (pr->power.states[i].demotion.state)
956 			seq_printf(seq, "demotion[C%zd] ",
957 				(pr->power.states[i].demotion.state -
958 				 pr->power.states));
959 		else
960 			seq_puts(seq, "demotion[--] ");
961 
962 		seq_printf(seq, "latency[%03d] usage[%08d]\n",
963 			pr->power.states[i].latency,
964 			pr->power.states[i].usage);
965 	}
966 
967 end:
968 	return_VALUE(0);
969 }
970 
971 static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
972 {
973 	return single_open(file, acpi_processor_power_seq_show,
974 						PDE(inode)->data);
975 }
976 
977 static struct file_operations acpi_processor_power_fops = {
978 	.open 		= acpi_processor_power_open_fs,
979 	.read		= seq_read,
980 	.llseek		= seq_lseek,
981 	.release	= single_release,
982 };
983 
984 int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device)
985 {
986 	acpi_status		status = 0;
987 	static int		first_run = 0;
988 	struct proc_dir_entry	*entry = NULL;
989 	unsigned int i;
990 
991 	ACPI_FUNCTION_TRACE("acpi_processor_power_init");
992 
993 	if (!first_run) {
994 		dmi_check_system(processor_power_dmi_table);
995 		if (max_cstate < ACPI_C_STATES_MAX)
996 			printk(KERN_NOTICE "ACPI: processor limited to max C-state %d\n", max_cstate);
997 		first_run++;
998 	}
999 
1000 	if (!pr)
1001 		return_VALUE(-EINVAL);
1002 
1003 	if (acpi_fadt.cst_cnt && !nocst) {
1004 		status = acpi_os_write_port(acpi_fadt.smi_cmd, acpi_fadt.cst_cnt, 8);
1005 		if (ACPI_FAILURE(status)) {
1006 			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
1007 					  "Notifying BIOS of _CST ability failed\n"));
1008 		}
1009 	}
1010 
1011 	acpi_processor_power_init_pdc(&(pr->power), pr->id);
1012 	acpi_processor_set_pdc(pr, pr->power.pdc);
1013 	acpi_processor_get_power_info(pr);
1014 
1015 	/*
1016 	 * Install the idle handler if processor power management is supported.
1017 	 * Note that the previously set idle handler will be used on
1018 	 * platforms that only support C1.
1019 	 */
1020 	if ((pr->flags.power) && (!boot_option_idle_override)) {
1021 		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
1022 		for (i = 1; i <= pr->power.count; i++)
1023 			if (pr->power.states[i].valid)
1024 				printk(" C%d[C%d]", i, pr->power.states[i].type);
1025 		printk(")\n");
1026 
1027 		if (pr->id == 0) {
1028 			pm_idle_save = pm_idle;
1029 			pm_idle = acpi_processor_idle;
1030 		}
1031 	}
1032 
1033 	/* 'power' [R] */
1034 	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
1035 		S_IRUGO, acpi_device_dir(device));
1036 	if (!entry)
1037 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
1038 			"Unable to create '%s' fs entry\n",
1039 			ACPI_PROCESSOR_FILE_POWER));
1040 	else {
1041 		entry->proc_fops = &acpi_processor_power_fops;
1042 		entry->data = acpi_driver_data(device);
1043 		entry->owner = THIS_MODULE;
1044 	}
1045 
1046 	pr->flags.power_setup_done = 1;
1047 
1048 	return_VALUE(0);
1049 }
1050 
1051 int acpi_processor_power_exit(struct acpi_processor *pr, struct acpi_device *device)
1052 {
1053 	ACPI_FUNCTION_TRACE("acpi_processor_power_exit");
1054 
1055 	pr->flags.power_setup_done = 0;
1056 
1057 	if (acpi_device_dir(device))
1058 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,acpi_device_dir(device));
1059 
1060 	/* Unregister the idle handler when processor #0 is removed. */
1061 	if (pr->id == 0) {
1062 		pm_idle = pm_idle_save;
1063 
1064 		/*
1065 		 * We are about to unload the current idle thread pm callback
1066 		 * (pm_idle).  Wait for all processors to update cached/local
1067 		 * copies of pm_idle before proceeding.
1068 		 */
1069 		cpu_idle_wait();
1070 	}
1071 
1072 	return_VALUE(0);
1073 }
1074