xref: /linux/drivers/cpufreq/cpufreq.c (revision ac6a0cf6716bb46813d0161024c66c2af66e53d1)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33 						"cpufreq-core", msg)
34 
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in cpufreq hotplug path should not
63  *   take this sem as top level hotplug notifier handler takes this.
64  */
65 static DEFINE_PER_CPU(int, policy_cpu);
66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67 
/*
 * lock_policy_rwsem(mode, cpu) - emit lock_policy_rwsem_<mode>(int cpu).
 *
 * The generated function takes, in <mode> (read or write), the rwsem that
 * belongs to the CPU recorded in per_cpu(policy_cpu, cpu); that entry must
 * not be -1. If @cpu turns out to be offline once the semaphore is held,
 * the semaphore is released again and -1 is returned. Returns 0 with the
 * semaphore held otherwise.
 */
#define lock_policy_rwsem(mode, cpu)					\
int lock_policy_rwsem_##mode(int cpu)					\
{									\
	int owner = per_cpu(policy_cpu, cpu);				\
									\
	BUG_ON(owner == -1);						\
	down_##mode(&per_cpu(cpu_policy_rwsem, owner));			\
	if (likely(cpu_online(cpu)))					\
		return 0;						\
									\
	up_##mode(&per_cpu(cpu_policy_rwsem, owner));			\
	return -1;							\
}
82 
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85 
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88 
89 void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96 
97 void unlock_policy_rwsem_write(int cpu)
98 {
99 	int policy_cpu = per_cpu(policy_cpu, cpu);
100 	BUG_ON(policy_cpu == -1);
101 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104 
105 
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy,
108 		unsigned int event);
109 static unsigned int __cpufreq_get(unsigned int cpu);
110 static void handle_update(struct work_struct *work);
111 
112 /**
113  * Two notifier lists: the "policy" list is involved in the
114  * validation process for a new CPU frequency policy; the
115  * "transition" list for kernel code that needs to handle
116  * changes to devices when the CPU clock speed changes.
117  * The mutex locks both lists.
118  */
119 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
120 static struct srcu_notifier_head cpufreq_transition_notifier_list;
121 
122 static bool init_cpufreq_transition_notifier_list_called;
123 static int __init init_cpufreq_transition_notifier_list(void)
124 {
125 	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
126 	init_cpufreq_transition_notifier_list_called = true;
127 	return 0;
128 }
129 pure_initcall(init_cpufreq_transition_notifier_list);
130 
131 static LIST_HEAD(cpufreq_governor_list);
132 static DEFINE_MUTEX(cpufreq_governor_mutex);
133 
134 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
135 {
136 	struct cpufreq_policy *data;
137 	unsigned long flags;
138 
139 	if (cpu >= nr_cpu_ids)
140 		goto err_out;
141 
142 	/* get the cpufreq driver */
143 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
144 
145 	if (!cpufreq_driver)
146 		goto err_out_unlock;
147 
148 	if (!try_module_get(cpufreq_driver->owner))
149 		goto err_out_unlock;
150 
151 
152 	/* get the CPU */
153 	data = per_cpu(cpufreq_cpu_data, cpu);
154 
155 	if (!data)
156 		goto err_out_put_module;
157 
158 	if (!kobject_get(&data->kobj))
159 		goto err_out_put_module;
160 
161 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
162 	return data;
163 
164 err_out_put_module:
165 	module_put(cpufreq_driver->owner);
166 err_out_unlock:
167 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
168 err_out:
169 	return NULL;
170 }
171 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
172 
173 
174 void cpufreq_cpu_put(struct cpufreq_policy *data)
175 {
176 	kobject_put(&data->kobj);
177 	module_put(cpufreq_driver->owner);
178 }
179 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
180 
181 
182 /*********************************************************************
183  *                     UNIFIED DEBUG HELPERS                         *
184  *********************************************************************/
185 #ifdef CONFIG_CPU_FREQ_DEBUG
186 
187 /* what part(s) of the CPUfreq subsystem are debugged? */
188 static unsigned int debug;
189 
190 /* is the debug output ratelimit'ed using printk_ratelimit? User can
191  * set or modify this value.
192  */
193 static unsigned int debug_ratelimit = 1;
194 
195 /* is the printk_ratelimit'ing enabled? It's enabled after a successful
196  * loading of a cpufreq driver, temporarily disabled when a new policy
197  * is set, and disabled upon cpufreq driver removal
198  */
199 static unsigned int disable_ratelimit = 1;
200 static DEFINE_SPINLOCK(disable_ratelimit_lock);
201 
202 static void cpufreq_debug_enable_ratelimit(void)
203 {
204 	unsigned long flags;
205 
206 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
207 	if (disable_ratelimit)
208 		disable_ratelimit--;
209 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
210 }
211 
212 static void cpufreq_debug_disable_ratelimit(void)
213 {
214 	unsigned long flags;
215 
216 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
217 	disable_ratelimit++;
218 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
219 }
220 
221 void cpufreq_debug_printk(unsigned int type, const char *prefix,
222 			const char *fmt, ...)
223 {
224 	char s[256];
225 	va_list args;
226 	unsigned int len;
227 	unsigned long flags;
228 
229 	WARN_ON(!prefix);
230 	if (type & debug) {
231 		spin_lock_irqsave(&disable_ratelimit_lock, flags);
232 		if (!disable_ratelimit && debug_ratelimit
233 					&& !printk_ratelimit()) {
234 			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
235 			return;
236 		}
237 		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
238 
239 		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
240 
241 		va_start(args, fmt);
242 		len += vsnprintf(&s[len], (256 - len), fmt, args);
243 		va_end(args);
244 
245 		printk(s);
246 
247 		WARN_ON(len < 5);
248 	}
249 }
250 EXPORT_SYMBOL(cpufreq_debug_printk);
251 
252 
253 module_param(debug, uint, 0644);
254 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
255 			" 2 to debug drivers, and 4 to debug governors.");
256 
257 module_param(debug_ratelimit, uint, 0644);
258 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
259 					" set to 0 to disable ratelimiting.");
260 
261 #else /* !CONFIG_CPU_FREQ_DEBUG */
262 
/* Debugging is compiled out: the ratelimit switches do nothing. */
static inline void cpufreq_debug_enable_ratelimit(void)
{
}

static inline void cpufreq_debug_disable_ratelimit(void)
{
}
265 
266 #endif /* CONFIG_CPU_FREQ_DEBUG */
267 
268 
269 /*********************************************************************
270  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
271  *********************************************************************/
272 
273 /**
274  * adjust_jiffies - adjust the system "loops_per_jiffy"
275  *
276  * This function alters the system "loops_per_jiffy" for the clock
277  * speed change. Note that loops_per_jiffy cannot be updated on SMP
278  * systems as each CPU might be scaled differently. So, use the arch
279  * per-CPU loops_per_jiffy value wherever possible.
280  */
281 #ifndef CONFIG_SMP
282 static unsigned long l_p_j_ref;
283 static unsigned int  l_p_j_ref_freq;
284 
285 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
286 {
287 	if (ci->flags & CPUFREQ_CONST_LOOPS)
288 		return;
289 
290 	if (!l_p_j_ref_freq) {
291 		l_p_j_ref = loops_per_jiffy;
292 		l_p_j_ref_freq = ci->old;
293 		dprintk("saving %lu as reference value for loops_per_jiffy; "
294 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
295 	}
296 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
297 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
298 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
299 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
300 								ci->new);
301 		dprintk("scaling loops_per_jiffy to %lu "
302 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
303 	}
304 }
305 #else
/* SMP build: the global loops_per_jiffy is left alone. */
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
}
310 #endif
311 
312 
313 /**
314  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
315  * on frequency transition.
316  *
317  * This function calls the transition notifiers and the "adjust_jiffies"
318  * function. It is called twice on all CPU frequency changes that have
319  * external effects.
320  */
321 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
322 {
323 	struct cpufreq_policy *policy;
324 
325 	BUG_ON(irqs_disabled());
326 
327 	freqs->flags = cpufreq_driver->flags;
328 	dprintk("notification %u of frequency transition to %u kHz\n",
329 		state, freqs->new);
330 
331 	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
332 	switch (state) {
333 
334 	case CPUFREQ_PRECHANGE:
335 		/* detect if the driver reported a value as "old frequency"
336 		 * which is not equal to what the cpufreq core thinks is
337 		 * "old frequency".
338 		 */
339 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
340 			if ((policy) && (policy->cpu == freqs->cpu) &&
341 			    (policy->cur) && (policy->cur != freqs->old)) {
342 				dprintk("Warning: CPU frequency is"
343 					" %u, cpufreq assumed %u kHz.\n",
344 					freqs->old, policy->cur);
345 				freqs->old = policy->cur;
346 			}
347 		}
348 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
349 				CPUFREQ_PRECHANGE, freqs);
350 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
351 		break;
352 
353 	case CPUFREQ_POSTCHANGE:
354 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
355 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
356 				CPUFREQ_POSTCHANGE, freqs);
357 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
358 			policy->cur = freqs->new;
359 		break;
360 	}
361 }
362 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
363 
364 
365 
366 /*********************************************************************
367  *                          SYSFS INTERFACE                          *
368  *********************************************************************/
369 
370 static struct cpufreq_governor *__find_governor(const char *str_governor)
371 {
372 	struct cpufreq_governor *t;
373 
374 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
375 		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
376 			return t;
377 
378 	return NULL;
379 }
380 
381 /**
382  * cpufreq_parse_governor - parse a governor string
383  */
384 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
385 				struct cpufreq_governor **governor)
386 {
387 	int err = -EINVAL;
388 
389 	if (!cpufreq_driver)
390 		goto out;
391 
392 	if (cpufreq_driver->setpolicy) {
393 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
394 			*policy = CPUFREQ_POLICY_PERFORMANCE;
395 			err = 0;
396 		} else if (!strnicmp(str_governor, "powersave",
397 						CPUFREQ_NAME_LEN)) {
398 			*policy = CPUFREQ_POLICY_POWERSAVE;
399 			err = 0;
400 		}
401 	} else if (cpufreq_driver->target) {
402 		struct cpufreq_governor *t;
403 
404 		mutex_lock(&cpufreq_governor_mutex);
405 
406 		t = __find_governor(str_governor);
407 
408 		if (t == NULL) {
409 			char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
410 								str_governor);
411 
412 			if (name) {
413 				int ret;
414 
415 				mutex_unlock(&cpufreq_governor_mutex);
416 				ret = request_module("%s", name);
417 				mutex_lock(&cpufreq_governor_mutex);
418 
419 				if (ret == 0)
420 					t = __find_governor(str_governor);
421 			}
422 
423 			kfree(name);
424 		}
425 
426 		if (t != NULL) {
427 			*governor = t;
428 			err = 0;
429 		}
430 
431 		mutex_unlock(&cpufreq_governor_mutex);
432 	}
433 out:
434 	return err;
435 }
436 
437 
/**
 * show_one() - generate show_##file_name(), a sysfs read handler
 *
 * The generated function prints policy->object followed by a newline into
 * the sysfs buffer; object must be an "unsigned int" member of
 * struct cpufreq_policy.
 */
#define show_one(file_name, object)					\
static ssize_t show_##file_name(struct cpufreq_policy *policy,		\
				char *buf)				\
{									\
	return sprintf(buf, "%u\n", policy->object);			\
}
452 
453 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
454 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
455 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
456 show_one(scaling_min_freq, min);
457 show_one(scaling_max_freq, max);
458 show_one(scaling_cur_freq, cur);
459 
460 static int __cpufreq_set_policy(struct cpufreq_policy *data,
461 				struct cpufreq_policy *policy);
462 
/**
 * store_one() - generate store_##file_name(), a sysfs write handler
 *
 * The generated function parses an unsigned decimal from @buf into the
 * "object" member of a copy of the current policy, applies that copy with
 * __cpufreq_set_policy() and mirrors the resulting policy->object into
 * policy->user_policy.
 *
 * Returns @count on success, -EINVAL if the current policy cannot be read
 * or the input is not a number, or the negative error code from
 * __cpufreq_set_policy(). The status is kept in a signed int so that a
 * negative errno survives the widening to ssize_t.
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	int ret;							\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
486 
487 store_one(scaling_min_freq, min);
488 store_one(scaling_max_freq, max);
489 
490 /**
491  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
492  */
493 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
494 					char *buf)
495 {
496 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
497 	if (!cur_freq)
498 		return sprintf(buf, "<unknown>");
499 	return sprintf(buf, "%u\n", cur_freq);
500 }
501 
502 
503 /**
504  * show_scaling_governor - show the current policy for the specified CPU
505  */
506 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
507 {
508 	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
509 		return sprintf(buf, "powersave\n");
510 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
511 		return sprintf(buf, "performance\n");
512 	else if (policy->governor)
513 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
514 				policy->governor->name);
515 	return -EINVAL;
516 }
517 
518 
519 /**
520  * store_scaling_governor - store policy for the specified CPU
521  */
522 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
523 					const char *buf, size_t count)
524 {
525 	unsigned int ret = -EINVAL;
526 	char	str_governor[16];
527 	struct cpufreq_policy new_policy;
528 
529 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
530 	if (ret)
531 		return ret;
532 
533 	ret = sscanf(buf, "%15s", str_governor);
534 	if (ret != 1)
535 		return -EINVAL;
536 
537 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
538 						&new_policy.governor))
539 		return -EINVAL;
540 
541 	/* Do not use cpufreq_set_policy here or the user_policy.max
542 	   will be wrongly overridden */
543 	ret = __cpufreq_set_policy(policy, &new_policy);
544 
545 	policy->user_policy.policy = policy->policy;
546 	policy->user_policy.governor = policy->governor;
547 
548 	if (ret)
549 		return ret;
550 	else
551 		return count;
552 }
553 
554 /**
555  * show_scaling_driver - show the cpufreq driver currently loaded
556  */
557 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
558 {
559 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
560 }
561 
562 /**
563  * show_scaling_available_governors - show the available CPUfreq governors
564  */
565 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
566 						char *buf)
567 {
568 	ssize_t i = 0;
569 	struct cpufreq_governor *t;
570 
571 	if (!cpufreq_driver->target) {
572 		i += sprintf(buf, "performance powersave");
573 		goto out;
574 	}
575 
576 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
577 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
578 		    - (CPUFREQ_NAME_LEN + 2)))
579 			goto out;
580 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
581 	}
582 out:
583 	i += sprintf(&buf[i], "\n");
584 	return i;
585 }
586 
587 static ssize_t show_cpus(const struct cpumask *mask, char *buf)
588 {
589 	ssize_t i = 0;
590 	unsigned int cpu;
591 
592 	for_each_cpu(cpu, mask) {
593 		if (i)
594 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
595 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
596 		if (i >= (PAGE_SIZE - 5))
597 			break;
598 	}
599 	i += sprintf(&buf[i], "\n");
600 	return i;
601 }
602 
603 /**
604  * show_related_cpus - show the CPUs affected by each transition even if
605  * hw coordination is in use
606  */
607 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
608 {
609 	if (cpumask_empty(policy->related_cpus))
610 		return show_cpus(policy->cpus, buf);
611 	return show_cpus(policy->related_cpus, buf);
612 }
613 
614 /**
615  * show_affected_cpus - show the CPUs affected by each transition
616  */
617 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
618 {
619 	return show_cpus(policy->cpus, buf);
620 }
621 
622 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
623 					const char *buf, size_t count)
624 {
625 	unsigned int freq = 0;
626 	unsigned int ret;
627 
628 	if (!policy->governor || !policy->governor->store_setspeed)
629 		return -EINVAL;
630 
631 	ret = sscanf(buf, "%u", &freq);
632 	if (ret != 1)
633 		return -EINVAL;
634 
635 	policy->governor->store_setspeed(policy, freq);
636 
637 	return count;
638 }
639 
640 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
641 {
642 	if (!policy->governor || !policy->governor->show_setspeed)
643 		return sprintf(buf, "<unsupported>\n");
644 
645 	return policy->governor->show_setspeed(policy, buf);
646 }
647 
648 #define define_one_ro(_name) \
649 static struct freq_attr _name = \
650 __ATTR(_name, 0444, show_##_name, NULL)
651 
652 #define define_one_ro0400(_name) \
653 static struct freq_attr _name = \
654 __ATTR(_name, 0400, show_##_name, NULL)
655 
656 #define define_one_rw(_name) \
657 static struct freq_attr _name = \
658 __ATTR(_name, 0644, show_##_name, store_##_name)
659 
660 define_one_ro0400(cpuinfo_cur_freq);
661 define_one_ro(cpuinfo_min_freq);
662 define_one_ro(cpuinfo_max_freq);
663 define_one_ro(cpuinfo_transition_latency);
664 define_one_ro(scaling_available_governors);
665 define_one_ro(scaling_driver);
666 define_one_ro(scaling_cur_freq);
667 define_one_ro(related_cpus);
668 define_one_ro(affected_cpus);
669 define_one_rw(scaling_min_freq);
670 define_one_rw(scaling_max_freq);
671 define_one_rw(scaling_governor);
672 define_one_rw(scaling_setspeed);
673 
674 static struct attribute *default_attrs[] = {
675 	&cpuinfo_min_freq.attr,
676 	&cpuinfo_max_freq.attr,
677 	&cpuinfo_transition_latency.attr,
678 	&scaling_min_freq.attr,
679 	&scaling_max_freq.attr,
680 	&affected_cpus.attr,
681 	&related_cpus.attr,
682 	&scaling_governor.attr,
683 	&scaling_driver.attr,
684 	&scaling_available_governors.attr,
685 	&scaling_setspeed.attr,
686 	NULL
687 };
688 
689 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
690 #define to_attr(a) container_of(a, struct freq_attr, attr)
691 
692 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
693 {
694 	struct cpufreq_policy *policy = to_policy(kobj);
695 	struct freq_attr *fattr = to_attr(attr);
696 	ssize_t ret = -EINVAL;
697 	policy = cpufreq_cpu_get(policy->cpu);
698 	if (!policy)
699 		goto no_policy;
700 
701 	if (lock_policy_rwsem_read(policy->cpu) < 0)
702 		goto fail;
703 
704 	if (fattr->show)
705 		ret = fattr->show(policy, buf);
706 	else
707 		ret = -EIO;
708 
709 	unlock_policy_rwsem_read(policy->cpu);
710 fail:
711 	cpufreq_cpu_put(policy);
712 no_policy:
713 	return ret;
714 }
715 
716 static ssize_t store(struct kobject *kobj, struct attribute *attr,
717 		     const char *buf, size_t count)
718 {
719 	struct cpufreq_policy *policy = to_policy(kobj);
720 	struct freq_attr *fattr = to_attr(attr);
721 	ssize_t ret = -EINVAL;
722 	policy = cpufreq_cpu_get(policy->cpu);
723 	if (!policy)
724 		goto no_policy;
725 
726 	if (lock_policy_rwsem_write(policy->cpu) < 0)
727 		goto fail;
728 
729 	if (fattr->store)
730 		ret = fattr->store(policy, buf, count);
731 	else
732 		ret = -EIO;
733 
734 	unlock_policy_rwsem_write(policy->cpu);
735 fail:
736 	cpufreq_cpu_put(policy);
737 no_policy:
738 	return ret;
739 }
740 
741 static void cpufreq_sysfs_release(struct kobject *kobj)
742 {
743 	struct cpufreq_policy *policy = to_policy(kobj);
744 	dprintk("last reference is dropped\n");
745 	complete(&policy->kobj_unregister);
746 }
747 
748 static struct sysfs_ops sysfs_ops = {
749 	.show	= show,
750 	.store	= store,
751 };
752 
753 static struct kobj_type ktype_cpufreq = {
754 	.sysfs_ops	= &sysfs_ops,
755 	.default_attrs	= default_attrs,
756 	.release	= cpufreq_sysfs_release,
757 };
758 
759 
760 /**
761  * cpufreq_add_dev - add a CPU device
762  *
763  * Adds the cpufreq interface for a CPU device.
764  *
765  * The Oracle says: try running cpufreq registration/unregistration concurrently
766  * with with cpu hotplugging and all hell will break loose. Tried to clean this
767  * mess up, but more thorough testing is needed. - Mathieu
768  */
769 static int cpufreq_add_dev(struct sys_device *sys_dev)
770 {
771 	unsigned int cpu = sys_dev->id;
772 	int ret = 0;
773 	struct cpufreq_policy new_policy;
774 	struct cpufreq_policy *policy;
775 	struct freq_attr **drv_attr;
776 	struct sys_device *cpu_sys_dev;
777 	unsigned long flags;
778 	unsigned int j;
779 
780 	if (cpu_is_offline(cpu))
781 		return 0;
782 
783 	cpufreq_debug_disable_ratelimit();
784 	dprintk("adding CPU %u\n", cpu);
785 
786 #ifdef CONFIG_SMP
787 	/* check whether a different CPU already registered this
788 	 * CPU because it is in the same boat. */
789 	policy = cpufreq_cpu_get(cpu);
790 	if (unlikely(policy)) {
791 		cpufreq_cpu_put(policy);
792 		cpufreq_debug_enable_ratelimit();
793 		return 0;
794 	}
795 #endif
796 
797 	if (!try_module_get(cpufreq_driver->owner)) {
798 		ret = -EINVAL;
799 		goto module_out;
800 	}
801 
802 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
803 	if (!policy) {
804 		ret = -ENOMEM;
805 		goto nomem_out;
806 	}
807 	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) {
808 		ret = -ENOMEM;
809 		goto err_free_policy;
810 	}
811 	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
812 		ret = -ENOMEM;
813 		goto err_free_cpumask;
814 	}
815 
816 	policy->cpu = cpu;
817 	cpumask_copy(policy->cpus, cpumask_of(cpu));
818 
819 	/* Initially set CPU itself as the policy_cpu */
820 	per_cpu(policy_cpu, cpu) = cpu;
821 	ret = (lock_policy_rwsem_write(cpu) < 0);
822 	WARN_ON(ret);
823 
824 	init_completion(&policy->kobj_unregister);
825 	INIT_WORK(&policy->update, handle_update);
826 
827 	/* Set governor before ->init, so that driver could check it */
828 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
829 	/* call driver. From then on the cpufreq must be able
830 	 * to accept all calls to ->verify and ->setpolicy for this CPU
831 	 */
832 	ret = cpufreq_driver->init(policy);
833 	if (ret) {
834 		dprintk("initialization failed\n");
835 		goto err_unlock_policy;
836 	}
837 	policy->user_policy.min = policy->min;
838 	policy->user_policy.max = policy->max;
839 
840 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
841 				     CPUFREQ_START, policy);
842 
843 #ifdef CONFIG_SMP
844 
845 #ifdef CONFIG_HOTPLUG_CPU
846 	if (per_cpu(cpufreq_cpu_governor, cpu)) {
847 		policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
848 		dprintk("Restoring governor %s for cpu %d\n",
849 		       policy->governor->name, cpu);
850 	}
851 #endif
852 
853 	for_each_cpu(j, policy->cpus) {
854 		struct cpufreq_policy *managed_policy;
855 
856 		if (cpu == j)
857 			continue;
858 
859 		/* Check for existing affected CPUs.
860 		 * They may not be aware of it due to CPU Hotplug.
861 		 * cpufreq_cpu_put is called when the device is removed
862 		 * in __cpufreq_remove_dev()
863 		 */
864 		managed_policy = cpufreq_cpu_get(j);
865 		if (unlikely(managed_policy)) {
866 
867 			/* Set proper policy_cpu */
868 			unlock_policy_rwsem_write(cpu);
869 			per_cpu(policy_cpu, cpu) = managed_policy->cpu;
870 
871 			if (lock_policy_rwsem_write(cpu) < 0) {
872 				/* Should not go through policy unlock path */
873 				if (cpufreq_driver->exit)
874 					cpufreq_driver->exit(policy);
875 				ret = -EBUSY;
876 				cpufreq_cpu_put(managed_policy);
877 				goto err_free_cpumask;
878 			}
879 
880 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
881 			cpumask_copy(managed_policy->cpus, policy->cpus);
882 			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
883 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
884 
885 			dprintk("CPU already managed, adding link\n");
886 			ret = sysfs_create_link(&sys_dev->kobj,
887 						&managed_policy->kobj,
888 						"cpufreq");
889 			if (ret)
890 				cpufreq_cpu_put(managed_policy);
891 			/*
892 			 * Success. We only needed to be added to the mask.
893 			 * Call driver->exit() because only the cpu parent of
894 			 * the kobj needed to call init().
895 			 */
896 			goto out_driver_exit; /* call driver->exit() */
897 		}
898 	}
899 #endif
900 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
901 
902 	/* prepare interface data */
903 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
904 				   "cpufreq");
905 	if (ret)
906 		goto out_driver_exit;
907 
908 	/* set up files for this cpu device */
909 	drv_attr = cpufreq_driver->attr;
910 	while ((drv_attr) && (*drv_attr)) {
911 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
912 		if (ret)
913 			goto err_out_kobj_put;
914 		drv_attr++;
915 	}
916 	if (cpufreq_driver->get) {
917 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
918 		if (ret)
919 			goto err_out_kobj_put;
920 	}
921 	if (cpufreq_driver->target) {
922 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
923 		if (ret)
924 			goto err_out_kobj_put;
925 	}
926 
927 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
928 	for_each_cpu(j, policy->cpus) {
929 		if (!cpu_online(j))
930 			continue;
931 		per_cpu(cpufreq_cpu_data, j) = policy;
932 		per_cpu(policy_cpu, j) = policy->cpu;
933 	}
934 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
935 
936 	/* symlink affected CPUs */
937 	for_each_cpu(j, policy->cpus) {
938 		struct cpufreq_policy *managed_policy;
939 
940 		if (j == cpu)
941 			continue;
942 		if (!cpu_online(j))
943 			continue;
944 
945 		dprintk("CPU %u already managed, adding link\n", j);
946 		managed_policy = cpufreq_cpu_get(cpu);
947 		cpu_sys_dev = get_cpu_sysdev(j);
948 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
949 					"cpufreq");
950 		if (ret) {
951 			cpufreq_cpu_put(managed_policy);
952 			goto err_out_unregister;
953 		}
954 	}
955 
956 	policy->governor = NULL; /* to assure that the starting sequence is
957 				  * run in cpufreq_set_policy */
958 
959 	/* set default policy */
960 	ret = __cpufreq_set_policy(policy, &new_policy);
961 	policy->user_policy.policy = policy->policy;
962 	policy->user_policy.governor = policy->governor;
963 
964 	if (ret) {
965 		dprintk("setting policy failed\n");
966 		goto err_out_unregister;
967 	}
968 
969 	unlock_policy_rwsem_write(cpu);
970 
971 	kobject_uevent(&policy->kobj, KOBJ_ADD);
972 	module_put(cpufreq_driver->owner);
973 	dprintk("initialization complete\n");
974 	cpufreq_debug_enable_ratelimit();
975 
976 	return 0;
977 
978 
979 err_out_unregister:
980 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
981 	for_each_cpu(j, policy->cpus)
982 		per_cpu(cpufreq_cpu_data, j) = NULL;
983 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
984 
985 err_out_kobj_put:
986 	kobject_put(&policy->kobj);
987 	wait_for_completion(&policy->kobj_unregister);
988 
989 out_driver_exit:
990 	if (cpufreq_driver->exit)
991 		cpufreq_driver->exit(policy);
992 
993 err_unlock_policy:
994 	unlock_policy_rwsem_write(cpu);
995 err_free_cpumask:
996 	free_cpumask_var(policy->cpus);
997 err_free_policy:
998 	kfree(policy);
999 nomem_out:
1000 	module_put(cpufreq_driver->owner);
1001 module_out:
1002 	cpufreq_debug_enable_ratelimit();
1003 	return ret;
1004 }
1005 
1006 
1007 /**
1008  * __cpufreq_remove_dev - remove a CPU device
1009  *
1010  * Removes the cpufreq interface for a CPU device.
1011  * Caller should already have policy_rwsem in write mode for this CPU.
1012  * This routine frees the rwsem before returning.
1013  */
1014 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
1015 {
1016 	unsigned int cpu = sys_dev->id;
1017 	unsigned long flags;
1018 	struct cpufreq_policy *data;
1019 #ifdef CONFIG_SMP
1020 	struct sys_device *cpu_sys_dev;
1021 	unsigned int j;
1022 #endif
1023 
1024 	cpufreq_debug_disable_ratelimit();
1025 	dprintk("unregistering CPU %u\n", cpu);
1026 
1027 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1028 	data = per_cpu(cpufreq_cpu_data, cpu);
1029 
1030 	if (!data) {
1031 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1032 		cpufreq_debug_enable_ratelimit();
1033 		unlock_policy_rwsem_write(cpu);
1034 		return -EINVAL;
1035 	}
1036 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1037 
1038 
1039 #ifdef CONFIG_SMP
1040 	/* if this isn't the CPU which is the parent of the kobj, we
1041 	 * only need to unlink, put and exit
1042 	 */
1043 	if (unlikely(cpu != data->cpu)) {
1044 		dprintk("removing link\n");
1045 		cpumask_clear_cpu(cpu, data->cpus);
1046 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1047 		sysfs_remove_link(&sys_dev->kobj, "cpufreq");
1048 		cpufreq_cpu_put(data);
1049 		cpufreq_debug_enable_ratelimit();
1050 		unlock_policy_rwsem_write(cpu);
1051 		return 0;
1052 	}
1053 #endif
1054 
1055 #ifdef CONFIG_SMP
1056 
1057 #ifdef CONFIG_HOTPLUG_CPU
1058 	per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
1059 #endif
1060 
1061 	/* if we have other CPUs still registered, we need to unlink them,
1062 	 * or else wait_for_completion below will lock up. Clean the
1063 	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1064 	 * the sysfs links afterwards.
1065 	 */
1066 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1067 		for_each_cpu(j, data->cpus) {
1068 			if (j == cpu)
1069 				continue;
1070 			per_cpu(cpufreq_cpu_data, j) = NULL;
1071 		}
1072 	}
1073 
1074 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1075 
1076 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1077 		for_each_cpu(j, data->cpus) {
1078 			if (j == cpu)
1079 				continue;
1080 			dprintk("removing link for cpu %u\n", j);
1081 #ifdef CONFIG_HOTPLUG_CPU
1082 			per_cpu(cpufreq_cpu_governor, j) = data->governor;
1083 #endif
1084 			cpu_sys_dev = get_cpu_sysdev(j);
1085 			sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
1086 			cpufreq_cpu_put(data);
1087 		}
1088 	}
1089 #else
1090 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1091 #endif
1092 
1093 	if (cpufreq_driver->target)
1094 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1095 
1096 	kobject_put(&data->kobj);
1097 
1098 	/* we need to make sure that the underlying kobj is actually
1099 	 * not referenced anymore by anybody before we proceed with
1100 	 * unloading.
1101 	 */
1102 	dprintk("waiting for dropping of refcount\n");
1103 	wait_for_completion(&data->kobj_unregister);
1104 	dprintk("wait complete\n");
1105 
1106 	if (cpufreq_driver->exit)
1107 		cpufreq_driver->exit(data);
1108 
1109 	unlock_policy_rwsem_write(cpu);
1110 
1111 	free_cpumask_var(data->related_cpus);
1112 	free_cpumask_var(data->cpus);
1113 	kfree(data);
1114 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1115 
1116 	cpufreq_debug_enable_ratelimit();
1117 	return 0;
1118 }
1119 
1120 
1121 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1122 {
1123 	unsigned int cpu = sys_dev->id;
1124 	int retval;
1125 
1126 	if (cpu_is_offline(cpu))
1127 		return 0;
1128 
1129 	if (unlikely(lock_policy_rwsem_write(cpu)))
1130 		BUG();
1131 
1132 	retval = __cpufreq_remove_dev(sys_dev);
1133 	return retval;
1134 }
1135 
1136 
1137 static void handle_update(struct work_struct *work)
1138 {
1139 	struct cpufreq_policy *policy =
1140 		container_of(work, struct cpufreq_policy, update);
1141 	unsigned int cpu = policy->cpu;
1142 	dprintk("handle_update for cpu %u called\n", cpu);
1143 	cpufreq_update_policy(cpu);
1144 }
1145 
1146 /**
1147  *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1148  *	@cpu: cpu number
1149  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1150  *	@new_freq: CPU frequency the CPU actually runs at
1151  *
1152  *	We adjust to current frequency first, and need to clean up later.
1153  *	So either call to cpufreq_update_policy() or schedule handle_update()).
1154  */
1155 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1156 				unsigned int new_freq)
1157 {
1158 	struct cpufreq_freqs freqs;
1159 
1160 	dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1161 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1162 
1163 	freqs.cpu = cpu;
1164 	freqs.old = old_freq;
1165 	freqs.new = new_freq;
1166 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1167 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1168 }
1169 
1170 
1171 /**
1172  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1173  * @cpu: CPU number
1174  *
1175  * This is the last known freq, without actually getting it from the driver.
1176  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1177  */
1178 unsigned int cpufreq_quick_get(unsigned int cpu)
1179 {
1180 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1181 	unsigned int ret_freq = 0;
1182 
1183 	if (policy) {
1184 		ret_freq = policy->cur;
1185 		cpufreq_cpu_put(policy);
1186 	}
1187 
1188 	return ret_freq;
1189 }
1190 EXPORT_SYMBOL(cpufreq_quick_get);
1191 
1192 
1193 static unsigned int __cpufreq_get(unsigned int cpu)
1194 {
1195 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1196 	unsigned int ret_freq = 0;
1197 
1198 	if (!cpufreq_driver->get)
1199 		return ret_freq;
1200 
1201 	ret_freq = cpufreq_driver->get(cpu);
1202 
1203 	if (ret_freq && policy->cur &&
1204 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1205 		/* verify no discrepancy between actual and
1206 					saved value exists */
1207 		if (unlikely(ret_freq != policy->cur)) {
1208 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1209 			schedule_work(&policy->update);
1210 		}
1211 	}
1212 
1213 	return ret_freq;
1214 }
1215 
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency.  The policy rwsem of @cpu
 * is held for reading around the query.  Returns 0 when no policy could
 * be obtained or the rwsem could not be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq;

	if (!policy)
		return 0;

	if (unlikely(lock_policy_rwsem_read(cpu))) {
		cpufreq_cpu_put(policy);
		return 0;
	}

	freq = __cpufreq_get(cpu);
	unlock_policy_rwsem_read(cpu);

	cpufreq_cpu_put(policy);
	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1243 
1244 
1245 /**
1246  *	cpufreq_suspend - let the low level driver prepare for suspend
1247  */
1248 
1249 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
1250 {
1251 	int ret = 0;
1252 
1253 #ifdef __powerpc__
1254 	int cpu = sysdev->id;
1255 	unsigned int cur_freq = 0;
1256 	struct cpufreq_policy *cpu_policy;
1257 
1258 	dprintk("suspending cpu %u\n", cpu);
1259 
1260 	/*
1261 	 * This whole bogosity is here because Powerbooks are made of fail.
1262 	 * No sane platform should need any of the code below to be run.
1263 	 * (it's entirely the wrong thing to do, as driver->get may
1264 	 *  reenable interrupts on some architectures).
1265 	 */
1266 
1267 	if (!cpu_online(cpu))
1268 		return 0;
1269 
1270 	/* we may be lax here as interrupts are off. Nonetheless
1271 	 * we need to grab the correct cpu policy, as to check
1272 	 * whether we really run on this CPU.
1273 	 */
1274 
1275 	cpu_policy = cpufreq_cpu_get(cpu);
1276 	if (!cpu_policy)
1277 		return -EINVAL;
1278 
1279 	/* only handle each CPU group once */
1280 	if (unlikely(cpu_policy->cpu != cpu))
1281 		goto out;
1282 
1283 	if (cpufreq_driver->suspend) {
1284 		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1285 		if (ret) {
1286 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1287 					"step on CPU %u\n", cpu_policy->cpu);
1288 			goto out;
1289 		}
1290 	}
1291 
1292 	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1293 		goto out;
1294 
1295 	if (cpufreq_driver->get)
1296 		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1297 
1298 	if (!cur_freq || !cpu_policy->cur) {
1299 		printk(KERN_ERR "cpufreq: suspend failed to assert current "
1300 		       "frequency is what timing core thinks it is.\n");
1301 		goto out;
1302 	}
1303 
1304 	if (unlikely(cur_freq != cpu_policy->cur)) {
1305 		struct cpufreq_freqs freqs;
1306 
1307 		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1308 			dprintk("Warning: CPU frequency is %u, "
1309 			       "cpufreq assumed %u kHz.\n",
1310 			       cur_freq, cpu_policy->cur);
1311 
1312 		freqs.cpu = cpu;
1313 		freqs.old = cpu_policy->cur;
1314 		freqs.new = cur_freq;
1315 
1316 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1317 				    CPUFREQ_SUSPENDCHANGE, &freqs);
1318 		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1319 
1320 		cpu_policy->cur = cur_freq;
1321 	}
1322 
1323 out:
1324 	cpufreq_cpu_put(cpu_policy);
1325 #endif	/* __powerpc__ */
1326 	return ret;
1327 }
1328 
/**
 *	cpufreq_resume -  restore proper CPU frequency handling after resume
 *	@sysdev: sysdev of the CPU being resumed
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) if !CPUFREQ_CONST_LOOPS: verify we're in sync
 *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
 *	    restored.
 *
 *	Does real work on powerpc builds only; elsewhere it just returns 0.
 */
static int cpufreq_resume(struct sys_device *sysdev)
{
	int ret = 0;

#ifdef __powerpc__
	int cpu = sysdev->id;
	unsigned int hw_freq = 0;
	struct cpufreq_policy *policy;

	dprintk("resuming cpu %u\n", cpu);

	/*
	 * Same story as in ->suspend: the code below is needed for
	 * Powerbooks only.  See commit 42d4dc3f4e1e for jokes.
	 */

	if (!cpu_online(cpu))
		return 0;

	/*
	 * Interrupts are off, so we may be lax here.  The correct policy
	 * is still needed to check whether we really run on this CPU.
	 */
	policy = cpufreq_cpu_get(cpu);
	if (!policy)
		return -EINVAL;

	/* only handle each CPU group once */
	if (unlikely(policy->cpu != cpu))
		goto fail;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", policy->cpu);
			goto fail;
		}
	}

	/* no frequency cross-check for CPUFREQ_CONST_LOOPS drivers */
	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
		goto out;

	if (cpufreq_driver->get)
		hw_freq = cpufreq_driver->get(policy->cpu);

	if (!hw_freq || !policy->cur) {
		printk(KERN_ERR "cpufreq: resume failed to assert "
				"current frequency is what timing core "
				"thinks it is.\n");
		goto out;
	}

	if (unlikely(hw_freq != policy->cur)) {
		struct cpufreq_freqs transition;

		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
			dprintk("Warning: CPU frequency "
			       "is %u, cpufreq assumed %u kHz.\n",
			       hw_freq, policy->cur);

		transition.cpu = cpu;
		transition.old = policy->cur;
		transition.new = hw_freq;

		/* tell the notifiers, fix up jiffies, then adopt the value */
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
					 CPUFREQ_RESUMECHANGE, &transition);
		adjust_jiffies(CPUFREQ_RESUMECHANGE, &transition);

		policy->cur = hw_freq;
	}

out:
	schedule_work(&policy->update);
fail:
	cpufreq_cpu_put(policy);
#endif	/* __powerpc__ */
	return ret;
}
1417 
1418 static struct sysdev_driver cpufreq_sysdev_driver = {
1419 	.add		= cpufreq_add_dev,
1420 	.remove		= cpufreq_remove_dev,
1421 	.suspend	= cpufreq_suspend,
1422 	.resume		= cpufreq_resume,
1423 };
1424 
1425 
1426 /*********************************************************************
1427  *                     NOTIFIER LISTS INTERFACE                      *
1428  *********************************************************************/
1429 
1430 /**
1431  *	cpufreq_register_notifier - register a driver with cpufreq
1432  *	@nb: notifier function to register
1433  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1434  *
1435  *	Add a driver to one of two lists: either a list of drivers that
1436  *      are notified about clock rate changes (once before and once after
1437  *      the transition), or a list of drivers that are notified about
1438  *      changes in cpufreq policy.
1439  *
1440  *	This function may sleep, and has the same return conditions as
1441  *	blocking_notifier_chain_register.
1442  */
1443 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1444 {
1445 	int ret;
1446 
1447 	WARN_ON(!init_cpufreq_transition_notifier_list_called);
1448 
1449 	switch (list) {
1450 	case CPUFREQ_TRANSITION_NOTIFIER:
1451 		ret = srcu_notifier_chain_register(
1452 				&cpufreq_transition_notifier_list, nb);
1453 		break;
1454 	case CPUFREQ_POLICY_NOTIFIER:
1455 		ret = blocking_notifier_chain_register(
1456 				&cpufreq_policy_notifier_list, nb);
1457 		break;
1458 	default:
1459 		ret = -EINVAL;
1460 	}
1461 
1462 	return ret;
1463 }
1464 EXPORT_SYMBOL(cpufreq_register_notifier);
1465 
1466 
1467 /**
1468  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1469  *	@nb: notifier block to be unregistered
1470  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1471  *
1472  *	Remove a driver from the CPU frequency notifier list.
1473  *
1474  *	This function may sleep, and has the same return conditions as
1475  *	blocking_notifier_chain_unregister.
1476  */
1477 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1478 {
1479 	int ret;
1480 
1481 	switch (list) {
1482 	case CPUFREQ_TRANSITION_NOTIFIER:
1483 		ret = srcu_notifier_chain_unregister(
1484 				&cpufreq_transition_notifier_list, nb);
1485 		break;
1486 	case CPUFREQ_POLICY_NOTIFIER:
1487 		ret = blocking_notifier_chain_unregister(
1488 				&cpufreq_policy_notifier_list, nb);
1489 		break;
1490 	default:
1491 		ret = -EINVAL;
1492 	}
1493 
1494 	return ret;
1495 }
1496 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1497 
1498 
1499 /*********************************************************************
1500  *                              GOVERNORS                            *
1501  *********************************************************************/
1502 
1503 
1504 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1505 			    unsigned int target_freq,
1506 			    unsigned int relation)
1507 {
1508 	int retval = -EINVAL;
1509 
1510 	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1511 		target_freq, relation);
1512 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1513 		retval = cpufreq_driver->target(policy, target_freq, relation);
1514 
1515 	return retval;
1516 }
1517 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1518 
1519 int cpufreq_driver_target(struct cpufreq_policy *policy,
1520 			  unsigned int target_freq,
1521 			  unsigned int relation)
1522 {
1523 	int ret = -EINVAL;
1524 
1525 	policy = cpufreq_cpu_get(policy->cpu);
1526 	if (!policy)
1527 		goto no_policy;
1528 
1529 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1530 		goto fail;
1531 
1532 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1533 
1534 	unlock_policy_rwsem_write(policy->cpu);
1535 
1536 fail:
1537 	cpufreq_cpu_put(policy);
1538 no_policy:
1539 	return ret;
1540 }
1541 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1542 
1543 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1544 {
1545 	int ret = 0;
1546 
1547 	policy = cpufreq_cpu_get(policy->cpu);
1548 	if (!policy)
1549 		return -EINVAL;
1550 
1551 	if (cpu_online(cpu) && cpufreq_driver->getavg)
1552 		ret = cpufreq_driver->getavg(policy, cpu);
1553 
1554 	cpufreq_cpu_put(policy);
1555 	return ret;
1556 }
1557 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1558 
1559 /*
1560  * when "event" is CPUFREQ_GOV_LIMITS
1561  */
1562 
1563 static int __cpufreq_governor(struct cpufreq_policy *policy,
1564 					unsigned int event)
1565 {
1566 	int ret;
1567 
1568 	/* Only must be defined when default governor is known to have latency
1569 	   restrictions, like e.g. conservative or ondemand.
1570 	   That this is the case is already ensured in Kconfig
1571 	*/
1572 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1573 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1574 #else
1575 	struct cpufreq_governor *gov = NULL;
1576 #endif
1577 
1578 	if (policy->governor->max_transition_latency &&
1579 	    policy->cpuinfo.transition_latency >
1580 	    policy->governor->max_transition_latency) {
1581 		if (!gov)
1582 			return -EINVAL;
1583 		else {
1584 			printk(KERN_WARNING "%s governor failed, too long"
1585 			       " transition latency of HW, fallback"
1586 			       " to %s governor\n",
1587 			       policy->governor->name,
1588 			       gov->name);
1589 			policy->governor = gov;
1590 		}
1591 	}
1592 
1593 	if (!try_module_get(policy->governor->owner))
1594 		return -EINVAL;
1595 
1596 	dprintk("__cpufreq_governor for CPU %u, event %u\n",
1597 						policy->cpu, event);
1598 	ret = policy->governor->governor(policy, event);
1599 
1600 	/* we keep one module reference alive for
1601 			each CPU governed by this CPU */
1602 	if ((event != CPUFREQ_GOV_START) || ret)
1603 		module_put(policy->governor->owner);
1604 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1605 		module_put(policy->governor->owner);
1606 
1607 	return ret;
1608 }
1609 
1610 
1611 int cpufreq_register_governor(struct cpufreq_governor *governor)
1612 {
1613 	int err;
1614 
1615 	if (!governor)
1616 		return -EINVAL;
1617 
1618 	mutex_lock(&cpufreq_governor_mutex);
1619 
1620 	err = -EBUSY;
1621 	if (__find_governor(governor->name) == NULL) {
1622 		err = 0;
1623 		list_add(&governor->governor_list, &cpufreq_governor_list);
1624 	}
1625 
1626 	mutex_unlock(&cpufreq_governor_mutex);
1627 	return err;
1628 }
1629 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1630 
1631 
1632 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1633 {
1634 	if (!governor)
1635 		return;
1636 
1637 	mutex_lock(&cpufreq_governor_mutex);
1638 	list_del(&governor->governor_list);
1639 	mutex_unlock(&cpufreq_governor_mutex);
1640 	return;
1641 }
1642 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1643 
1644 
1645 
1646 /*********************************************************************
1647  *                          POLICY INTERFACE                         *
1648  *********************************************************************/
1649 
1650 /**
1651  * cpufreq_get_policy - get the current cpufreq_policy
1652  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1653  *	is written
1654  *
1655  * Reads the current cpufreq policy.
1656  */
1657 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1658 {
1659 	struct cpufreq_policy *cpu_policy;
1660 	if (!policy)
1661 		return -EINVAL;
1662 
1663 	cpu_policy = cpufreq_cpu_get(cpu);
1664 	if (!cpu_policy)
1665 		return -EINVAL;
1666 
1667 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1668 
1669 	cpufreq_cpu_put(cpu_policy);
1670 	return 0;
1671 }
1672 EXPORT_SYMBOL(cpufreq_get_policy);
1673 
1674 
1675 /*
1676  * data   : current policy.
1677  * policy : policy to be set.
1678  */
1679 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1680 				struct cpufreq_policy *policy)
1681 {
1682 	int ret = 0;
1683 
1684 	cpufreq_debug_disable_ratelimit();
1685 	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1686 		policy->min, policy->max);
1687 
1688 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1689 				sizeof(struct cpufreq_cpuinfo));
1690 
1691 	if (policy->min > data->max || policy->max < data->min) {
1692 		ret = -EINVAL;
1693 		goto error_out;
1694 	}
1695 
1696 	/* verify the cpu speed can be set within this limit */
1697 	ret = cpufreq_driver->verify(policy);
1698 	if (ret)
1699 		goto error_out;
1700 
1701 	/* adjust if necessary - all reasons */
1702 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1703 			CPUFREQ_ADJUST, policy);
1704 
1705 	/* adjust if necessary - hardware incompatibility*/
1706 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1707 			CPUFREQ_INCOMPATIBLE, policy);
1708 
1709 	/* verify the cpu speed can be set within this limit,
1710 	   which might be different to the first one */
1711 	ret = cpufreq_driver->verify(policy);
1712 	if (ret)
1713 		goto error_out;
1714 
1715 	/* notification of the new policy */
1716 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1717 			CPUFREQ_NOTIFY, policy);
1718 
1719 	data->min = policy->min;
1720 	data->max = policy->max;
1721 
1722 	dprintk("new min and max freqs are %u - %u kHz\n",
1723 					data->min, data->max);
1724 
1725 	if (cpufreq_driver->setpolicy) {
1726 		data->policy = policy->policy;
1727 		dprintk("setting range\n");
1728 		ret = cpufreq_driver->setpolicy(policy);
1729 	} else {
1730 		if (policy->governor != data->governor) {
1731 			/* save old, working values */
1732 			struct cpufreq_governor *old_gov = data->governor;
1733 
1734 			dprintk("governor switch\n");
1735 
1736 			/* end old governor */
1737 			if (data->governor)
1738 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1739 
1740 			/* start new governor */
1741 			data->governor = policy->governor;
1742 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1743 				/* new governor failed, so re-start old one */
1744 				dprintk("starting governor %s failed\n",
1745 							data->governor->name);
1746 				if (old_gov) {
1747 					data->governor = old_gov;
1748 					__cpufreq_governor(data,
1749 							   CPUFREQ_GOV_START);
1750 				}
1751 				ret = -EINVAL;
1752 				goto error_out;
1753 			}
1754 			/* might be a policy change, too, so fall through */
1755 		}
1756 		dprintk("governor: change or update limits\n");
1757 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1758 	}
1759 
1760 error_out:
1761 	cpufreq_debug_enable_ratelimit();
1762 	return ret;
1763 }
1764 
1765 /**
1766  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1767  *	@cpu: CPU which shall be re-evaluated
1768  *
1769  *	Usefull for policy notifiers which have different necessities
1770  *	at different times.
1771  */
1772 int cpufreq_update_policy(unsigned int cpu)
1773 {
1774 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1775 	struct cpufreq_policy policy;
1776 	int ret;
1777 
1778 	if (!data) {
1779 		ret = -ENODEV;
1780 		goto no_policy;
1781 	}
1782 
1783 	if (unlikely(lock_policy_rwsem_write(cpu))) {
1784 		ret = -EINVAL;
1785 		goto fail;
1786 	}
1787 
1788 	dprintk("updating policy for CPU %u\n", cpu);
1789 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1790 	policy.min = data->user_policy.min;
1791 	policy.max = data->user_policy.max;
1792 	policy.policy = data->user_policy.policy;
1793 	policy.governor = data->user_policy.governor;
1794 
1795 	/* BIOS might change freq behind our back
1796 	  -> ask driver for current freq and notify governors about a change */
1797 	if (cpufreq_driver->get) {
1798 		policy.cur = cpufreq_driver->get(cpu);
1799 		if (!data->cur) {
1800 			dprintk("Driver did not initialize current freq");
1801 			data->cur = policy.cur;
1802 		} else {
1803 			if (data->cur != policy.cur)
1804 				cpufreq_out_of_sync(cpu, data->cur,
1805 								policy.cur);
1806 		}
1807 	}
1808 
1809 	ret = __cpufreq_set_policy(data, &policy);
1810 
1811 	unlock_policy_rwsem_write(cpu);
1812 
1813 fail:
1814 	cpufreq_cpu_put(data);
1815 no_policy:
1816 	return ret;
1817 }
1818 EXPORT_SYMBOL(cpufreq_update_policy);
1819 
1820 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1821 					unsigned long action, void *hcpu)
1822 {
1823 	unsigned int cpu = (unsigned long)hcpu;
1824 	struct sys_device *sys_dev;
1825 
1826 	sys_dev = get_cpu_sysdev(cpu);
1827 	if (sys_dev) {
1828 		switch (action) {
1829 		case CPU_ONLINE:
1830 		case CPU_ONLINE_FROZEN:
1831 			cpufreq_add_dev(sys_dev);
1832 			break;
1833 		case CPU_DOWN_PREPARE:
1834 		case CPU_DOWN_PREPARE_FROZEN:
1835 			if (unlikely(lock_policy_rwsem_write(cpu)))
1836 				BUG();
1837 
1838 			__cpufreq_remove_dev(sys_dev);
1839 			break;
1840 		case CPU_DOWN_FAILED:
1841 		case CPU_DOWN_FAILED_FROZEN:
1842 			cpufreq_add_dev(sys_dev);
1843 			break;
1844 		}
1845 	}
1846 	return NOTIFY_OK;
1847 }
1848 
1849 static struct notifier_block __refdata cpufreq_cpu_notifier =
1850 {
1851     .notifier_call = cpufreq_cpu_callback,
1852 };
1853 
1854 /*********************************************************************
1855  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1856  *********************************************************************/
1857 
1858 /**
1859  * cpufreq_register_driver - register a CPU Frequency driver
1860  * @driver_data: A struct cpufreq_driver containing the values#
1861  * submitted by the CPU Frequency driver.
1862  *
1863  *   Registers a CPU Frequency driver to this core code. This code
1864  * returns zero on success, -EBUSY when another driver got here first
1865  * (and isn't unregistered in the meantime).
1866  *
1867  */
1868 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1869 {
1870 	unsigned long flags;
1871 	int ret;
1872 
1873 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1874 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1875 		return -EINVAL;
1876 
1877 	dprintk("trying to register driver %s\n", driver_data->name);
1878 
1879 	if (driver_data->setpolicy)
1880 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1881 
1882 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1883 	if (cpufreq_driver) {
1884 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1885 		return -EBUSY;
1886 	}
1887 	cpufreq_driver = driver_data;
1888 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1889 
1890 	ret = sysdev_driver_register(&cpu_sysdev_class,
1891 					&cpufreq_sysdev_driver);
1892 
1893 	if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1894 		int i;
1895 		ret = -ENODEV;
1896 
1897 		/* check for at least one working CPU */
1898 		for (i = 0; i < nr_cpu_ids; i++)
1899 			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1900 				ret = 0;
1901 				break;
1902 			}
1903 
1904 		/* if all ->init() calls failed, unregister */
1905 		if (ret) {
1906 			dprintk("no CPU initialized for driver %s\n",
1907 							driver_data->name);
1908 			sysdev_driver_unregister(&cpu_sysdev_class,
1909 						&cpufreq_sysdev_driver);
1910 
1911 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
1912 			cpufreq_driver = NULL;
1913 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1914 		}
1915 	}
1916 
1917 	if (!ret) {
1918 		register_hotcpu_notifier(&cpufreq_cpu_notifier);
1919 		dprintk("driver %s up and running\n", driver_data->name);
1920 		cpufreq_debug_enable_ratelimit();
1921 	}
1922 
1923 	return ret;
1924 }
1925 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1926 
1927 
1928 /**
1929  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1930  *
1931  *    Unregister the current CPUFreq driver. Only call this if you have
1932  * the right to do so, i.e. if you have succeeded in initialising before!
1933  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1934  * currently not initialised.
1935  */
1936 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1937 {
1938 	unsigned long flags;
1939 
1940 	cpufreq_debug_disable_ratelimit();
1941 
1942 	if (!cpufreq_driver || (driver != cpufreq_driver)) {
1943 		cpufreq_debug_enable_ratelimit();
1944 		return -EINVAL;
1945 	}
1946 
1947 	dprintk("unregistering driver %s\n", driver->name);
1948 
1949 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1950 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1951 
1952 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1953 	cpufreq_driver = NULL;
1954 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1955 
1956 	return 0;
1957 }
1958 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1959 
1960 static int __init cpufreq_core_init(void)
1961 {
1962 	int cpu;
1963 
1964 	for_each_possible_cpu(cpu) {
1965 		per_cpu(policy_cpu, cpu) = -1;
1966 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1967 	}
1968 	return 0;
1969 }
1970 
1971 core_initcall(cpufreq_core_init);
1972