xref: /linux/drivers/cpufreq/cpufreq.c (revision 7f3edee81fbd49114c28057512906f169caa0bed)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33 						"cpufreq-core", msg)
34 
/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 *
 * cpufreq_cpu_data[] is indexed by CPU number. An entry is NULL while no
 * policy is attached to that CPU; several CPUs may point at the same
 * policy object (see cpufreq_add_dev()).
 */
static struct cpufreq_driver *cpufreq_driver;
static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
#ifdef CONFIG_HOTPLUG_CPU
/*
 * This one keeps track of the previously set governor of a removed CPU.
 * It is saved in __cpufreq_remove_dev() and restored in cpufreq_add_dev().
 */
static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check that the CPU they are concerned
 *   with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this sem, as the top level hotplug notifier handler takes it.
 *
 * policy_cpu records, for each CPU, which CPU's rwsem guards its policy.
 * The lock/unlock helpers below BUG() if it is -1 for the CPU they are
 * asked about.
 */
static DEFINE_PER_CPU(int, policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67 
68 #define lock_policy_rwsem(mode, cpu)					\
69 int lock_policy_rwsem_##mode						\
70 (int cpu)								\
71 {									\
72 	int policy_cpu = per_cpu(policy_cpu, cpu);			\
73 	BUG_ON(policy_cpu == -1);					\
74 	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
75 	if (unlikely(!cpu_online(cpu))) {				\
76 		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
77 		return -1;						\
78 	}								\
79 									\
80 	return 0;							\
81 }
82 
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85 
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88 
89 void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96 
97 void unlock_policy_rwsem_write(int cpu)
98 {
99 	int policy_cpu = per_cpu(policy_cpu, cpu);
100 	BUG_ON(policy_cpu == -1);
101 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104 
105 
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
108 static unsigned int __cpufreq_get(unsigned int cpu);
109 static void handle_update(struct work_struct *work);
110 
/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 *
 * NOTE(review): an earlier version of this comment said "The mutex locks
 * both lists", but no such mutex is visible in this part of the file.
 * Presumably each notifier head carries its own synchronisation
 * (blocking vs. SRCU) - confirm against <linux/notifier.h>.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;
120 
/*
 * The transition list is only declared above, not statically initialised
 * like the policy list, so set up its SRCU notifier head at run time.
 * Registered as a pure_initcall. Always returns 0.
 */
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);
127 
/*
 * List of governors, linked through cpufreq_governor.governor_list.
 * cpufreq_parse_governor() holds cpufreq_governor_mutex while it searches
 * the list with __find_governor().
 */
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX (cpufreq_governor_mutex);
130 
131 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
132 {
133 	struct cpufreq_policy *data;
134 	unsigned long flags;
135 
136 	if (cpu >= NR_CPUS)
137 		goto err_out;
138 
139 	/* get the cpufreq driver */
140 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
141 
142 	if (!cpufreq_driver)
143 		goto err_out_unlock;
144 
145 	if (!try_module_get(cpufreq_driver->owner))
146 		goto err_out_unlock;
147 
148 
149 	/* get the CPU */
150 	data = cpufreq_cpu_data[cpu];
151 
152 	if (!data)
153 		goto err_out_put_module;
154 
155 	if (!kobject_get(&data->kobj))
156 		goto err_out_put_module;
157 
158 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
159 	return data;
160 
161 err_out_put_module:
162 	module_put(cpufreq_driver->owner);
163 err_out_unlock:
164 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
165 err_out:
166 	return NULL;
167 }
168 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
169 
170 
/*
 * cpufreq_cpu_put - drop the references taken by cpufreq_cpu_get()
 * @data: policy previously returned by cpufreq_cpu_get()
 *
 * Releases the policy's kobject reference and then the reference on the
 * driver module.
 */
void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
177 
178 
179 /*********************************************************************
180  *                     UNIFIED DEBUG HELPERS                         *
181  *********************************************************************/
182 #ifdef CONFIG_CPU_FREQ_DEBUG
183 
/*
 * What part(s) of the CPUfreq subsystem are debugged? Bitmask that is
 * tested against the "type" argument of cpufreq_debug_printk().
 */
static unsigned int debug;

/* is the debug output ratelimit'ed using printk_ratelimit? User can
 * set or modify this value.
 */
static unsigned int debug_ratelimit = 1;

/* is the printk_ratelimit'ing enabled? It's enabled after a successful
 * loading of a cpufreq driver, temporarily disabled when a new policy
 * is set, and disabled upon cpufreq driver removal.
 *
 * This is a nesting counter guarded by disable_ratelimit_lock:
 * ratelimiting only applies while it is zero.
 */
static unsigned int disable_ratelimit = 1;
static DEFINE_SPINLOCK(disable_ratelimit_lock);
198 
199 static void cpufreq_debug_enable_ratelimit(void)
200 {
201 	unsigned long flags;
202 
203 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
204 	if (disable_ratelimit)
205 		disable_ratelimit--;
206 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
207 }
208 
209 static void cpufreq_debug_disable_ratelimit(void)
210 {
211 	unsigned long flags;
212 
213 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
214 	disable_ratelimit++;
215 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
216 }
217 
218 void cpufreq_debug_printk(unsigned int type, const char *prefix,
219 							const char *fmt, ...)
220 {
221 	char s[256];
222 	va_list args;
223 	unsigned int len;
224 	unsigned long flags;
225 
226 	WARN_ON(!prefix);
227 	if (type & debug) {
228 		spin_lock_irqsave(&disable_ratelimit_lock, flags);
229 		if (!disable_ratelimit && debug_ratelimit
230 					&& !printk_ratelimit()) {
231 			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
232 			return;
233 		}
234 		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
235 
236 		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
237 
238 		va_start(args, fmt);
239 		len += vsnprintf(&s[len], (256 - len), fmt, args);
240 		va_end(args);
241 
242 		printk(s);
243 
244 		WARN_ON(len < 5);
245 	}
246 }
247 EXPORT_SYMBOL(cpufreq_debug_printk);
248 
249 
/* Both knobs are exposed as module parameters with mode 0644. */
module_param(debug, uint, 0644);
MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
			" 2 to debug drivers, and 4 to debug governors.");

module_param(debug_ratelimit, uint, 0644);
MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
					" set to 0 to disable ratelimiting.");
257 
258 #else /* !CONFIG_CPU_FREQ_DEBUG */
259 
/* Without CONFIG_CPU_FREQ_DEBUG the ratelimit helpers do nothing. */
static inline void cpufreq_debug_enable_ratelimit(void) { }
static inline void cpufreq_debug_disable_ratelimit(void) { }
262 
263 #endif /* CONFIG_CPU_FREQ_DEBUG */
264 
265 
266 /*********************************************************************
267  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
268  *********************************************************************/
269 
270 /**
271  * adjust_jiffies - adjust the system "loops_per_jiffy"
272  *
273  * This function alters the system "loops_per_jiffy" for the clock
274  * speed change. Note that loops_per_jiffy cannot be updated on SMP
275  * systems as each CPU might be scaled differently. So, use the arch
276  * per-CPU loops_per_jiffy value wherever possible.
277  */
278 #ifndef CONFIG_SMP
279 static unsigned long l_p_j_ref;
280 static unsigned int  l_p_j_ref_freq;
281 
282 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
283 {
284 	if (ci->flags & CPUFREQ_CONST_LOOPS)
285 		return;
286 
287 	if (!l_p_j_ref_freq) {
288 		l_p_j_ref = loops_per_jiffy;
289 		l_p_j_ref_freq = ci->old;
290 		dprintk("saving %lu as reference value for loops_per_jiffy;"
291 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
292 	}
293 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
294 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
295 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
296 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
297 								ci->new);
298 		dprintk("scaling loops_per_jiffy to %lu"
299 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
300 	}
301 }
302 #else
303 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
304 {
305 	return;
306 }
307 #endif
308 
309 
310 /**
311  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
312  * on frequency transition.
313  *
314  * This function calls the transition notifiers and the "adjust_jiffies"
315  * function. It is called twice on all CPU frequency changes that have
316  * external effects.
317  */
318 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
319 {
320 	struct cpufreq_policy *policy;
321 
322 	BUG_ON(irqs_disabled());
323 
324 	freqs->flags = cpufreq_driver->flags;
325 	dprintk("notification %u of frequency transition to %u kHz\n",
326 		state, freqs->new);
327 
328 	policy = cpufreq_cpu_data[freqs->cpu];
329 	switch (state) {
330 
331 	case CPUFREQ_PRECHANGE:
332 		/* detect if the driver reported a value as "old frequency"
333 		 * which is not equal to what the cpufreq core thinks is
334 		 * "old frequency".
335 		 */
336 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
337 			if ((policy) && (policy->cpu == freqs->cpu) &&
338 			    (policy->cur) && (policy->cur != freqs->old)) {
339 				dprintk("Warning: CPU frequency is"
340 					" %u, cpufreq assumed %u kHz.\n",
341 					freqs->old, policy->cur);
342 				freqs->old = policy->cur;
343 			}
344 		}
345 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
346 				CPUFREQ_PRECHANGE, freqs);
347 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
348 		break;
349 
350 	case CPUFREQ_POSTCHANGE:
351 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
352 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
353 				CPUFREQ_POSTCHANGE, freqs);
354 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
355 			policy->cur = freqs->new;
356 		break;
357 	}
358 }
359 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
360 
361 
362 
363 /*********************************************************************
364  *                          SYSFS INTERFACE                          *
365  *********************************************************************/
366 
367 static struct cpufreq_governor *__find_governor(const char *str_governor)
368 {
369 	struct cpufreq_governor *t;
370 
371 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
372 		if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
373 			return t;
374 
375 	return NULL;
376 }
377 
378 /**
379  * cpufreq_parse_governor - parse a governor string
380  */
381 static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
382 				struct cpufreq_governor **governor)
383 {
384 	int err = -EINVAL;
385 
386 	if (!cpufreq_driver)
387 		goto out;
388 
389 	if (cpufreq_driver->setpolicy) {
390 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
391 			*policy = CPUFREQ_POLICY_PERFORMANCE;
392 			err = 0;
393 		} else if (!strnicmp(str_governor, "powersave",
394 						CPUFREQ_NAME_LEN)) {
395 			*policy = CPUFREQ_POLICY_POWERSAVE;
396 			err = 0;
397 		}
398 	} else if (cpufreq_driver->target) {
399 		struct cpufreq_governor *t;
400 
401 		mutex_lock(&cpufreq_governor_mutex);
402 
403 		t = __find_governor(str_governor);
404 
405 		if (t == NULL) {
406 			char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
407 								str_governor);
408 
409 			if (name) {
410 				int ret;
411 
412 				mutex_unlock(&cpufreq_governor_mutex);
413 				ret = request_module(name);
414 				mutex_lock(&cpufreq_governor_mutex);
415 
416 				if (ret == 0)
417 					t = __find_governor(str_governor);
418 			}
419 
420 			kfree(name);
421 		}
422 
423 		if (t != NULL) {
424 			*governor = t;
425 			err = 0;
426 		}
427 
428 		mutex_unlock(&cpufreq_governor_mutex);
429 	}
430   out:
431 	return err;
432 }
433 
434 
435 /* drivers/base/cpu.c */
436 extern struct sysdev_class cpu_sysdev_class;
437 
438 
439 /**
440  * cpufreq_per_cpu_attr_read() / show_##file_name() -
441  * print out cpufreq information
442  *
443  * Write out information from cpufreq_driver->policy[cpu]; object must be
444  * "unsigned int".
445  */
446 
447 #define show_one(file_name, object)			\
448 static ssize_t show_##file_name				\
449 (struct cpufreq_policy * policy, char *buf)		\
450 {							\
451 	return sprintf (buf, "%u\n", policy->object);	\
452 }
453 
454 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
455 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
456 show_one(scaling_min_freq, min);
457 show_one(scaling_max_freq, max);
458 show_one(scaling_cur_freq, cur);
459 
460 static int __cpufreq_set_policy(struct cpufreq_policy *data,
461 				struct cpufreq_policy *policy);
462 
463 /**
464  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
465  */
466 #define store_one(file_name, object)			\
467 static ssize_t store_##file_name					\
468 (struct cpufreq_policy * policy, const char *buf, size_t count)		\
469 {									\
470 	unsigned int ret = -EINVAL;					\
471 	struct cpufreq_policy new_policy;				\
472 									\
473 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
474 	if (ret)							\
475 		return -EINVAL;						\
476 									\
477 	ret = sscanf (buf, "%u", &new_policy.object);			\
478 	if (ret != 1)							\
479 		return -EINVAL;						\
480 									\
481 	ret = __cpufreq_set_policy(policy, &new_policy);		\
482 	policy->user_policy.object = policy->object;			\
483 									\
484 	return ret ? ret : count;					\
485 }
486 
487 store_one(scaling_min_freq,min);
488 store_one(scaling_max_freq,max);
489 
490 /**
491  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
492  */
493 static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
494 							char *buf)
495 {
496 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
497 	if (!cur_freq)
498 		return sprintf(buf, "<unknown>");
499 	return sprintf(buf, "%u\n", cur_freq);
500 }
501 
502 
503 /**
504  * show_scaling_governor - show the current policy for the specified CPU
505  */
506 static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
507 							char *buf)
508 {
509 	if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
510 		return sprintf(buf, "powersave\n");
511 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
512 		return sprintf(buf, "performance\n");
513 	else if (policy->governor)
514 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
515 	return -EINVAL;
516 }
517 
518 
519 /**
520  * store_scaling_governor - store policy for the specified CPU
521  */
522 static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
523 				       const char *buf, size_t count)
524 {
525 	unsigned int ret = -EINVAL;
526 	char	str_governor[16];
527 	struct cpufreq_policy new_policy;
528 
529 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
530 	if (ret)
531 		return ret;
532 
533 	ret = sscanf (buf, "%15s", str_governor);
534 	if (ret != 1)
535 		return -EINVAL;
536 
537 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
538 						&new_policy.governor))
539 		return -EINVAL;
540 
541 	/* Do not use cpufreq_set_policy here or the user_policy.max
542 	   will be wrongly overridden */
543 	ret = __cpufreq_set_policy(policy, &new_policy);
544 
545 	policy->user_policy.policy = policy->policy;
546 	policy->user_policy.governor = policy->governor;
547 
548 	if (ret)
549 		return ret;
550 	else
551 		return count;
552 }
553 
554 /**
555  * show_scaling_driver - show the cpufreq driver currently loaded
556  */
557 static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
558 {
559 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
560 }
561 
562 /**
563  * show_scaling_available_governors - show the available CPUfreq governors
564  */
565 static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
566 				char *buf)
567 {
568 	ssize_t i = 0;
569 	struct cpufreq_governor *t;
570 
571 	if (!cpufreq_driver->target) {
572 		i += sprintf(buf, "performance powersave");
573 		goto out;
574 	}
575 
576 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
577 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
578 			goto out;
579 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
580 	}
581 out:
582 	i += sprintf(&buf[i], "\n");
583 	return i;
584 }
585 /**
586  * show_affected_cpus - show the CPUs affected by each transition
587  */
588 static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf)
589 {
590 	ssize_t i = 0;
591 	unsigned int cpu;
592 
593 	for_each_cpu_mask(cpu, policy->cpus) {
594 		if (i)
595 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
596 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
597 		if (i >= (PAGE_SIZE - 5))
598 		    break;
599 	}
600 	i += sprintf(&buf[i], "\n");
601 	return i;
602 }
603 
604 
/*
 * Helpers that define a "struct freq_attr" called _name and wire it to
 * the matching show_<_name>() / store_<_name>() handlers above.
 */

/* mode 0444, show handler only */
#define define_one_ro(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

/* mode 0400, show handler only */
#define define_one_ro0400(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0400, show_##_name, NULL)

/* mode 0644, show and store handlers */
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_ro0400(cpuinfo_cur_freq);
define_one_ro(cpuinfo_min_freq);
define_one_ro(cpuinfo_max_freq);
define_one_ro(scaling_available_governors);
define_one_ro(scaling_driver);
define_one_ro(scaling_cur_freq);
define_one_ro(affected_cpus);
define_one_rw(scaling_min_freq);
define_one_rw(scaling_max_freq);
define_one_rw(scaling_governor);
627 
/*
 * Attributes hooked into ktype_cpufreq.default_attrs.
 * cpuinfo_cur_freq and scaling_cur_freq are deliberately not listed:
 * cpufreq_add_dev() creates them only when the driver provides ->get and
 * ->target respectively.
 */
static struct attribute * default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	NULL
};
639 
/* Map an embedded kobject / attribute back to its containing object. */
#define to_policy(k)	container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a)	container_of(a, struct freq_attr, attr)
642 
643 static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
644 {
645 	struct cpufreq_policy * policy = to_policy(kobj);
646 	struct freq_attr * fattr = to_attr(attr);
647 	ssize_t ret;
648 	policy = cpufreq_cpu_get(policy->cpu);
649 	if (!policy)
650 		return -EINVAL;
651 
652 	if (lock_policy_rwsem_read(policy->cpu) < 0)
653 		return -EINVAL;
654 
655 	if (fattr->show)
656 		ret = fattr->show(policy, buf);
657 	else
658 		ret = -EIO;
659 
660 	unlock_policy_rwsem_read(policy->cpu);
661 
662 	cpufreq_cpu_put(policy);
663 	return ret;
664 }
665 
666 static ssize_t store(struct kobject * kobj, struct attribute * attr,
667 		     const char * buf, size_t count)
668 {
669 	struct cpufreq_policy * policy = to_policy(kobj);
670 	struct freq_attr * fattr = to_attr(attr);
671 	ssize_t ret;
672 	policy = cpufreq_cpu_get(policy->cpu);
673 	if (!policy)
674 		return -EINVAL;
675 
676 	if (lock_policy_rwsem_write(policy->cpu) < 0)
677 		return -EINVAL;
678 
679 	if (fattr->store)
680 		ret = fattr->store(policy, buf, count);
681 	else
682 		ret = -EIO;
683 
684 	unlock_policy_rwsem_write(policy->cpu);
685 
686 	cpufreq_cpu_put(policy);
687 	return ret;
688 }
689 
690 static void cpufreq_sysfs_release(struct kobject * kobj)
691 {
692 	struct cpufreq_policy * policy = to_policy(kobj);
693 	dprintk("last reference is dropped\n");
694 	complete(&policy->kobj_unregister);
695 }
696 
/* Route every attribute read/write through show() / store() above. */
static struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

/* kobject type used for the per-policy "cpufreq" directory. */
static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};
707 
708 
709 /**
710  * cpufreq_add_dev - add a CPU device
711  *
712  * Adds the cpufreq interface for a CPU device.
713  */
714 static int cpufreq_add_dev (struct sys_device * sys_dev)
715 {
716 	unsigned int cpu = sys_dev->id;
717 	int ret = 0;
718 	struct cpufreq_policy new_policy;
719 	struct cpufreq_policy *policy;
720 	struct freq_attr **drv_attr;
721 	struct sys_device *cpu_sys_dev;
722 	unsigned long flags;
723 	unsigned int j;
724 #ifdef CONFIG_SMP
725 	struct cpufreq_policy *managed_policy;
726 #endif
727 
728 	if (cpu_is_offline(cpu))
729 		return 0;
730 
731 	cpufreq_debug_disable_ratelimit();
732 	dprintk("adding CPU %u\n", cpu);
733 
734 #ifdef CONFIG_SMP
735 	/* check whether a different CPU already registered this
736 	 * CPU because it is in the same boat. */
737 	policy = cpufreq_cpu_get(cpu);
738 	if (unlikely(policy)) {
739 		cpufreq_cpu_put(policy);
740 		cpufreq_debug_enable_ratelimit();
741 		return 0;
742 	}
743 #endif
744 
745 	if (!try_module_get(cpufreq_driver->owner)) {
746 		ret = -EINVAL;
747 		goto module_out;
748 	}
749 
750 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
751 	if (!policy) {
752 		ret = -ENOMEM;
753 		goto nomem_out;
754 	}
755 
756 	policy->cpu = cpu;
757 	policy->cpus = cpumask_of_cpu(cpu);
758 
759 	/* Initially set CPU itself as the policy_cpu */
760 	per_cpu(policy_cpu, cpu) = cpu;
761 	lock_policy_rwsem_write(cpu);
762 
763 	init_completion(&policy->kobj_unregister);
764 	INIT_WORK(&policy->update, handle_update);
765 
766 	/* Set governor before ->init, so that driver could check it */
767 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
768 	/* call driver. From then on the cpufreq must be able
769 	 * to accept all calls to ->verify and ->setpolicy for this CPU
770 	 */
771 	ret = cpufreq_driver->init(policy);
772 	if (ret) {
773 		dprintk("initialization failed\n");
774 		unlock_policy_rwsem_write(cpu);
775 		goto err_out;
776 	}
777 	policy->user_policy.min = policy->cpuinfo.min_freq;
778 	policy->user_policy.max = policy->cpuinfo.max_freq;
779 
780 #ifdef CONFIG_SMP
781 
782 #ifdef CONFIG_HOTPLUG_CPU
783 	if (cpufreq_cpu_governor[cpu]){
784 		policy->governor = cpufreq_cpu_governor[cpu];
785 		dprintk("Restoring governor %s for cpu %d\n",
786 		       policy->governor->name, cpu);
787 	}
788 #endif
789 
790 	for_each_cpu_mask(j, policy->cpus) {
791 		if (cpu == j)
792 			continue;
793 
794 		/* check for existing affected CPUs.  They may not be aware
795 		 * of it due to CPU Hotplug.
796 		 */
797 		managed_policy = cpufreq_cpu_get(j);
798 		if (unlikely(managed_policy)) {
799 
800 			/* Set proper policy_cpu */
801 			unlock_policy_rwsem_write(cpu);
802 			per_cpu(policy_cpu, cpu) = managed_policy->cpu;
803 
804 			if (lock_policy_rwsem_write(cpu) < 0)
805 				goto err_out_driver_exit;
806 
807 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
808 			managed_policy->cpus = policy->cpus;
809 			cpufreq_cpu_data[cpu] = managed_policy;
810 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
811 
812 			dprintk("CPU already managed, adding link\n");
813 			ret = sysfs_create_link(&sys_dev->kobj,
814 						&managed_policy->kobj,
815 						"cpufreq");
816 			if (ret) {
817 				unlock_policy_rwsem_write(cpu);
818 				goto err_out_driver_exit;
819 			}
820 
821 			cpufreq_debug_enable_ratelimit();
822 			ret = 0;
823 			unlock_policy_rwsem_write(cpu);
824 			goto err_out_driver_exit; /* call driver->exit() */
825 		}
826 	}
827 #endif
828 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
829 
830 	/* prepare interface data */
831 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
832 				   "cpufreq");
833 	if (ret) {
834 		unlock_policy_rwsem_write(cpu);
835 		goto err_out_driver_exit;
836 	}
837 	/* set up files for this cpu device */
838 	drv_attr = cpufreq_driver->attr;
839 	while ((drv_attr) && (*drv_attr)) {
840 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
841 		if (ret) {
842 			unlock_policy_rwsem_write(cpu);
843 			goto err_out_driver_exit;
844 		}
845 		drv_attr++;
846 	}
847 	if (cpufreq_driver->get){
848 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
849 		if (ret) {
850 			unlock_policy_rwsem_write(cpu);
851 			goto err_out_driver_exit;
852 		}
853 	}
854 	if (cpufreq_driver->target){
855 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
856 		if (ret) {
857 			unlock_policy_rwsem_write(cpu);
858 			goto err_out_driver_exit;
859 		}
860 	}
861 
862 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
863 	for_each_cpu_mask(j, policy->cpus) {
864 		cpufreq_cpu_data[j] = policy;
865 		per_cpu(policy_cpu, j) = policy->cpu;
866 	}
867 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
868 
869 	/* symlink affected CPUs */
870 	for_each_cpu_mask(j, policy->cpus) {
871 		if (j == cpu)
872 			continue;
873 		if (!cpu_online(j))
874 			continue;
875 
876 		dprintk("CPU %u already managed, adding link\n", j);
877 		cpufreq_cpu_get(cpu);
878 		cpu_sys_dev = get_cpu_sysdev(j);
879 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
880 					"cpufreq");
881 		if (ret) {
882 			unlock_policy_rwsem_write(cpu);
883 			goto err_out_unregister;
884 		}
885 	}
886 
887 	policy->governor = NULL; /* to assure that the starting sequence is
888 				  * run in cpufreq_set_policy */
889 
890 	/* set default policy */
891 	ret = __cpufreq_set_policy(policy, &new_policy);
892 	policy->user_policy.policy = policy->policy;
893 	policy->user_policy.governor = policy->governor;
894 
895 	unlock_policy_rwsem_write(cpu);
896 
897 	if (ret) {
898 		dprintk("setting policy failed\n");
899 		goto err_out_unregister;
900 	}
901 
902 	kobject_uevent(&policy->kobj, KOBJ_ADD);
903 	module_put(cpufreq_driver->owner);
904 	dprintk("initialization complete\n");
905 	cpufreq_debug_enable_ratelimit();
906 
907 	return 0;
908 
909 
910 err_out_unregister:
911 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
912 	for_each_cpu_mask(j, policy->cpus)
913 		cpufreq_cpu_data[j] = NULL;
914 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
915 
916 	kobject_put(&policy->kobj);
917 	wait_for_completion(&policy->kobj_unregister);
918 
919 err_out_driver_exit:
920 	if (cpufreq_driver->exit)
921 		cpufreq_driver->exit(policy);
922 
923 err_out:
924 	kfree(policy);
925 
926 nomem_out:
927 	module_put(cpufreq_driver->owner);
928 module_out:
929 	cpufreq_debug_enable_ratelimit();
930 	return ret;
931 }
932 
933 
/**
 * __cpufreq_remove_dev - remove a CPU device
 * @sys_dev: sysdev of the CPU being removed
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 *
 * Returns 0 on success, -EINVAL if no policy is attached to the CPU, or
 * -EFAULT if a reference on the policy's kobject cannot be taken.
 */
static int __cpufreq_remove_dev (struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
#ifdef CONFIG_SMP
	struct sys_device *cpu_sys_dev;
	unsigned int j;
#endif

	cpufreq_debug_disable_ratelimit();
	dprintk("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = cpufreq_cpu_data[cpu];

	if (!data) {
		/* nothing registered for this CPU: just undo our setup */
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	/* unpublish first so that cpufreq_cpu_get() stops finding it */
	cpufreq_cpu_data[cpu] = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		dprintk("removing link\n");
		cpu_clear(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		sysfs_remove_link(&sys_dev->kobj, "cpufreq");
		/* drop the reference held on behalf of this linked CPU */
		cpufreq_cpu_put(data);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return 0;
	}
#endif


	/* temporary reference, dropped by the kobject_put() further down */
	if (!kobject_get(&data->kobj)) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EFAULT;
	}

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	/* remember the governor so cpufreq_add_dev() can restore it */
	cpufreq_cpu_governor[cpu] = data->governor;
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
	 * links afterwards.
	 */
	if (unlikely(cpus_weight(data->cpus) > 1)) {
		for_each_cpu_mask(j, data->cpus) {
			if (j == cpu)
				continue;
			cpufreq_cpu_data[j] = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	/* second pass, outside the spinlock: remove links, drop references */
	if (unlikely(cpus_weight(data->cpus) > 1)) {
		for_each_cpu_mask(j, data->cpus) {
			if (j == cpu)
				continue;
			dprintk("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			cpufreq_cpu_governor[j] = data->governor;
#endif
			cpu_sys_dev = get_cpu_sysdev(j);
			sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	/* only ->target drivers have a governor to stop */
	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	/* release the rwsem the caller took, before the wait below */
	unlock_policy_rwsem_write(cpu);

	kobject_put(&data->kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	dprintk("waiting for dropping of refcount\n");
	wait_for_completion(&data->kobj_unregister);
	dprintk("wait complete\n");

	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);

	kfree(data);

	cpufreq_debug_enable_ratelimit();
	return 0;
}
1051 
1052 
1053 static int cpufreq_remove_dev (struct sys_device * sys_dev)
1054 {
1055 	unsigned int cpu = sys_dev->id;
1056 	int retval;
1057 
1058 	if (cpu_is_offline(cpu))
1059 		return 0;
1060 
1061 	if (unlikely(lock_policy_rwsem_write(cpu)))
1062 		BUG();
1063 
1064 	retval = __cpufreq_remove_dev(sys_dev);
1065 	return retval;
1066 }
1067 
1068 
1069 static void handle_update(struct work_struct *work)
1070 {
1071 	struct cpufreq_policy *policy =
1072 		container_of(work, struct cpufreq_policy, update);
1073 	unsigned int cpu = policy->cpu;
1074 	dprintk("handle_update for cpu %u called\n", cpu);
1075 	cpufreq_update_policy(cpu);
1076 }
1077 
1078 /**
1079  *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1080  *	@cpu: cpu number
1081  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1082  *	@new_freq: CPU frequency the CPU actually runs at
1083  *
1084  *	We adjust to current frequency first, and need to clean up later. So either call
1085  *	to cpufreq_update_policy() or schedule handle_update()).
1086  */
1087 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1088 				unsigned int new_freq)
1089 {
1090 	struct cpufreq_freqs freqs;
1091 
1092 	dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1093 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1094 
1095 	freqs.cpu = cpu;
1096 	freqs.old = old_freq;
1097 	freqs.new = new_freq;
1098 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1099 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1100 }
1101 
1102 
1103 /**
1104  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1105  * @cpu: CPU number
1106  *
1107  * This is the last known freq, without actually getting it from the driver.
1108  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1109  */
1110 unsigned int cpufreq_quick_get(unsigned int cpu)
1111 {
1112 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1113 	unsigned int ret_freq = 0;
1114 
1115 	if (policy) {
1116 		ret_freq = policy->cur;
1117 		cpufreq_cpu_put(policy);
1118 	}
1119 
1120 	return (ret_freq);
1121 }
1122 EXPORT_SYMBOL(cpufreq_quick_get);
1123 
1124 
1125 static unsigned int __cpufreq_get(unsigned int cpu)
1126 {
1127 	struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
1128 	unsigned int ret_freq = 0;
1129 
1130 	if (!cpufreq_driver->get)
1131 		return (ret_freq);
1132 
1133 	ret_freq = cpufreq_driver->get(cpu);
1134 
1135 	if (ret_freq && policy->cur &&
1136 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1137 		/* verify no discrepancy between actual and
1138 					saved value exists */
1139 		if (unlikely(ret_freq != policy->cur)) {
1140 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1141 			schedule_work(&policy->update);
1142 		}
1143 	}
1144 
1145 	return (ret_freq);
1146 }
1147 
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Takes a policy reference and the policy rwsem (read side) around
 * __cpufreq_get().  Returns 0 when no policy is available or the rwsem
 * cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *pol = cpufreq_cpu_get(cpu);
	unsigned int khz;

	if (!pol)
		return 0;

	if (unlikely(lock_policy_rwsem_read(cpu))) {
		cpufreq_cpu_put(pol);
		return 0;
	}

	khz = __cpufreq_get(cpu);
	unlock_policy_rwsem_read(cpu);

	cpufreq_cpu_put(pol);
	return khz;
}
EXPORT_SYMBOL(cpufreq_get);
1175 
1176 
1177 /**
1178  *	cpufreq_suspend - let the low level driver prepare for suspend
1179  */
1180 
1181 static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
1182 {
1183 	int cpu = sysdev->id;
1184 	int ret = 0;
1185 	unsigned int cur_freq = 0;
1186 	struct cpufreq_policy *cpu_policy;
1187 
1188 	dprintk("suspending cpu %u\n", cpu);
1189 
1190 	if (!cpu_online(cpu))
1191 		return 0;
1192 
1193 	/* we may be lax here as interrupts are off. Nonetheless
1194 	 * we need to grab the correct cpu policy, as to check
1195 	 * whether we really run on this CPU.
1196 	 */
1197 
1198 	cpu_policy = cpufreq_cpu_get(cpu);
1199 	if (!cpu_policy)
1200 		return -EINVAL;
1201 
1202 	/* only handle each CPU group once */
1203 	if (unlikely(cpu_policy->cpu != cpu)) {
1204 		cpufreq_cpu_put(cpu_policy);
1205 		return 0;
1206 	}
1207 
1208 	if (cpufreq_driver->suspend) {
1209 		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1210 		if (ret) {
1211 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1212 					"step on CPU %u\n", cpu_policy->cpu);
1213 			cpufreq_cpu_put(cpu_policy);
1214 			return ret;
1215 		}
1216 	}
1217 
1218 
1219 	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1220 		goto out;
1221 
1222 	if (cpufreq_driver->get)
1223 		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1224 
1225 	if (!cur_freq || !cpu_policy->cur) {
1226 		printk(KERN_ERR "cpufreq: suspend failed to assert current "
1227 		       "frequency is what timing core thinks it is.\n");
1228 		goto out;
1229 	}
1230 
1231 	if (unlikely(cur_freq != cpu_policy->cur)) {
1232 		struct cpufreq_freqs freqs;
1233 
1234 		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1235 			dprintk("Warning: CPU frequency is %u, "
1236 			       "cpufreq assumed %u kHz.\n",
1237 			       cur_freq, cpu_policy->cur);
1238 
1239 		freqs.cpu = cpu;
1240 		freqs.old = cpu_policy->cur;
1241 		freqs.new = cur_freq;
1242 
1243 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1244 				    CPUFREQ_SUSPENDCHANGE, &freqs);
1245 		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1246 
1247 		cpu_policy->cur = cur_freq;
1248 	}
1249 
1250 out:
1251 	cpufreq_cpu_put(cpu_policy);
1252 	return 0;
1253 }
1254 
1255 /**
1256  *	cpufreq_resume -  restore proper CPU frequency handling after resume
1257  *
1258  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1259  *	2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1260  *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1261  *	    restored.
1262  */
1263 static int cpufreq_resume(struct sys_device * sysdev)
1264 {
1265 	int cpu = sysdev->id;
1266 	int ret = 0;
1267 	struct cpufreq_policy *cpu_policy;
1268 
1269 	dprintk("resuming cpu %u\n", cpu);
1270 
1271 	if (!cpu_online(cpu))
1272 		return 0;
1273 
1274 	/* we may be lax here as interrupts are off. Nonetheless
1275 	 * we need to grab the correct cpu policy, as to check
1276 	 * whether we really run on this CPU.
1277 	 */
1278 
1279 	cpu_policy = cpufreq_cpu_get(cpu);
1280 	if (!cpu_policy)
1281 		return -EINVAL;
1282 
1283 	/* only handle each CPU group once */
1284 	if (unlikely(cpu_policy->cpu != cpu)) {
1285 		cpufreq_cpu_put(cpu_policy);
1286 		return 0;
1287 	}
1288 
1289 	if (cpufreq_driver->resume) {
1290 		ret = cpufreq_driver->resume(cpu_policy);
1291 		if (ret) {
1292 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1293 					"step on CPU %u\n", cpu_policy->cpu);
1294 			cpufreq_cpu_put(cpu_policy);
1295 			return ret;
1296 		}
1297 	}
1298 
1299 	if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1300 		unsigned int cur_freq = 0;
1301 
1302 		if (cpufreq_driver->get)
1303 			cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1304 
1305 		if (!cur_freq || !cpu_policy->cur) {
1306 			printk(KERN_ERR "cpufreq: resume failed to assert "
1307 					"current frequency is what timing core "
1308 					"thinks it is.\n");
1309 			goto out;
1310 		}
1311 
1312 		if (unlikely(cur_freq != cpu_policy->cur)) {
1313 			struct cpufreq_freqs freqs;
1314 
1315 			if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1316 				dprintk("Warning: CPU frequency"
1317 				       "is %u, cpufreq assumed %u kHz.\n",
1318 				       cur_freq, cpu_policy->cur);
1319 
1320 			freqs.cpu = cpu;
1321 			freqs.old = cpu_policy->cur;
1322 			freqs.new = cur_freq;
1323 
1324 			srcu_notifier_call_chain(
1325 					&cpufreq_transition_notifier_list,
1326 					CPUFREQ_RESUMECHANGE, &freqs);
1327 			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1328 
1329 			cpu_policy->cur = cur_freq;
1330 		}
1331 	}
1332 
1333 out:
1334 	schedule_work(&cpu_policy->update);
1335 	cpufreq_cpu_put(cpu_policy);
1336 	return ret;
1337 }
1338 
1339 static struct sysdev_driver cpufreq_sysdev_driver = {
1340 	.add		= cpufreq_add_dev,
1341 	.remove		= cpufreq_remove_dev,
1342 	.suspend	= cpufreq_suspend,
1343 	.resume		= cpufreq_resume,
1344 };
1345 
1346 
1347 /*********************************************************************
1348  *                     NOTIFIER LISTS INTERFACE                      *
1349  *********************************************************************/
1350 
1351 /**
1352  *	cpufreq_register_notifier - register a driver with cpufreq
1353  *	@nb: notifier function to register
1354  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1355  *
1356  *	Add a driver to one of two lists: either a list of drivers that
1357  *      are notified about clock rate changes (once before and once after
1358  *      the transition), or a list of drivers that are notified about
1359  *      changes in cpufreq policy.
1360  *
1361  *	This function may sleep, and has the same return conditions as
1362  *	blocking_notifier_chain_register.
1363  */
1364 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1365 {
1366 	int ret;
1367 
1368 	switch (list) {
1369 	case CPUFREQ_TRANSITION_NOTIFIER:
1370 		ret = srcu_notifier_chain_register(
1371 				&cpufreq_transition_notifier_list, nb);
1372 		break;
1373 	case CPUFREQ_POLICY_NOTIFIER:
1374 		ret = blocking_notifier_chain_register(
1375 				&cpufreq_policy_notifier_list, nb);
1376 		break;
1377 	default:
1378 		ret = -EINVAL;
1379 	}
1380 
1381 	return ret;
1382 }
1383 EXPORT_SYMBOL(cpufreq_register_notifier);
1384 
1385 
1386 /**
1387  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1388  *	@nb: notifier block to be unregistered
1389  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1390  *
1391  *	Remove a driver from the CPU frequency notifier list.
1392  *
1393  *	This function may sleep, and has the same return conditions as
1394  *	blocking_notifier_chain_unregister.
1395  */
1396 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1397 {
1398 	int ret;
1399 
1400 	switch (list) {
1401 	case CPUFREQ_TRANSITION_NOTIFIER:
1402 		ret = srcu_notifier_chain_unregister(
1403 				&cpufreq_transition_notifier_list, nb);
1404 		break;
1405 	case CPUFREQ_POLICY_NOTIFIER:
1406 		ret = blocking_notifier_chain_unregister(
1407 				&cpufreq_policy_notifier_list, nb);
1408 		break;
1409 	default:
1410 		ret = -EINVAL;
1411 	}
1412 
1413 	return ret;
1414 }
1415 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1416 
1417 
1418 /*********************************************************************
1419  *                              GOVERNORS                            *
1420  *********************************************************************/
1421 
1422 
1423 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1424 			    unsigned int target_freq,
1425 			    unsigned int relation)
1426 {
1427 	int retval = -EINVAL;
1428 
1429 	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1430 		target_freq, relation);
1431 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1432 		retval = cpufreq_driver->target(policy, target_freq, relation);
1433 
1434 	return retval;
1435 }
1436 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1437 
1438 int cpufreq_driver_target(struct cpufreq_policy *policy,
1439 			  unsigned int target_freq,
1440 			  unsigned int relation)
1441 {
1442 	int ret;
1443 
1444 	policy = cpufreq_cpu_get(policy->cpu);
1445 	if (!policy)
1446 		return -EINVAL;
1447 
1448 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1449 		return -EINVAL;
1450 
1451 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1452 
1453 	unlock_policy_rwsem_write(policy->cpu);
1454 
1455 	cpufreq_cpu_put(policy);
1456 	return ret;
1457 }
1458 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1459 
1460 int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
1461 {
1462 	int ret = 0;
1463 
1464 	policy = cpufreq_cpu_get(policy->cpu);
1465 	if (!policy)
1466 		return -EINVAL;
1467 
1468 	if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
1469 		ret = cpufreq_driver->getavg(policy->cpu);
1470 
1471 	cpufreq_cpu_put(policy);
1472 	return ret;
1473 }
1474 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1475 
1476 /*
1477  * when "event" is CPUFREQ_GOV_LIMITS
1478  */
1479 
1480 static int __cpufreq_governor(struct cpufreq_policy *policy,
1481 					unsigned int event)
1482 {
1483 	int ret;
1484 
1485 	/* Only must be defined when default governor is known to have latency
1486 	   restrictions, like e.g. conservative or ondemand.
1487 	   That this is the case is already ensured in Kconfig
1488 	*/
1489 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1490 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1491 #else
1492 	struct cpufreq_governor *gov = NULL;
1493 #endif
1494 
1495 	if (policy->governor->max_transition_latency &&
1496 	    policy->cpuinfo.transition_latency >
1497 	    policy->governor->max_transition_latency) {
1498 		if (!gov)
1499 			return -EINVAL;
1500 		else {
1501 			printk(KERN_WARNING "%s governor failed, too long"
1502 			       " transition latency of HW, fallback"
1503 			       " to %s governor\n",
1504 			       policy->governor->name,
1505 			       gov->name);
1506 			policy->governor = gov;
1507 		}
1508 	}
1509 
1510 	if (!try_module_get(policy->governor->owner))
1511 		return -EINVAL;
1512 
1513 	dprintk("__cpufreq_governor for CPU %u, event %u\n",
1514 						policy->cpu, event);
1515 	ret = policy->governor->governor(policy, event);
1516 
1517 	/* we keep one module reference alive for
1518 			each CPU governed by this CPU */
1519 	if ((event != CPUFREQ_GOV_START) || ret)
1520 		module_put(policy->governor->owner);
1521 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1522 		module_put(policy->governor->owner);
1523 
1524 	return ret;
1525 }
1526 
1527 
1528 int cpufreq_register_governor(struct cpufreq_governor *governor)
1529 {
1530 	int err;
1531 
1532 	if (!governor)
1533 		return -EINVAL;
1534 
1535 	mutex_lock(&cpufreq_governor_mutex);
1536 
1537 	err = -EBUSY;
1538 	if (__find_governor(governor->name) == NULL) {
1539 		err = 0;
1540 		list_add(&governor->governor_list, &cpufreq_governor_list);
1541 	}
1542 
1543 	mutex_unlock(&cpufreq_governor_mutex);
1544 	return err;
1545 }
1546 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1547 
1548 
1549 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1550 {
1551 	if (!governor)
1552 		return;
1553 
1554 	mutex_lock(&cpufreq_governor_mutex);
1555 	list_del(&governor->governor_list);
1556 	mutex_unlock(&cpufreq_governor_mutex);
1557 	return;
1558 }
1559 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1560 
1561 
1562 
1563 /*********************************************************************
1564  *                          POLICY INTERFACE                         *
1565  *********************************************************************/
1566 
1567 /**
1568  * cpufreq_get_policy - get the current cpufreq_policy
1569  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1570  *
1571  * Reads the current cpufreq policy.
1572  */
1573 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1574 {
1575 	struct cpufreq_policy *cpu_policy;
1576 	if (!policy)
1577 		return -EINVAL;
1578 
1579 	cpu_policy = cpufreq_cpu_get(cpu);
1580 	if (!cpu_policy)
1581 		return -EINVAL;
1582 
1583 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1584 
1585 	cpufreq_cpu_put(cpu_policy);
1586 	return 0;
1587 }
1588 EXPORT_SYMBOL(cpufreq_get_policy);
1589 
1590 
1591 /*
1592  * data   : current policy.
1593  * policy : policy to be set.
1594  */
1595 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1596 				struct cpufreq_policy *policy)
1597 {
1598 	int ret = 0;
1599 
1600 	cpufreq_debug_disable_ratelimit();
1601 	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1602 		policy->min, policy->max);
1603 
1604 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1605 				sizeof(struct cpufreq_cpuinfo));
1606 
1607 	if (policy->min > data->min && policy->min > policy->max) {
1608 		ret = -EINVAL;
1609 		goto error_out;
1610 	}
1611 
1612 	/* verify the cpu speed can be set within this limit */
1613 	ret = cpufreq_driver->verify(policy);
1614 	if (ret)
1615 		goto error_out;
1616 
1617 	/* adjust if necessary - all reasons */
1618 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1619 			CPUFREQ_ADJUST, policy);
1620 
1621 	/* adjust if necessary - hardware incompatibility*/
1622 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1623 			CPUFREQ_INCOMPATIBLE, policy);
1624 
1625 	/* verify the cpu speed can be set within this limit,
1626 	   which might be different to the first one */
1627 	ret = cpufreq_driver->verify(policy);
1628 	if (ret)
1629 		goto error_out;
1630 
1631 	/* notification of the new policy */
1632 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1633 			CPUFREQ_NOTIFY, policy);
1634 
1635 	data->min = policy->min;
1636 	data->max = policy->max;
1637 
1638 	dprintk("new min and max freqs are %u - %u kHz\n",
1639 					data->min, data->max);
1640 
1641 	if (cpufreq_driver->setpolicy) {
1642 		data->policy = policy->policy;
1643 		dprintk("setting range\n");
1644 		ret = cpufreq_driver->setpolicy(policy);
1645 	} else {
1646 		if (policy->governor != data->governor) {
1647 			/* save old, working values */
1648 			struct cpufreq_governor *old_gov = data->governor;
1649 
1650 			dprintk("governor switch\n");
1651 
1652 			/* end old governor */
1653 			if (data->governor)
1654 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1655 
1656 			/* start new governor */
1657 			data->governor = policy->governor;
1658 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1659 				/* new governor failed, so re-start old one */
1660 				dprintk("starting governor %s failed\n",
1661 							data->governor->name);
1662 				if (old_gov) {
1663 					data->governor = old_gov;
1664 					__cpufreq_governor(data,
1665 							   CPUFREQ_GOV_START);
1666 				}
1667 				ret = -EINVAL;
1668 				goto error_out;
1669 			}
1670 			/* might be a policy change, too, so fall through */
1671 		}
1672 		dprintk("governor: change or update limits\n");
1673 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1674 	}
1675 
1676 error_out:
1677 	cpufreq_debug_enable_ratelimit();
1678 	return ret;
1679 }
1680 
1681 /**
1682  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1683  *	@cpu: CPU which shall be re-evaluated
1684  *
1685  *	Usefull for policy notifiers which have different necessities
1686  *	at different times.
1687  */
1688 int cpufreq_update_policy(unsigned int cpu)
1689 {
1690 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1691 	struct cpufreq_policy policy;
1692 	int ret = 0;
1693 
1694 	if (!data)
1695 		return -ENODEV;
1696 
1697 	if (unlikely(lock_policy_rwsem_write(cpu)))
1698 		return -EINVAL;
1699 
1700 	dprintk("updating policy for CPU %u\n", cpu);
1701 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1702 	policy.min = data->user_policy.min;
1703 	policy.max = data->user_policy.max;
1704 	policy.policy = data->user_policy.policy;
1705 	policy.governor = data->user_policy.governor;
1706 
1707 	/* BIOS might change freq behind our back
1708 	  -> ask driver for current freq and notify governors about a change */
1709 	if (cpufreq_driver->get) {
1710 		policy.cur = cpufreq_driver->get(cpu);
1711 		if (!data->cur) {
1712 			dprintk("Driver did not initialize current freq");
1713 			data->cur = policy.cur;
1714 		} else {
1715 			if (data->cur != policy.cur)
1716 				cpufreq_out_of_sync(cpu, data->cur,
1717 								policy.cur);
1718 		}
1719 	}
1720 
1721 	ret = __cpufreq_set_policy(data, &policy);
1722 
1723 	unlock_policy_rwsem_write(cpu);
1724 
1725 	cpufreq_cpu_put(data);
1726 	return ret;
1727 }
1728 EXPORT_SYMBOL(cpufreq_update_policy);
1729 
1730 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1731 					unsigned long action, void *hcpu)
1732 {
1733 	unsigned int cpu = (unsigned long)hcpu;
1734 	struct sys_device *sys_dev;
1735 
1736 	sys_dev = get_cpu_sysdev(cpu);
1737 	if (sys_dev) {
1738 		switch (action) {
1739 		case CPU_ONLINE:
1740 		case CPU_ONLINE_FROZEN:
1741 			cpufreq_add_dev(sys_dev);
1742 			break;
1743 		case CPU_DOWN_PREPARE:
1744 		case CPU_DOWN_PREPARE_FROZEN:
1745 			if (unlikely(lock_policy_rwsem_write(cpu)))
1746 				BUG();
1747 
1748 			__cpufreq_remove_dev(sys_dev);
1749 			break;
1750 		case CPU_DOWN_FAILED:
1751 		case CPU_DOWN_FAILED_FROZEN:
1752 			cpufreq_add_dev(sys_dev);
1753 			break;
1754 		}
1755 	}
1756 	return NOTIFY_OK;
1757 }
1758 
1759 static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
1760 {
1761     .notifier_call = cpufreq_cpu_callback,
1762 };
1763 
1764 /*********************************************************************
1765  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1766  *********************************************************************/
1767 
1768 /**
1769  * cpufreq_register_driver - register a CPU Frequency driver
1770  * @driver_data: A struct cpufreq_driver containing the values#
1771  * submitted by the CPU Frequency driver.
1772  *
1773  *   Registers a CPU Frequency driver to this core code. This code
1774  * returns zero on success, -EBUSY when another driver got here first
1775  * (and isn't unregistered in the meantime).
1776  *
1777  */
1778 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1779 {
1780 	unsigned long flags;
1781 	int ret;
1782 
1783 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1784 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1785 		return -EINVAL;
1786 
1787 	dprintk("trying to register driver %s\n", driver_data->name);
1788 
1789 	if (driver_data->setpolicy)
1790 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1791 
1792 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1793 	if (cpufreq_driver) {
1794 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1795 		return -EBUSY;
1796 	}
1797 	cpufreq_driver = driver_data;
1798 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1799 
1800 	ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
1801 
1802 	if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1803 		int i;
1804 		ret = -ENODEV;
1805 
1806 		/* check for at least one working CPU */
1807 		for (i=0; i<NR_CPUS; i++)
1808 			if (cpufreq_cpu_data[i])
1809 				ret = 0;
1810 
1811 		/* if all ->init() calls failed, unregister */
1812 		if (ret) {
1813 			dprintk("no CPU initialized for driver %s\n",
1814 							driver_data->name);
1815 			sysdev_driver_unregister(&cpu_sysdev_class,
1816 						&cpufreq_sysdev_driver);
1817 
1818 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
1819 			cpufreq_driver = NULL;
1820 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1821 		}
1822 	}
1823 
1824 	if (!ret) {
1825 		register_hotcpu_notifier(&cpufreq_cpu_notifier);
1826 		dprintk("driver %s up and running\n", driver_data->name);
1827 		cpufreq_debug_enable_ratelimit();
1828 	}
1829 
1830 	return (ret);
1831 }
1832 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1833 
1834 
1835 /**
1836  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1837  *
1838  *    Unregister the current CPUFreq driver. Only call this if you have
1839  * the right to do so, i.e. if you have succeeded in initialising before!
1840  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1841  * currently not initialised.
1842  */
1843 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1844 {
1845 	unsigned long flags;
1846 
1847 	cpufreq_debug_disable_ratelimit();
1848 
1849 	if (!cpufreq_driver || (driver != cpufreq_driver)) {
1850 		cpufreq_debug_enable_ratelimit();
1851 		return -EINVAL;
1852 	}
1853 
1854 	dprintk("unregistering driver %s\n", driver->name);
1855 
1856 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1857 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1858 
1859 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1860 	cpufreq_driver = NULL;
1861 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1862 
1863 	return 0;
1864 }
1865 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1866 
1867 static int __init cpufreq_core_init(void)
1868 {
1869 	int cpu;
1870 
1871 	for_each_possible_cpu(cpu) {
1872 		per_cpu(policy_cpu, cpu) = -1;
1873 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1874 	}
1875 	return 0;
1876 }
1877 
1878 core_initcall(cpufreq_core_init);
1879