/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this semaphore, as the top-level hotplug notifier handler
 *   already takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
static int lock_policy_rwsem_##mode					\
(int cpu)								\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
	if (unlikely(!cpu_online(cpu))) {				\
		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
		return -1;						\
	}								\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

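/*
 * For reference, lock_policy_rwsem(read, cpu) above expands to roughly:
 *
 *	static int lock_policy_rwsem_read(int cpu)
 *	{
 *		int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 *		BUG_ON(policy_cpu == -1);
 *		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *		if (unlikely(!cpu_online(cpu))) {
 *			up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *			return -1;
 *		}
 *		return 0;
 *	}
 *
 * i.e. it takes the rwsem of the policy that owns @cpu, and fails if the
 * CPU went offline in the meantime.
 */
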
static void unlock_policy_rwsem_read(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	init_cpufreq_transition_notifier_list_called = true;
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
	struct cpufreq_policy *data;
	unsigned long flags;

	if (cpu >= nr_cpu_ids)
		goto err_out;

	/* get the cpufreq driver */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	if (!cpufreq_driver)
		goto err_out_unlock;

	if (!try_module_get(cpufreq_driver->owner))
		goto err_out_unlock;


	/* get the CPU */
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data)
		goto err_out_put_module;

	if (!kobject_get(&data->kobj))
		goto err_out_put_module;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return data;

err_out_put_module:
	module_put(cpufreq_driver->owner);
err_out_unlock:
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
	return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
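
/*
 * Typical usage of the get/put pair above (an illustrative sketch, not part
 * of the original file): every successful cpufreq_cpu_get() must be balanced
 * by exactly one cpufreq_cpu_put(), since it pins both the policy kobject
 * and the driver module.
 *
 *	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *	if (!policy)
 *		return -ENODEV;
 *	... use policy, e.g. read policy->cur ...
 *	cpufreq_cpu_put(policy);
 */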


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}
	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
		pr_debug("scaling loops_per_jiffy to %lu "
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
	}
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
#endif


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
	switch (state) {

	case CPUFREQ_PRECHANGE:
		/* detect if the driver reported a value as "old frequency"
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
				pr_debug("Warning: CPU frequency is"
					" %u kHz, cpufreq assumed %u kHz.\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
			}
		}
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
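
/*
 * Illustrative driver-side usage of the PRE/POST pair above (a sketch, not
 * part of the original file): a scaling driver's ->target() callback
 * typically wraps the actual hardware change between the two notifications.
 *
 *	struct cpufreq_freqs freqs;
 *
 *	freqs.cpu = policy->cpu;
 *	freqs.old = policy->cur;
 *	freqs.new = target_freq;
 *	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *	... program the hardware to target_freq ...
 *	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */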



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
			return t;

	return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
				struct cpufreq_governor **governor)
{
	int err = -EINVAL;

	if (!cpufreq_driver)
		goto out;

	if (cpufreq_driver->setpolicy) {
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
			err = 0;
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_POWERSAVE;
			err = 0;
		}
	} else if (cpufreq_driver->target) {
		struct cpufreq_governor *t;

		mutex_lock(&cpufreq_governor_mutex);

		t = __find_governor(str_governor);

		if (t == NULL) {
			int ret;

			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);

			if (ret == 0)
				t = __find_governor(str_governor);
		}

		if (t != NULL) {
			*governor = t;
			err = 0;
		}

		mutex_unlock(&cpufreq_governor_mutex);
	}
out:
	return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
(struct cpufreq_policy *policy, char *buf)		\
{							\
	return sprintf(buf, "%u\n", policy->object);	\
}
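
/*
 * For example, show_one(scaling_min_freq, min) below expands to:
 *
 *	static ssize_t show_scaling_min_freq
 *	(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->min);
 *	}
 */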

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	int ret;							\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	return ret ? ret : count;					\
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
{
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>\n");
	return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
				policy->governor->name);
	return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	int ret;
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

	ret = sscanf(buf, "%15s", str_governor);
	if (ret != 1)
		return -EINVAL;

	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
		return -EINVAL;

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret)
		return ret;
	else
		return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

	if (!cpufreq_driver->target) {
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
			goto out;
		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
	ssize_t i = 0;
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
			break;
	}
	i += sprintf(&buf[i], "\n");
	return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
	if (cpumask_empty(policy->related_cpus))
		return show_cpus(policy->cpus, buf);
	return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	unsigned int ret;

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&cpuinfo_transition_latency.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&related_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	&scaling_setspeed.attr,
	NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		goto fail;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		goto fail;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	pr_debug("last reference is dropped\n");
	complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
				  struct cpufreq_policy *policy,
				  struct sys_device *sys_dev)
{
	int ret = 0;
#ifdef CONFIG_SMP
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;

	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;

		if (cpu == j)
			continue;

		/* Check for existing affected CPUs.
		 * They may not be aware of it due to CPU Hotplug.
		 * cpufreq_cpu_put is called when the device is removed
		 * in __cpufreq_remove_dev()
		 */
		managed_policy = cpufreq_cpu_get(j);
		if (unlikely(managed_policy)) {

			/* Set proper policy_cpu */
			unlock_policy_rwsem_write(cpu);
			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

			if (lock_policy_rwsem_write(cpu) < 0) {
				/* Should not go through policy unlock path */
				if (cpufreq_driver->exit)
					cpufreq_driver->exit(policy);
				cpufreq_cpu_put(managed_policy);
				return -EBUSY;
			}

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			cpumask_copy(managed_policy->cpus, policy->cpus);
			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

			pr_debug("CPU already managed, adding link\n");
			ret = sysfs_create_link(&sys_dev->kobj,
						&managed_policy->kobj,
						"cpufreq");
			if (ret)
				cpufreq_cpu_put(managed_policy);
			/*
			 * Success. We only needed to be added to the mask.
			 * Call driver->exit() because only the cpu parent of
			 * the kobj needed to call init().
			 */
			if (cpufreq_driver->exit)
				cpufreq_driver->exit(policy);

			if (!ret)
				return 1;
			else
				return ret;
		}
	}
#endif
	return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
				   struct cpufreq_policy *policy)
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;
		struct sys_device *cpu_sys_dev;

		if (j == cpu)
			continue;
		if (!cpu_online(j))
			continue;

		pr_debug("CPU %u already managed, adding link\n", j);
		managed_policy = cpufreq_cpu_get(cpu);
		cpu_sys_dev = get_cpu_sysdev(j);
		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			cpufreq_cpu_put(managed_policy);
			return ret;
		}
	}
	return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
				     struct cpufreq_policy *policy,
				     struct sys_device *sys_dev)
{
	struct cpufreq_policy new_policy;
	struct freq_attr **drv_attr;
	unsigned long flags;
	int ret = 0;
	unsigned int j;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
				   &sys_dev->kobj, "cpufreq");
	if (ret)
		return ret;

	/* set up files for this cpu device */
	drv_attr = cpufreq_driver->attr;
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
			goto err_out_kobj_put;
		drv_attr++;
	}
	if (cpufreq_driver->get) {
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->target) {
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->bios_limit) {
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
			goto err_out_kobj_put;
	}

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus) {
		if (!cpu_online(j))
			continue;
		per_cpu(cpufreq_cpu_data, j) = policy;
		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = cpufreq_add_dev_symlink(cpu, policy);
	if (ret)
		goto err_out_kobj_put;

	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
	/* assure that the starting sequence is run in __cpufreq_set_policy */
	policy->governor = NULL;

	/* set default policy */
	ret = __cpufreq_set_policy(policy, &new_policy);
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
		pr_debug("setting policy failed\n");
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
	}
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried to
 * clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct sys_device *sys_dev)
{
	unsigned int cpu = sys_dev->id;
	int ret = 0, found = 0;
	struct cpufreq_policy *policy;
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	int sibling;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}
#endif

	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	ret = -ENOMEM;
	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
	ret = (lock_policy_rwsem_write(cpu) < 0);
	WARN_ON(ret);

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cp->governor &&
		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
			policy->governor = cp->governor;
			found = 1;
			break;
		}
	}
#endif
	if (!found)
		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	/* call driver. From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_unlock_policy;
	}
	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

	ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
	if (ret) {
		if (ret > 0)
			/* This is a managed cpu, symlink created,
			   exit with 0 */
			ret = 0;
		goto err_unlock_policy;
	}

	ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
	if (ret)
		goto err_out_unregister;

	unlock_policy_rwsem_write(cpu);

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;


err_out_unregister:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
	unlock_policy_rwsem_write(cpu);
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * The caller must already hold the policy rwsem in write mode for this CPU.
 * This routine releases the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct sys_device *sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
#ifdef CONFIG_SMP
	struct sys_device *cpu_sys_dev;
	unsigned int j;
#endif

	pr_debug("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		pr_debug("removing link\n");
		cpumask_clear_cpu(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		kobj = &sys_dev->kobj;
		cpufreq_cpu_put(data);
		unlock_policy_rwsem_write(cpu);
		sysfs_remove_link(kobj, "cpufreq");
		return 0;
	}
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
			CPUFREQ_NAME_LEN);
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
	 * the sysfs links afterwards.
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			per_cpu(cpufreq_cpu_data, j) = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			strncpy(per_cpu(cpufreq_cpu_governor, j),
				data->governor->name, CPUFREQ_NAME_LEN);
#endif
			cpu_sys_dev = get_cpu_sysdev(j);
			kobj = &cpu_sys_dev->kobj;
			unlock_policy_rwsem_write(cpu);
			sysfs_remove_link(kobj, "cpufreq");
			lock_policy_rwsem_write(cpu);
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	kobj = &data->kobj;
	cmp = &data->kobj_unregister;
	unlock_policy_rwsem_write(cpu);
	kobject_put(kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	pr_debug("waiting for dropping of refcount\n");
	wait_for_completion(cmp);
	pr_debug("wait complete\n");

	lock_policy_rwsem_write(cpu);
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);
	unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
	/* when the CPU which is the parent of the kobj is hotplugged
	 * offline, check for siblings, and create cpufreq sysfs interface
	 * and symlinks
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		/* first sibling now owns the new sysfs dir */
		cpumask_clear_cpu(cpu, data->cpus);
		cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));

		/* finally remove our own symlink */
		lock_policy_rwsem_write(cpu);
		__cpufreq_remove_dev(sys_dev);
	}
#endif

	free_cpumask_var(data->related_cpus);
	free_cpumask_var(data->cpus);
	kfree(data);

	return 0;
}


static int cpufreq_remove_dev(struct sys_device *sys_dev)
{
	unsigned int cpu = sys_dev->id;
	int retval;

	if (cpu_is_offline(cpu))
		return 0;

	if (unlikely(lock_policy_rwsem_write(cpu)))
		BUG();

	retval = __cpufreq_remove_dev(sys_dev);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
	pr_debug("handle_update for cpu %u called\n", cpu);
	cpufreq_update_policy(cpu);
}

/**
 *	cpufreq_out_of_sync - if the actual and saved CPU frequencies differ,
 *	we're in deep trouble.
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
 *	We adjust to the current frequency first, and need to clean up later.
 *	So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	pr_debug("Warning: CPU frequency out of sync: cpufreq core thinks "
	       "it is %u kHz, but it actually is %u kHz\n", old_freq, new_freq);

	freqs.cpu = cpu;
	freqs.old = old_freq;
	freqs.new = new_freq;
	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->cur;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->max;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
	unsigned int ret_freq = 0;

	if (!cpufreq_driver->get)
		return ret_freq;

	ret_freq = cpufreq_driver->get(cpu);

	if (ret_freq && policy->cur &&
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
		/* verify no discrepancy between actual and
					saved value exists */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
			schedule_work(&policy->update);
		}
	}

	return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current frequency of the CPU as reported by the driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);
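
/*
 * Note on choosing between the two accessors above (an illustrative
 * comparison, not part of the original file): cpufreq_quick_get() only
 * reads the cached policy->cur and never touches the hardware, while
 * cpufreq_get() may invoke the driver's ->get() callback and resynchronize
 * the cached value if it turns out to be stale.
 *
 *	unsigned int cached = cpufreq_quick_get(0);	- cheap, may be stale
 *	unsigned int actual = cpufreq_get(0);		- may query hardware
 */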

static struct sysdev_driver cpufreq_sysdev_driver = {
	.add		= cpufreq_add_dev,
	.remove		= cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("suspending cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return 0;

	if (cpufreq_driver->suspend) {
		ret = cpufreq_driver->suspend(cpu_policy);
		if (ret)
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
					"step on CPU %u\n", cpu_policy->cpu);
	}

	cpufreq_cpu_put(cpu_policy);
	return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
 *	    restored. It will verify that the current freq is in sync with
 *	    what we believe it to be. This is a bit later than when it
 *	    should be, but nonetheless it's better than calling
 *	    cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("resuming cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(cpu_policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", cpu_policy->cpu);
			goto fail;
		}
	}

	schedule_work(&cpu_policy->update);

fail:
	cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Add a driver to one of two lists: either a list of drivers that
 *	are notified about clock rate changes (once before and once after
 *	the transition), or a list of drivers that are notified about
 *	changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	WARN_ON(!init_cpufreq_transition_notifier_list_called);

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_register(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
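
/*
 * Sketch of a typical transition-notifier client (illustrative only, not
 * part of the original file); "my_transition_handler" and "my_nb" are
 * made-up names:
 *
 *	static int my_transition_handler(struct notifier_block *nb,
 *					 unsigned long state, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (state == CPUFREQ_POSTCHANGE)
 *			pr_info("cpu%u now runs at %u kHz\n",
 *				freqs->cpu, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_transition_handler,
 *	};
 *
 *	cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */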


/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_unregister(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;

	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
		target_freq, relation);
	if (cpu_online(policy->cpu) && cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);

	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
	int ret = -EINVAL;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		goto fail;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	unlock_policy_rwsem_write(policy->cpu);

fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
	int ret = 0;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (cpu_online(cpu) && cpufreq_driver->getavg)
		ret = cpufreq_driver->getavg(policy, cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * Pass an event (e.g. CPUFREQ_GOV_LIMITS when the policy limits change)
 * down to the governor of the given policy.
 */
static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
{
	int ret;

	/* A fallback governor only needs to be defined when the default
	   governor is known to have latency restrictions, like e.g.
	   conservative or ondemand. That this is the case is already
	   ensured in Kconfig.
	*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "%s governor failed: hardware "
			       "transition latency is too long, falling back "
			       "to the %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
	}

	if (!try_module_get(policy->governor->owner))
		return -EINVAL;

	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
						policy->cpu, event);
	ret = policy->governor->governor(policy, event);

	/* we keep one module reference alive for
			each CPU governed by this governor */
	if ((event != CPUFREQ_GOV_START) || ret)
		module_put(policy->governor->owner);
	if ((event == CPUFREQ_GOV_STOP) && !ret)
		module_put(policy->governor->owner);

	return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err;

	if (!governor)
		return -EINVAL;

	mutex_lock(&cpufreq_governor_mutex);

	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
	}

	mutex_unlock(&cpufreq_governor_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
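
/*
 * Sketch of a minimal governor registration (illustrative only, not part
 * of the original file); "my_governor_fn" and "cpufreq_gov_mygov" are
 * made-up names:
 *
 *	static int my_governor_fn(struct cpufreq_policy *policy,
 *				  unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *			return __cpufreq_driver_target(policy, policy->max,
 *						       CPUFREQ_RELATION_H);
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor cpufreq_gov_mygov = {
 *		.name		= "mygov",
 *		.governor	= my_governor_fn,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	cpufreq_register_governor(&cpufreq_gov_mygov);
 */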


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

	if (!governor)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
 * @cpu: CPU whose policy shall be read
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy)
{
	int ret = 0;

	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
		policy->min, policy->max);

	memcpy(&policy->cpuinfo, &data->cpuinfo,
				sizeof(struct cpufreq_cpuinfo));

	if (policy->min > data->max || policy->max < data->min) {
		ret = -EINVAL;
		goto error_out;
	}

	/* verify the cpu speed can be set within this limit */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_ADJUST, policy);

	/* adjust if necessary - hardware incompatibility */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_INCOMPATIBLE, policy);

	/* verify the cpu speed can be set within this limit,
	   which might be different from the first one */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* notification of the new policy */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_NOTIFY, policy);

	data->min = policy->min;
	data->max = policy->max;

	pr_debug("new min and max freqs are %u - %u kHz\n",
					data->min, data->max);

	if (cpufreq_driver->setpolicy) {
		data->policy = policy->policy;
		pr_debug("setting range\n");
		ret = cpufreq_driver->setpolicy(policy);
	} else {
		if (policy->governor != data->governor) {
			/* save old, working values */
			struct cpufreq_governor *old_gov = data->governor;

			pr_debug("governor switch\n");

			/* end old governor */
			if (data->governor)
				__cpufreq_governor(data, CPUFREQ_GOV_STOP);

			/* start new governor */
			data->governor = policy->governor;
			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
				/* new governor failed, so re-start old one */
				pr_debug("starting governor %s failed\n",
							data->governor->name);
				if (old_gov) {
					data->governor = old_gov;
					__cpufreq_governor(data,
							   CPUFREQ_GOV_START);
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
		pr_debug("governor: change or update limits\n");
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

error_out:
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
 *	Useful for policy notifiers which have different necessities
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
	struct cpufreq_policy policy;
	int ret;

	if (!data) {
		ret = -ENODEV;
		goto no_policy;
	}

	if (unlikely(lock_policy_rwsem_write(cpu))) {
		ret = -EINVAL;
		goto fail;
	}

	pr_debug("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
	policy.min = data->user_policy.min;
	policy.max = data->user_policy.max;
	policy.policy = data->user_policy.policy;
	policy.governor = data->user_policy.governor;

	/* BIOS might change freq behind our back
	  -> ask driver for current freq and notify governors about a change */
	if (cpufreq_driver->get) {
		policy.cur = cpufreq_driver->get(cpu);
		if (!data->cur) {
			pr_debug("Driver did not initialize current freq\n");
			data->cur = policy.cur;
		} else {
			if (data->cur != policy.cur)
				cpufreq_out_of_sync(cpu, data->cur,
								policy.cur);
		}
	}

	ret = __cpufreq_set_policy(data, &policy);

	unlock_policy_rwsem_write(cpu);

fail:
	cpufreq_cpu_put(data);
no_policy:
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	if (sys_dev) {
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			cpufreq_add_dev(sys_dev);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			if (unlikely(lock_policy_rwsem_write(cpu)))
				BUG();

			__cpufreq_remove_dev(sys_dev);
			break;
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
			cpufreq_add_dev(sys_dev);
			break;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
	unsigned long flags;
	int ret;

	if (!driver_data || !driver_data->verify || !driver_data->init ||
	    ((!driver_data->setpolicy) && (!driver_data->target)))
		return -EINVAL;

	pr_debug("trying to register driver %s\n", driver_data->name);

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	if (cpufreq_driver) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		return -EBUSY;
	}
	cpufreq_driver = driver_data;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = sysdev_driver_register(&cpu_sysdev_class,
					&cpufreq_sysdev_driver);
	if (ret)
		goto err_null_driver;

	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
				ret = 0;
				break;
			}

		/* if all ->init() calls failed, unregister */
		if (ret) {
			pr_debug("no CPU initialized for driver %s\n",
							driver_data->name);
			goto err_sysdev_unreg;
		}
	}

	register_hotcpu_notifier(&cpufreq_cpu_notifier);
	pr_debug("driver %s up and running\n", driver_data->name);

	return 0;
err_sysdev_unreg:
	sysdev_driver_unregister(&cpu_sysdev_class,
			&cpufreq_sysdev_driver);
err_null_driver:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
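
/*
 * Sketch of a minimal setpolicy-style driver registration (illustrative
 * only, not part of the original file); "mydrv_init", "mydrv_verify" and
 * "mydrv_setpolicy" are made-up names that would have to implement the
 * ->init, ->verify and ->setpolicy callbacks for the real hardware:
 *
 *	static struct cpufreq_driver mydrv_driver = {
 *		.name		= "mydrv",
 *		.owner		= THIS_MODULE,
 *		.init		= mydrv_init,
 *		.verify		= mydrv_verify,
 *		.setpolicy	= mydrv_setpolicy,
 *	};
 *
 *	static int __init mydrv_module_init(void)
 *	{
 *		return cpufreq_register_driver(&mydrv_driver);
 *	}
 *
 * As enforced above, a driver must provide ->verify and ->init plus at
 * least one of ->setpolicy or ->target.
 */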


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long flags;

	if (!cpufreq_driver || (driver != cpufreq_driver))
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(cpufreq_policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}

	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
						&cpu_sysdev_class.kset.kobj);
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);
1918