/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/*
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this sem, as the top-level hotplug notifier handler takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
static int lock_policy_rwsem_##mode					\
(int cpu)								\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
	if (unlikely(!cpu_online(cpu))) {				\
		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
		return -1;						\
	}								\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);
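
/*
 * For reference, lock_policy_rwsem(read, cpu) above expands to roughly the
 * following (a sketch of the generated code, not an additional definition):
 *
 *	static int lock_policy_rwsem_read(int cpu)
 *	{
 *		int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 *		BUG_ON(policy_cpu == -1);
 *		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *		if (unlikely(!cpu_online(cpu))) {
 *			up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *			return -1;
 *		}
 *		return 0;
 *	}
 */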

static void unlock_policy_rwsem_read(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/*
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	init_cpufreq_transition_notifier_list_called = true;
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
	struct cpufreq_policy *data;
	unsigned long flags;

	if (cpu >= nr_cpu_ids)
		goto err_out;

	/* get the cpufreq driver */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	if (!cpufreq_driver)
		goto err_out_unlock;

	if (!try_module_get(cpufreq_driver->owner))
		goto err_out_unlock;


	/* get the CPU */
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data)
		goto err_out_put_module;

	if (!kobject_get(&data->kobj))
		goto err_out_put_module;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return data;

err_out_put_module:
	module_put(cpufreq_driver->owner);
err_out_unlock:
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
	return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
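
/*
 * Usage sketch (illustrative, not part of this file): every successful
 * cpufreq_cpu_get() pins both the policy kobject and the driver module and
 * must be balanced by cpufreq_cpu_put(). A caller sampling policy->cur for
 * some CPU number "cpu" would look like:
 *
 *	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *	unsigned int cur = 0;
 *
 *	if (policy) {
 *		cur = policy->cur;
 *		cpufreq_cpu_put(policy);
 *	}
 */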


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}
	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
		pr_debug("scaling loops_per_jiffy to %lu "
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
	}
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
#endif
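
/*
 * The cpufreq_scale() call above rescales loops_per_jiffy proportionally
 * to the clock change, lpj = lpj_ref * freq_new / freq_ref. A worked
 * example with made-up numbers: with a reference of 4994048 loops
 * calibrated at 1000000 kHz, a switch to 500000 kHz gives
 *
 *	cpufreq_scale(4994048, 1000000, 500000) == 2497024
 *
 * i.e. half the busy-loop count for half the clock rate.
 */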


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
	switch (state) {

	case CPUFREQ_PRECHANGE:
		/* detect if the driver reported a value as "old frequency"
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
				pr_debug("Warning: CPU frequency is"
					" %u, cpufreq assumed %u kHz.\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
			}
		}
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
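
/*
 * Driver-side sketch (illustrative only; "my_write_freq_register" is a
 * hypothetical hardware hook, not part of this file). A scaling driver's
 * ->target callback brackets the actual hardware programming with the two
 * notifications that this function handles:
 *
 *	struct cpufreq_freqs freqs = {
 *		.cpu = policy->cpu,
 *		.old = policy->cur,
 *		.new = target_freq,
 *	};
 *
 *	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *	my_write_freq_register(target_freq);	/* hypothetical */
 *	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */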



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
			return t;

	return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
				struct cpufreq_governor **governor)
{
	int err = -EINVAL;

	if (!cpufreq_driver)
		goto out;

	if (cpufreq_driver->setpolicy) {
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
			err = 0;
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_POWERSAVE;
			err = 0;
		}
	} else if (cpufreq_driver->target) {
		struct cpufreq_governor *t;

		mutex_lock(&cpufreq_governor_mutex);

		t = __find_governor(str_governor);

		if (t == NULL) {
			int ret;

			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);

			if (ret == 0)
				t = __find_governor(str_governor);
		}

		if (t != NULL) {
			*governor = t;
			err = 0;
		}

		mutex_unlock(&cpufreq_governor_mutex);
	}
out:
	return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
(struct cpufreq_policy *policy, char *buf)		\
{							\
	return sprintf(buf, "%u\n", policy->object);	\
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	unsigned int ret = -EINVAL;					\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	return ret ? ret : count;					\
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);
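
/*
 * For reference, store_one(scaling_max_freq, max) above expands to roughly
 * the following (a sketch of the generated code, not an extra definition):
 *
 *	static ssize_t store_scaling_max_freq
 *	(struct cpufreq_policy *policy, const char *buf, size_t count)
 *	{
 *		unsigned int ret = -EINVAL;
 *		struct cpufreq_policy new_policy;
 *
 *		ret = cpufreq_get_policy(&new_policy, policy->cpu);
 *		if (ret)
 *			return -EINVAL;
 *
 *		ret = sscanf(buf, "%u", &new_policy.max);
 *		if (ret != 1)
 *			return -EINVAL;
 *
 *		ret = __cpufreq_set_policy(policy, &new_policy);
 *		policy->user_policy.max = policy->max;
 *
 *		return ret ? ret : count;
 *	}
 */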

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
{
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
				policy->governor->name);
	return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int ret = -EINVAL;
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

	ret = sscanf(buf, "%15s", str_governor);
	if (ret != 1)
		return -EINVAL;

	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
		return -EINVAL;

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret)
		return ret;
	else
		return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

	if (!cpufreq_driver->target) {
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
			goto out;
		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
	ssize_t i = 0;
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
			break;
	}
	i += sprintf(&buf[i], "\n");
	return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
	if (cpumask_empty(policy->related_cpus))
		return show_cpus(policy->cpus, buf);
	return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	unsigned int ret;

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&cpuinfo_transition_latency.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&related_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	&scaling_setspeed.attr,
	NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		goto fail;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		goto fail;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	pr_debug("last reference is dropped\n");
	complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};
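
/*
 * The show()/store() wrappers above back the per-policy sysfs files. From
 * userspace, a read of e.g. scaling_cur_freq ends up in show(), which takes
 * the policy refcount and read lock and dispatches to the matching
 * show_scaling_cur_freq(). A minimal userspace reader (an illustrative
 * sketch, not part of the kernel):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned int khz;
 *		FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/"
 *				"scaling_cur_freq", "r");
 *
 *		if (f && fscanf(f, "%u", &khz) == 1)
 *			printf("cpu0 runs at %u kHz\n", khz);
 *		if (f)
 *			fclose(f);
 *		return 0;
 *	}
 */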

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
				  struct cpufreq_policy *policy,
				  struct device *dev)
{
	int ret = 0;
#ifdef CONFIG_SMP
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;

	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;

		if (cpu == j)
			continue;

		/* Check for existing affected CPUs.
		 * They may not be aware of it due to CPU Hotplug.
		 * cpufreq_cpu_put is called when the device is removed
		 * in __cpufreq_remove_dev()
		 */
		managed_policy = cpufreq_cpu_get(j);
		if (unlikely(managed_policy)) {

			/* Set proper policy_cpu */
			unlock_policy_rwsem_write(cpu);
			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

			if (lock_policy_rwsem_write(cpu) < 0) {
				/* Should not go through policy unlock path */
				if (cpufreq_driver->exit)
					cpufreq_driver->exit(policy);
				cpufreq_cpu_put(managed_policy);
				return -EBUSY;
			}

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			cpumask_copy(managed_policy->cpus, policy->cpus);
			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

			pr_debug("CPU already managed, adding link\n");
			ret = sysfs_create_link(&dev->kobj,
						&managed_policy->kobj,
						"cpufreq");
			if (ret)
				cpufreq_cpu_put(managed_policy);
			/*
			 * Success. We only needed to be added to the mask.
			 * Call driver->exit() because only the cpu parent of
			 * the kobj needed to call init().
			 */
			if (cpufreq_driver->exit)
				cpufreq_driver->exit(policy);

			if (!ret)
				return 1;
			else
				return ret;
		}
	}
#endif
	return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
				   struct cpufreq_policy *policy)
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;
		struct device *cpu_dev;

		if (j == cpu)
			continue;
		if (!cpu_online(j))
			continue;

		pr_debug("CPU %u already managed, adding link\n", j);
		managed_policy = cpufreq_cpu_get(cpu);
		cpu_dev = get_cpu_device(j);
		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			cpufreq_cpu_put(managed_policy);
			return ret;
		}
	}
	return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
				     struct cpufreq_policy *policy,
				     struct device *dev)
{
	struct cpufreq_policy new_policy;
	struct freq_attr **drv_attr;
	unsigned long flags;
	int ret = 0;
	unsigned int j;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
				   &dev->kobj, "cpufreq");
	if (ret)
		return ret;

	/* set up files for this cpu device */
	drv_attr = cpufreq_driver->attr;
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
			goto err_out_kobj_put;
		drv_attr++;
	}
	if (cpufreq_driver->get) {
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->target) {
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->bios_limit) {
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
			goto err_out_kobj_put;
	}

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus) {
		if (!cpu_online(j))
			continue;
		per_cpu(cpufreq_cpu_data, j) = policy;
		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = cpufreq_add_dev_symlink(cpu, policy);
	if (ret)
		goto err_out_kobj_put;

	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
	/* assure that the starting sequence is run in __cpufreq_set_policy */
	policy->governor = NULL;

	/* set default policy */
	ret = __cpufreq_set_policy(policy, &new_policy);
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
		pr_debug("setting policy failed\n");
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
	}
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int ret = 0, found = 0;
	struct cpufreq_policy *policy;
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	int sibling;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}
#endif

	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	ret = -ENOMEM;
	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set the CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
	ret = (lock_policy_rwsem_write(cpu) < 0);
	WARN_ON(ret);

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* Set governor before ->init, so that the driver can check it */
#ifdef CONFIG_HOTPLUG_CPU
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cp->governor &&
		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
			policy->governor = cp->governor;
			found = 1;
			break;
		}
	}
#endif
	if (!found)
		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	/* call driver. From then on the driver must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_unlock_policy;
	}
	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

	ret = cpufreq_add_dev_policy(cpu, policy, dev);
	if (ret) {
		if (ret > 0)
			/* This is a managed CPU: the symlink was created,
			   so exit with 0 */
			ret = 0;
		goto err_unlock_policy;
	}

	ret = cpufreq_add_dev_interface(cpu, policy, dev);
	if (ret)
		goto err_out_unregister;

	unlock_policy_rwsem_write(cpu);

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;


err_out_unregister:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
	unlock_policy_rwsem_write(cpu);
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * The caller should already hold the policy rwsem in write mode for this
 * CPU; this routine releases that rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
#ifdef CONFIG_SMP
	struct device *cpu_dev;
	unsigned int j;
#endif

	pr_debug("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		pr_debug("removing link\n");
		cpumask_clear_cpu(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		kobj = &dev->kobj;
		cpufreq_cpu_put(data);
		unlock_policy_rwsem_write(cpu);
		sysfs_remove_link(kobj, "cpufreq");
		return 0;
	}
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
			CPUFREQ_NAME_LEN);
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
	 * the sysfs links afterwards.
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			per_cpu(cpufreq_cpu_data, j) = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			strncpy(per_cpu(cpufreq_cpu_governor, j),
				data->governor->name, CPUFREQ_NAME_LEN);
#endif
			cpu_dev = get_cpu_device(j);
			kobj = &cpu_dev->kobj;
			unlock_policy_rwsem_write(cpu);
			sysfs_remove_link(kobj, "cpufreq");
			lock_policy_rwsem_write(cpu);
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	kobj = &data->kobj;
	cmp = &data->kobj_unregister;
	unlock_policy_rwsem_write(cpu);
	kobject_put(kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	pr_debug("waiting for dropping of refcount\n");
	wait_for_completion(cmp);
	pr_debug("wait complete\n");

	lock_policy_rwsem_write(cpu);
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);
	unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
	/* when the CPU which is the parent of the kobj is hotplugged
	 * offline, check for siblings, and create the cpufreq sysfs
	 * interface and symlinks
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		/* first sibling now owns the new sysfs dir */
		cpumask_clear_cpu(cpu, data->cpus);
		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);

		/* finally remove our own symlink */
		lock_policy_rwsem_write(cpu);
		__cpufreq_remove_dev(dev, sif);
	}
#endif

	free_cpumask_var(data->related_cpus);
	free_cpumask_var(data->cpus);
	kfree(data);

	return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int retval;

	if (cpu_is_offline(cpu))
		return 0;

	if (unlikely(lock_policy_rwsem_write(cpu)))
		BUG();

	retval = __cpufreq_remove_dev(dev, sif);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
	pr_debug("handle_update for cpu %u called\n", cpu);
	cpufreq_update_policy(cpu);
}

/**
 *	cpufreq_out_of_sync - the actual and the saved CPU frequency differ,
 *	so we're in deep trouble.
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
 *	We adjust to the current frequency first, and need to clean up later.
 *	So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

	freqs.cpu = cpu;
	freqs.old = old_freq;
	freqs.new = new_freq;
	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * The return value will be the same as what is shown in scaling_cur_freq
 * in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->cur;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->max;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
	unsigned int ret_freq = 0;

	if (!cpufreq_driver->get)
		return ret_freq;

	ret_freq = cpufreq_driver->get(cpu);

	if (ret_freq && policy->cur &&
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
		/* verify that no discrepancy between the actual and the
		   saved value exists */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
			schedule_work(&policy->update);
		}
	}

	return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current CPU frequency, asking the hardware via the driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("suspending cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return 0;

	if (cpufreq_driver->suspend) {
		ret = cpufreq_driver->suspend(cpu_policy);
		if (ret)
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
					"step on CPU %u\n", cpu_policy->cpu);
	}

	cpufreq_cpu_put(cpu_policy);
	return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) schedule a call to cpufreq_update_policy() ASAP once interrupts
 *	    are restored. It will verify that the current freq is in sync
 *	    with what we believe it to be. This is a bit later than when it
 *	    should be, but nonetheless it's better than calling
 *	    cpufreq_driver->get() here, which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("resuming cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(cpu_policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", cpu_policy->cpu);
			goto fail;
		}
	}

	schedule_work(&cpu_policy->update);

fail:
	cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Add a driver to one of two lists: either a list of drivers that
 *	are notified about clock rate changes (once before and once after
 *	the transition), or a list of drivers that are notified about
 *	changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	WARN_ON(!init_cpufreq_transition_notifier_list_called);

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_register(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
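
/*
 * Registration sketch (illustrative; "my_transition_cb" is a hypothetical
 * client, not part of this file). A subsystem that must react to frequency
 * changes registers on the transition chain and receives the same
 * cpufreq_freqs argument that cpufreq_notify_transition() passes:
 *
 *	static int my_transition_cb(struct notifier_block *nb,
 *				    unsigned long state, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (state == CPUFREQ_POSTCHANGE)
 *			pr_info("cpu%u: %u -> %u kHz\n", freqs->cpu,
 *				freqs->old, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_transition_cb,
 *	};
 *
 *	cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */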


/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_unregister(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;

	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
		target_freq, relation);
	if (cpu_online(policy->cpu) && cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);

	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
	int ret = -EINVAL;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		goto fail;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	unlock_policy_rwsem_write(policy->cpu);

fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
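
/*
 * Caller sketch (illustrative): a governor that wants at least "freq" kHz
 * asks for the lowest supported frequency at or above it; the locked
 * wrapper above takes the policy reference and write lock on its behalf:
 *
 *	cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
 *
 * Governor callbacks that already run under the policy rwsem (see the
 * locking rules at the top of this file) must use __cpufreq_driver_target()
 * instead, to avoid self-deadlock.
 */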

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
	int ret = 0;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (cpu_online(cpu) && cpufreq_driver->getavg)
		ret = cpufreq_driver->getavg(policy, cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - forward a governor event (e.g. CPUFREQ_GOV_LIMITS)
 * to the policy's governor
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
{
	int ret;

	/* A fallback governor only needs to be defined when the default
	   governor is known to have latency restrictions, like e.g.
	   conservative or ondemand. That this is the case is already
	   ensured in Kconfig
	*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "%s governor failed, too long"
			       " transition latency of HW, fallback"
			       " to %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
	}

	if (!try_module_get(policy->governor->owner))
		return -EINVAL;

	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
						policy->cpu, event);
	ret = policy->governor->governor(policy, event);

	/* we keep one module reference alive for
			each CPU governed by this governor */
	if ((event != CPUFREQ_GOV_START) || ret)
		module_put(policy->governor->owner);
	if ((event == CPUFREQ_GOV_STOP) && !ret)
		module_put(policy->governor->owner);

	return ret;
}

int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err;

	if (!governor)
		return -EINVAL;

	mutex_lock(&cpufreq_governor_mutex);

	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
	}

	mutex_unlock(&cpufreq_governor_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
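
/*
 * Governor module sketch (illustrative; "my_gov_governor" is a hypothetical
 * callback, not part of this file). A governor fills in a struct
 * cpufreq_governor and registers it, typically from its module init:
 *
 *	static int my_gov_governor(struct cpufreq_policy *policy,
 *				   unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_STOP:
 *		case CPUFREQ_GOV_LIMITS:
 *		default:
 *			return 0;
 *		}
 *	}
 *
 *	static struct cpufreq_governor my_gov = {
 *		.name		= "my_gov",
 *		.governor	= my_gov_governor,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	return cpufreq_register_governor(&my_gov);
 */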


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

	if (!governor)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
 * @cpu: CPU whose policy is to be read
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy)
{
	int ret = 0;

	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
		policy->min, policy->max);

	memcpy(&policy->cpuinfo, &data->cpuinfo,
				sizeof(struct cpufreq_cpuinfo));

	if (policy->min > data->max || policy->max < data->min) {
		ret = -EINVAL;
		goto error_out;
	}

	/* verify the cpu speed can be set within this limit */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_ADJUST, policy);

	/* adjust if necessary - hardware incompatibility */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_INCOMPATIBLE, policy);

	/* verify the cpu speed can be set within this limit, which
	   might be different from the first one */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* notification of the new policy */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_NOTIFY, policy);

	data->min = policy->min;
	data->max = policy->max;

	pr_debug("new min and max freqs are %u - %u kHz\n",
					data->min, data->max);

	if (cpufreq_driver->setpolicy) {
		data->policy = policy->policy;
		pr_debug("setting range\n");
		ret = cpufreq_driver->setpolicy(policy);
	} else {
		if (policy->governor != data->governor) {
			/* save old, working values */
			struct cpufreq_governor *old_gov = data->governor;

			pr_debug("governor switch\n");

			/* end old governor */
			if (data->governor)
				__cpufreq_governor(data, CPUFREQ_GOV_STOP);

			/* start new governor */
			data->governor = policy->governor;
			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
				/* new governor failed, so re-start old one */
				pr_debug("starting governor %s failed\n",
							data->governor->name);
				if (old_gov) {
					data->governor = old_gov;
					__cpufreq_governor(data,
							   CPUFREQ_GOV_START);
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
		pr_debug("governor: change or update limits\n");
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

error_out:
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
 *	Useful for policy notifiers which have different requirements
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
	struct cpufreq_policy policy;
	int ret;

	if (!data) {
		ret = -ENODEV;
		goto no_policy;
	}

	if (unlikely(lock_policy_rwsem_write(cpu))) {
		ret = -EINVAL;
		goto fail;
	}

	pr_debug("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
	policy.min = data->user_policy.min;
	policy.max = data->user_policy.max;
	policy.policy = data->user_policy.policy;
	policy.governor = data->user_policy.governor;

	/* the BIOS might change the frequency behind our back
	   -> ask the driver for the current freq and notify governors
	   about a change */
	if (cpufreq_driver->get) {
		policy.cur = cpufreq_driver->get(cpu);
		if (!data->cur) {
			pr_debug("Driver did not initialize current freq");
			data->cur = policy.cur;
		} else {
			if (data->cur != policy.cur)
				cpufreq_out_of_sync(cpu, data->cur,
								policy.cur);
		}
	}

	ret = __cpufreq_set_policy(data, &policy);

	unlock_policy_rwsem_write(cpu);

fail:
	cpufreq_cpu_put(data);
no_policy:
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;

	dev = get_cpu_device(cpu);
	if (dev) {
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			if (unlikely(lock_policy_rwsem_write(cpu)))
				BUG();

			__cpufreq_remove_dev(dev, NULL);
			break;
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
	unsigned long flags;
	int ret;

	if (!driver_data || !driver_data->verify || !driver_data->init ||
	    ((!driver_data->setpolicy) && (!driver_data->target)))
		return -EINVAL;

	pr_debug("trying to register driver %s\n", driver_data->name);

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	if (cpufreq_driver) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		return -EBUSY;
	}
	cpufreq_driver = driver_data;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = subsys_interface_register(&cpufreq_interface);
	if (ret)
		goto err_null_driver;

	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
				ret = 0;
				break;
			}

		/* if all ->init() calls failed, unregister */
		if (ret) {
			pr_debug("no CPU initialized for driver %s\n",
							driver_data->name);
			goto err_if_unreg;
		}
	}

	register_hotcpu_notifier(&cpufreq_cpu_notifier);
	pr_debug("driver %s up and running\n", driver_data->name);

	return 0;
err_if_unreg:
	subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
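
/*
 * Driver module sketch (illustrative; the "my_*" callbacks are hypothetical,
 * not part of this file). Per the validation above, a scaling driver must
 * supply at least ->verify, ->init and one of ->setpolicy/->target, then
 * register itself:
 *
 *	static struct cpufreq_driver my_driver = {
 *		.name	= "my_driver",
 *		.owner	= THIS_MODULE,
 *		.init	= my_cpu_init,		/* hypothetical */
 *		.verify	= my_verify_policy,	/* hypothetical */
 *		.target	= my_target,		/* hypothetical */
 *	};
 *
 *	static int __init my_driver_init(void)
 *	{
 *		return cpufreq_register_driver(&my_driver);
 *	}
 *	module_init(my_driver_init);
 */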


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long flags;

	if (!cpufreq_driver || (driver != cpufreq_driver))
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	subsys_interface_unregister(&cpufreq_interface);
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(cpufreq_policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}

	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);
1917