xref: /linux/drivers/cpufreq/cpufreq.c (revision 12871a0bd67dd4db4418e1daafcd46e9d329ef10)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 #include <linux/syscore_ops.h>
32 
33 #include <trace/events/power.h>
34 
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
61  *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in the cpufreq hotplug path should
63  *   not take this semaphore, as the top-level hotplug notifier handler takes it.
64  * - Lock should not be held across
65  *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
66  */
67 static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
68 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
69 
70 #define lock_policy_rwsem(mode, cpu)					\
71 static int lock_policy_rwsem_##mode					\
72 (int cpu)								\
73 {									\
74 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
75 	BUG_ON(policy_cpu == -1);					\
76 	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
77 	if (unlikely(!cpu_online(cpu))) {				\
78 		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
79 		return -1;						\
80 	}								\
81 									\
82 	return 0;							\
83 }
84 
85 lock_policy_rwsem(read, cpu);
86 
87 lock_policy_rwsem(write, cpu);
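/*
 * For clarity, this is what the macro above expands to for the read case
 * (an illustrative expansion, not an addition to the build):
 *
 *	static int lock_policy_rwsem_read(int cpu)
 *	{
 *		int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 *		BUG_ON(policy_cpu == -1);
 *		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *		if (unlikely(!cpu_online(cpu))) {
 *			up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *			return -1;
 *		}
 *		return 0;
 *	}
 *
 * Callers are expected to bail out on a negative return:
 *
 *	if (lock_policy_rwsem_read(cpu) < 0)
 *		return -EINVAL;
 */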
88 
89 static void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 
96 static void unlock_policy_rwsem_write(int cpu)
97 {
98 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
99 	BUG_ON(policy_cpu == -1);
100 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
101 }
102 
103 
104 /* internal prototypes */
105 static int __cpufreq_governor(struct cpufreq_policy *policy,
106 		unsigned int event);
107 static unsigned int __cpufreq_get(unsigned int cpu);
108 static void handle_update(struct work_struct *work);
109 
110 /**
111  * Two notifier lists: the "policy" list is involved in the
112  * validation process for a new CPU frequency policy; the
113  * "transition" list for kernel code that needs to handle
114  * changes to devices when the CPU clock speed changes.
115  * The mutex locks both lists.
116  */
117 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
118 static struct srcu_notifier_head cpufreq_transition_notifier_list;
119 
120 static bool init_cpufreq_transition_notifier_list_called;
121 static int __init init_cpufreq_transition_notifier_list(void)
122 {
123 	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
124 	init_cpufreq_transition_notifier_list_called = true;
125 	return 0;
126 }
127 pure_initcall(init_cpufreq_transition_notifier_list);
128 
129 static LIST_HEAD(cpufreq_governor_list);
130 static DEFINE_MUTEX(cpufreq_governor_mutex);
131 
132 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
133 {
134 	struct cpufreq_policy *data;
135 	unsigned long flags;
136 
137 	if (cpu >= nr_cpu_ids)
138 		goto err_out;
139 
140 	/* get the cpufreq driver */
141 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
142 
143 	if (!cpufreq_driver)
144 		goto err_out_unlock;
145 
146 	if (!try_module_get(cpufreq_driver->owner))
147 		goto err_out_unlock;
148 
149 
150 	/* get the CPU */
151 	data = per_cpu(cpufreq_cpu_data, cpu);
152 
153 	if (!data)
154 		goto err_out_put_module;
155 
156 	if (!kobject_get(&data->kobj))
157 		goto err_out_put_module;
158 
159 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
160 	return data;
161 
162 err_out_put_module:
163 	module_put(cpufreq_driver->owner);
164 err_out_unlock:
165 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
166 err_out:
167 	return NULL;
168 }
169 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
170 
171 
172 void cpufreq_cpu_put(struct cpufreq_policy *data)
173 {
174 	kobject_put(&data->kobj);
175 	module_put(cpufreq_driver->owner);
176 }
177 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
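/*
 * Typical usage pattern for the two helpers above; every successful
 * cpufreq_cpu_get() must be balanced by exactly one cpufreq_cpu_put(),
 * since the pair pins both the policy kobject and the driver module
 * (illustrative sketch only):
 *
 *	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *	if (!policy)
 *		return -ENODEV;
 *	// ... use the policy, under the policy rwsem where required ...
 *	cpufreq_cpu_put(policy);
 */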
178 
179 
180 /*********************************************************************
181  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
182  *********************************************************************/
183 
184 /**
185  * adjust_jiffies - adjust the system "loops_per_jiffy"
186  *
187  * This function alters the system "loops_per_jiffy" for the clock
188  * speed change. Note that loops_per_jiffy cannot be updated on SMP
189  * systems as each CPU might be scaled differently. So, use the arch
190  * per-CPU loops_per_jiffy value wherever possible.
191  */
192 #ifndef CONFIG_SMP
193 static unsigned long l_p_j_ref;
194 static unsigned int  l_p_j_ref_freq;
195 
196 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
197 {
198 	if (ci->flags & CPUFREQ_CONST_LOOPS)
199 		return;
200 
201 	if (!l_p_j_ref_freq) {
202 		l_p_j_ref = loops_per_jiffy;
203 		l_p_j_ref_freq = ci->old;
204 		pr_debug("saving %lu as reference value for loops_per_jiffy; "
205 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
206 	}
207 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
208 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
209 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
210 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
211 								ci->new);
212 		pr_debug("scaling loops_per_jiffy to %lu "
213 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
214 	}
215 }
216 #else
217 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
218 {
219 	return;
220 }
221 #endif
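/*
 * A worked example for the scaling above (numbers are assumed, purely
 * for illustration): with l_p_j_ref = 4000000 saved at l_p_j_ref_freq =
 * 800000 kHz, a transition to 1600000 kHz gives
 *
 *	loops_per_jiffy = cpufreq_scale(4000000, 800000, 1600000)
 *	                = 4000000 * 1600000 / 800000 = 8000000
 *
 * i.e. loops_per_jiffy scales linearly with frequency. Adjusting it
 * before an increase (PRECHANGE) and after a decrease (POSTCHANGE)
 * ensures udelay() never runs too short during the transition.
 */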
222 
223 
224 /**
225  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
226  * on frequency transition.
227  *
228  * This function calls the transition notifiers and the "adjust_jiffies"
229  * function. It is called twice on all CPU frequency changes that have
230  * external effects.
231  */
232 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
233 {
234 	struct cpufreq_policy *policy;
235 
236 	BUG_ON(irqs_disabled());
237 
238 	freqs->flags = cpufreq_driver->flags;
239 	pr_debug("notification %u of frequency transition to %u kHz\n",
240 		state, freqs->new);
241 
242 	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
243 	switch (state) {
244 
245 	case CPUFREQ_PRECHANGE:
246 		/* detect if the driver reported a value as "old frequency"
247 		 * which is not equal to what the cpufreq core thinks is
248 		 * "old frequency".
249 		 */
250 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
251 			if ((policy) && (policy->cpu == freqs->cpu) &&
252 			    (policy->cur) && (policy->cur != freqs->old)) {
253 				pr_debug("Warning: CPU frequency is"
254 					" %u, cpufreq assumed %u kHz.\n",
255 					freqs->old, policy->cur);
256 				freqs->old = policy->cur;
257 			}
258 		}
259 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
260 				CPUFREQ_PRECHANGE, freqs);
261 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
262 		break;
263 
264 	case CPUFREQ_POSTCHANGE:
265 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
266 		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
267 			(unsigned long)freqs->cpu);
268 		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
269 		trace_cpu_frequency(freqs->new, freqs->cpu);
270 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
271 				CPUFREQ_POSTCHANGE, freqs);
272 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
273 			policy->cur = freqs->new;
274 		break;
275 	}
276 }
277 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
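/*
 * A scaling driver's ->target() callback is expected to bracket the
 * actual hardware switch with these two notifications, roughly like
 * this (hypothetical driver sketch; my_hw_set_frequency() is an
 * assumed hardware hook, not a real API):
 *
 *	struct cpufreq_freqs freqs = {
 *		.cpu = policy->cpu,
 *		.old = policy->cur,
 *		.new = target_freq,
 *	};
 *
 *	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *	my_hw_set_frequency(target_freq);
 *	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */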
278 
279 
280 
281 /*********************************************************************
282  *                          SYSFS INTERFACE                          *
283  *********************************************************************/
284 
285 static struct cpufreq_governor *__find_governor(const char *str_governor)
286 {
287 	struct cpufreq_governor *t;
288 
289 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
290 		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
291 			return t;
292 
293 	return NULL;
294 }
295 
296 /**
297  * cpufreq_parse_governor - parse a governor string
298  */
299 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
300 				struct cpufreq_governor **governor)
301 {
302 	int err = -EINVAL;
303 
304 	if (!cpufreq_driver)
305 		goto out;
306 
307 	if (cpufreq_driver->setpolicy) {
308 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
309 			*policy = CPUFREQ_POLICY_PERFORMANCE;
310 			err = 0;
311 		} else if (!strnicmp(str_governor, "powersave",
312 						CPUFREQ_NAME_LEN)) {
313 			*policy = CPUFREQ_POLICY_POWERSAVE;
314 			err = 0;
315 		}
316 	} else if (cpufreq_driver->target) {
317 		struct cpufreq_governor *t;
318 
319 		mutex_lock(&cpufreq_governor_mutex);
320 
321 		t = __find_governor(str_governor);
322 
323 		if (t == NULL) {
324 			int ret;
325 
326 			mutex_unlock(&cpufreq_governor_mutex);
327 			ret = request_module("cpufreq_%s", str_governor);
328 			mutex_lock(&cpufreq_governor_mutex);
329 
330 			if (ret == 0)
331 				t = __find_governor(str_governor);
332 		}
333 
334 		if (t != NULL) {
335 			*governor = t;
336 			err = 0;
337 		}
338 
339 		mutex_unlock(&cpufreq_governor_mutex);
340 	}
341 out:
342 	return err;
343 }
344 
345 
346 /**
347  * cpufreq_per_cpu_attr_read() / show_##file_name() -
348  * print out cpufreq information
349  *
350  * Write out information from cpufreq_driver->policy[cpu]; object must be
351  * "unsigned int".
352  */
353 
354 #define show_one(file_name, object)			\
355 static ssize_t show_##file_name				\
356 (struct cpufreq_policy *policy, char *buf)		\
357 {							\
358 	return sprintf(buf, "%u\n", policy->object);	\
359 }
360 
361 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
362 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
363 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
364 show_one(scaling_min_freq, min);
365 show_one(scaling_max_freq, max);
366 show_one(scaling_cur_freq, cur);
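/*
 * As an example, show_one(scaling_cur_freq, cur) above expands to
 * (illustrative expansion):
 *
 *	static ssize_t show_scaling_cur_freq
 *	(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->cur);
 *	}
 */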
367 
368 static int __cpufreq_set_policy(struct cpufreq_policy *data,
369 				struct cpufreq_policy *policy);
370 
371 /**
372  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
373  */
374 #define store_one(file_name, object)			\
375 static ssize_t store_##file_name					\
376 (struct cpufreq_policy *policy, const char *buf, size_t count)		\
377 {									\
378 	unsigned int ret = -EINVAL;					\
379 	struct cpufreq_policy new_policy;				\
380 									\
381 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
382 	if (ret)							\
383 		return -EINVAL;						\
384 									\
385 	ret = sscanf(buf, "%u", &new_policy.object);			\
386 	if (ret != 1)							\
387 		return -EINVAL;						\
388 									\
389 	ret = __cpufreq_set_policy(policy, &new_policy);		\
390 	policy->user_policy.object = policy->object;			\
391 									\
392 	return ret ? ret : count;					\
393 }
394 
395 store_one(scaling_min_freq, min);
396 store_one(scaling_max_freq, max);
397 
398 /**
399  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
400  */
401 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
402 					char *buf)
403 {
404 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
405 	if (!cur_freq)
406 		return sprintf(buf, "<unknown>");
407 	return sprintf(buf, "%u\n", cur_freq);
408 }
409 
410 
411 /**
412  * show_scaling_governor - show the current policy for the specified CPU
413  */
414 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
415 {
416 	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
417 		return sprintf(buf, "powersave\n");
418 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
419 		return sprintf(buf, "performance\n");
420 	else if (policy->governor)
421 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
422 				policy->governor->name);
423 	return -EINVAL;
424 }
425 
426 
427 /**
428  * store_scaling_governor - store policy for the specified CPU
429  */
430 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
431 					const char *buf, size_t count)
432 {
433 	unsigned int ret = -EINVAL;
434 	char	str_governor[16];
435 	struct cpufreq_policy new_policy;
436 
437 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
438 	if (ret)
439 		return ret;
440 
441 	ret = sscanf(buf, "%15s", str_governor);
442 	if (ret != 1)
443 		return -EINVAL;
444 
445 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
446 						&new_policy.governor))
447 		return -EINVAL;
448 
449 	/* Do not use cpufreq_set_policy here or the user_policy.max
450 	   will be wrongly overridden */
451 	ret = __cpufreq_set_policy(policy, &new_policy);
452 
453 	policy->user_policy.policy = policy->policy;
454 	policy->user_policy.governor = policy->governor;
455 
456 	if (ret)
457 		return ret;
458 	else
459 		return count;
460 }
461 
462 /**
463  * show_scaling_driver - show the cpufreq driver currently loaded
464  */
465 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
466 {
467 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
468 }
469 
470 /**
471  * show_scaling_available_governors - show the available CPUfreq governors
472  */
473 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
474 						char *buf)
475 {
476 	ssize_t i = 0;
477 	struct cpufreq_governor *t;
478 
479 	if (!cpufreq_driver->target) {
480 		i += sprintf(buf, "performance powersave");
481 		goto out;
482 	}
483 
484 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
485 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
486 		    - (CPUFREQ_NAME_LEN + 2)))
487 			goto out;
488 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
489 	}
490 out:
491 	i += sprintf(&buf[i], "\n");
492 	return i;
493 }
494 
495 static ssize_t show_cpus(const struct cpumask *mask, char *buf)
496 {
497 	ssize_t i = 0;
498 	unsigned int cpu;
499 
500 	for_each_cpu(cpu, mask) {
501 		if (i)
502 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
503 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
504 		if (i >= (PAGE_SIZE - 5))
505 			break;
506 	}
507 	i += sprintf(&buf[i], "\n");
508 	return i;
509 }
510 
511 /**
512  * show_related_cpus - show the CPUs affected by each transition even if
513  * hw coordination is in use
514  */
515 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
516 {
517 	if (cpumask_empty(policy->related_cpus))
518 		return show_cpus(policy->cpus, buf);
519 	return show_cpus(policy->related_cpus, buf);
520 }
521 
522 /**
523  * show_affected_cpus - show the CPUs affected by each transition
524  */
525 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
526 {
527 	return show_cpus(policy->cpus, buf);
528 }
529 
530 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
531 					const char *buf, size_t count)
532 {
533 	unsigned int freq = 0;
534 	unsigned int ret;
535 
536 	if (!policy->governor || !policy->governor->store_setspeed)
537 		return -EINVAL;
538 
539 	ret = sscanf(buf, "%u", &freq);
540 	if (ret != 1)
541 		return -EINVAL;
542 
543 	policy->governor->store_setspeed(policy, freq);
544 
545 	return count;
546 }
547 
548 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
549 {
550 	if (!policy->governor || !policy->governor->show_setspeed)
551 		return sprintf(buf, "<unsupported>\n");
552 
553 	return policy->governor->show_setspeed(policy, buf);
554 }
555 
556 /**
557  * show_bios_limit - show the current cpufreq HW/BIOS limitation
558  */
559 static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
560 {
561 	unsigned int limit;
562 	int ret;
563 	if (cpufreq_driver->bios_limit) {
564 		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
565 		if (!ret)
566 			return sprintf(buf, "%u\n", limit);
567 	}
568 	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
569 }
570 
571 cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
572 cpufreq_freq_attr_ro(cpuinfo_min_freq);
573 cpufreq_freq_attr_ro(cpuinfo_max_freq);
574 cpufreq_freq_attr_ro(cpuinfo_transition_latency);
575 cpufreq_freq_attr_ro(scaling_available_governors);
576 cpufreq_freq_attr_ro(scaling_driver);
577 cpufreq_freq_attr_ro(scaling_cur_freq);
578 cpufreq_freq_attr_ro(bios_limit);
579 cpufreq_freq_attr_ro(related_cpus);
580 cpufreq_freq_attr_ro(affected_cpus);
581 cpufreq_freq_attr_rw(scaling_min_freq);
582 cpufreq_freq_attr_rw(scaling_max_freq);
583 cpufreq_freq_attr_rw(scaling_governor);
584 cpufreq_freq_attr_rw(scaling_setspeed);
585 
586 static struct attribute *default_attrs[] = {
587 	&cpuinfo_min_freq.attr,
588 	&cpuinfo_max_freq.attr,
589 	&cpuinfo_transition_latency.attr,
590 	&scaling_min_freq.attr,
591 	&scaling_max_freq.attr,
592 	&affected_cpus.attr,
593 	&related_cpus.attr,
594 	&scaling_governor.attr,
595 	&scaling_driver.attr,
596 	&scaling_available_governors.attr,
597 	&scaling_setspeed.attr,
598 	NULL
599 };
600 
601 struct kobject *cpufreq_global_kobject;
602 EXPORT_SYMBOL(cpufreq_global_kobject);
603 
604 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
605 #define to_attr(a) container_of(a, struct freq_attr, attr)
606 
607 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
608 {
609 	struct cpufreq_policy *policy = to_policy(kobj);
610 	struct freq_attr *fattr = to_attr(attr);
611 	ssize_t ret = -EINVAL;
612 	policy = cpufreq_cpu_get(policy->cpu);
613 	if (!policy)
614 		goto no_policy;
615 
616 	if (lock_policy_rwsem_read(policy->cpu) < 0)
617 		goto fail;
618 
619 	if (fattr->show)
620 		ret = fattr->show(policy, buf);
621 	else
622 		ret = -EIO;
623 
624 	unlock_policy_rwsem_read(policy->cpu);
625 fail:
626 	cpufreq_cpu_put(policy);
627 no_policy:
628 	return ret;
629 }
630 
631 static ssize_t store(struct kobject *kobj, struct attribute *attr,
632 		     const char *buf, size_t count)
633 {
634 	struct cpufreq_policy *policy = to_policy(kobj);
635 	struct freq_attr *fattr = to_attr(attr);
636 	ssize_t ret = -EINVAL;
637 	policy = cpufreq_cpu_get(policy->cpu);
638 	if (!policy)
639 		goto no_policy;
640 
641 	if (lock_policy_rwsem_write(policy->cpu) < 0)
642 		goto fail;
643 
644 	if (fattr->store)
645 		ret = fattr->store(policy, buf, count);
646 	else
647 		ret = -EIO;
648 
649 	unlock_policy_rwsem_write(policy->cpu);
650 fail:
651 	cpufreq_cpu_put(policy);
652 no_policy:
653 	return ret;
654 }
655 
656 static void cpufreq_sysfs_release(struct kobject *kobj)
657 {
658 	struct cpufreq_policy *policy = to_policy(kobj);
659 	pr_debug("last reference is dropped\n");
660 	complete(&policy->kobj_unregister);
661 }
662 
663 static const struct sysfs_ops sysfs_ops = {
664 	.show	= show,
665 	.store	= store,
666 };
667 
668 static struct kobj_type ktype_cpufreq = {
669 	.sysfs_ops	= &sysfs_ops,
670 	.default_attrs	= default_attrs,
671 	.release	= cpufreq_sysfs_release,
672 };
673 
674 /*
675  * Returns:
676  *   Negative: Failure
677  *   0:        Success
678  *   Positive: When we have a managed CPU and the sysfs got symlinked
679  */
680 static int cpufreq_add_dev_policy(unsigned int cpu,
681 				  struct cpufreq_policy *policy,
682 				  struct sys_device *sys_dev)
683 {
684 	int ret = 0;
685 #ifdef CONFIG_SMP
686 	unsigned long flags;
687 	unsigned int j;
688 #ifdef CONFIG_HOTPLUG_CPU
689 	struct cpufreq_governor *gov;
690 
691 	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
692 	if (gov) {
693 		policy->governor = gov;
694 		pr_debug("Restoring governor %s for cpu %d\n",
695 		       policy->governor->name, cpu);
696 	}
697 #endif
698 
699 	for_each_cpu(j, policy->cpus) {
700 		struct cpufreq_policy *managed_policy;
701 
702 		if (cpu == j)
703 			continue;
704 
705 		/* Check for existing affected CPUs.
706 		 * They may not be aware of it due to CPU Hotplug.
707 		 * cpufreq_cpu_put is called when the device is removed
708 		 * in __cpufreq_remove_dev()
709 		 */
710 		managed_policy = cpufreq_cpu_get(j);
711 		if (unlikely(managed_policy)) {
712 
713 			/* Set proper policy_cpu */
714 			unlock_policy_rwsem_write(cpu);
715 			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
716 
717 			if (lock_policy_rwsem_write(cpu) < 0) {
718 				/* Should not go through policy unlock path */
719 				if (cpufreq_driver->exit)
720 					cpufreq_driver->exit(policy);
721 				cpufreq_cpu_put(managed_policy);
722 				return -EBUSY;
723 			}
724 
725 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
726 			cpumask_copy(managed_policy->cpus, policy->cpus);
727 			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
728 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
729 
730 			pr_debug("CPU already managed, adding link\n");
731 			ret = sysfs_create_link(&sys_dev->kobj,
732 						&managed_policy->kobj,
733 						"cpufreq");
734 			if (ret)
735 				cpufreq_cpu_put(managed_policy);
736 			/*
737 			 * Success. We only needed to be added to the mask.
738 			 * Call driver->exit() because only the cpu parent of
739 			 * the kobj needed to call init().
740 			 */
741 			if (cpufreq_driver->exit)
742 				cpufreq_driver->exit(policy);
743 
744 			if (!ret)
745 				return 1;
746 			else
747 				return ret;
748 		}
749 	}
750 #endif
751 	return ret;
752 }
753 
754 
755 /* symlink affected CPUs */
756 static int cpufreq_add_dev_symlink(unsigned int cpu,
757 				   struct cpufreq_policy *policy)
758 {
759 	unsigned int j;
760 	int ret = 0;
761 
762 	for_each_cpu(j, policy->cpus) {
763 		struct cpufreq_policy *managed_policy;
764 		struct sys_device *cpu_sys_dev;
765 
766 		if (j == cpu)
767 			continue;
768 		if (!cpu_online(j))
769 			continue;
770 
771 		pr_debug("CPU %u already managed, adding link\n", j);
772 		managed_policy = cpufreq_cpu_get(cpu);
773 		cpu_sys_dev = get_cpu_sysdev(j);
774 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
775 					"cpufreq");
776 		if (ret) {
777 			cpufreq_cpu_put(managed_policy);
778 			return ret;
779 		}
780 	}
781 	return ret;
782 }
783 
784 static int cpufreq_add_dev_interface(unsigned int cpu,
785 				     struct cpufreq_policy *policy,
786 				     struct sys_device *sys_dev)
787 {
788 	struct cpufreq_policy new_policy;
789 	struct freq_attr **drv_attr;
790 	unsigned long flags;
791 	int ret = 0;
792 	unsigned int j;
793 
794 	/* prepare interface data */
795 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
796 				   &sys_dev->kobj, "cpufreq");
797 	if (ret)
798 		return ret;
799 
800 	/* set up files for this cpu device */
801 	drv_attr = cpufreq_driver->attr;
802 	while ((drv_attr) && (*drv_attr)) {
803 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
804 		if (ret)
805 			goto err_out_kobj_put;
806 		drv_attr++;
807 	}
808 	if (cpufreq_driver->get) {
809 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
810 		if (ret)
811 			goto err_out_kobj_put;
812 	}
813 	if (cpufreq_driver->target) {
814 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
815 		if (ret)
816 			goto err_out_kobj_put;
817 	}
818 	if (cpufreq_driver->bios_limit) {
819 		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
820 		if (ret)
821 			goto err_out_kobj_put;
822 	}
823 
824 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
825 	for_each_cpu(j, policy->cpus) {
826 		if (!cpu_online(j))
827 			continue;
828 		per_cpu(cpufreq_cpu_data, j) = policy;
829 		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
830 	}
831 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
832 
833 	ret = cpufreq_add_dev_symlink(cpu, policy);
834 	if (ret)
835 		goto err_out_kobj_put;
836 
837 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
838 	/* assure that the starting sequence is run in __cpufreq_set_policy */
839 	policy->governor = NULL;
840 
841 	/* set default policy */
842 	ret = __cpufreq_set_policy(policy, &new_policy);
843 	policy->user_policy.policy = policy->policy;
844 	policy->user_policy.governor = policy->governor;
845 
846 	if (ret) {
847 		pr_debug("setting policy failed\n");
848 		if (cpufreq_driver->exit)
849 			cpufreq_driver->exit(policy);
850 	}
851 	return ret;
852 
853 err_out_kobj_put:
854 	kobject_put(&policy->kobj);
855 	wait_for_completion(&policy->kobj_unregister);
856 	return ret;
857 }
858 
859 
860 /**
861  * cpufreq_add_dev - add a CPU device
862  *
863  * Adds the cpufreq interface for a CPU device.
864  *
865  * The Oracle says: try running cpufreq registration/unregistration concurrently
866  * with CPU hotplugging and all hell will break loose. Tried to clean this
867  * mess up, but more thorough testing is needed. - Mathieu
868  */
869 static int cpufreq_add_dev(struct sys_device *sys_dev)
870 {
871 	unsigned int cpu = sys_dev->id;
872 	int ret = 0, found = 0;
873 	struct cpufreq_policy *policy;
874 	unsigned long flags;
875 	unsigned int j;
876 #ifdef CONFIG_HOTPLUG_CPU
877 	int sibling;
878 #endif
879 
880 	if (cpu_is_offline(cpu))
881 		return 0;
882 
883 	pr_debug("adding CPU %u\n", cpu);
884 
885 #ifdef CONFIG_SMP
886 	/* check whether a different CPU already registered this
887 	 * CPU because it is in the same boat. */
888 	policy = cpufreq_cpu_get(cpu);
889 	if (unlikely(policy)) {
890 		cpufreq_cpu_put(policy);
891 		return 0;
892 	}
893 #endif
894 
895 	if (!try_module_get(cpufreq_driver->owner)) {
896 		ret = -EINVAL;
897 		goto module_out;
898 	}
899 
900 	ret = -ENOMEM;
901 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
902 	if (!policy)
903 		goto nomem_out;
904 
905 	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
906 		goto err_free_policy;
907 
908 	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
909 		goto err_free_cpumask;
910 
911 	policy->cpu = cpu;
912 	cpumask_copy(policy->cpus, cpumask_of(cpu));
913 
914 	/* Initially set CPU itself as the policy_cpu */
915 	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
916 	ret = (lock_policy_rwsem_write(cpu) < 0);
917 	WARN_ON(ret);
918 
919 	init_completion(&policy->kobj_unregister);
920 	INIT_WORK(&policy->update, handle_update);
921 
922 	/* Set governor before ->init, so that driver could check it */
923 #ifdef CONFIG_HOTPLUG_CPU
924 	for_each_online_cpu(sibling) {
925 		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
926 		if (cp && cp->governor &&
927 		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
928 			policy->governor = cp->governor;
929 			found = 1;
930 			break;
931 		}
932 	}
933 #endif
934 	if (!found)
935 		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
936 	/* call driver. From then on the cpufreq driver must be able
937 	 * to accept all calls to ->verify and ->setpolicy for this CPU
938 	 */
939 	ret = cpufreq_driver->init(policy);
940 	if (ret) {
941 		pr_debug("initialization failed\n");
942 		goto err_unlock_policy;
943 	}
944 	policy->user_policy.min = policy->min;
945 	policy->user_policy.max = policy->max;
946 
947 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
948 				     CPUFREQ_START, policy);
949 
950 	ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
951 	if (ret) {
952 		if (ret > 0)
953 			/* This is a managed cpu, symlink created,
954 			   exit with 0 */
955 			ret = 0;
956 		goto err_unlock_policy;
957 	}
958 
959 	ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
960 	if (ret)
961 		goto err_out_unregister;
962 
963 	unlock_policy_rwsem_write(cpu);
964 
965 	kobject_uevent(&policy->kobj, KOBJ_ADD);
966 	module_put(cpufreq_driver->owner);
967 	pr_debug("initialization complete\n");
968 
969 	return 0;
970 
971 
972 err_out_unregister:
973 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
974 	for_each_cpu(j, policy->cpus)
975 		per_cpu(cpufreq_cpu_data, j) = NULL;
976 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
977 
978 	kobject_put(&policy->kobj);
979 	wait_for_completion(&policy->kobj_unregister);
980 
981 err_unlock_policy:
982 	unlock_policy_rwsem_write(cpu);
983 	free_cpumask_var(policy->related_cpus);
984 err_free_cpumask:
985 	free_cpumask_var(policy->cpus);
986 err_free_policy:
987 	kfree(policy);
988 nomem_out:
989 	module_put(cpufreq_driver->owner);
990 module_out:
991 	return ret;
992 }
993 
994 
995 /**
996  * __cpufreq_remove_dev - remove a CPU device
997  *
998  * Removes the cpufreq interface for a CPU device.
999  * Caller should already have policy_rwsem in write mode for this CPU.
1000  * This routine frees the rwsem before returning.
1001  */
1002 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
1003 {
1004 	unsigned int cpu = sys_dev->id;
1005 	unsigned long flags;
1006 	struct cpufreq_policy *data;
1007 	struct kobject *kobj;
1008 	struct completion *cmp;
1009 #ifdef CONFIG_SMP
1010 	struct sys_device *cpu_sys_dev;
1011 	unsigned int j;
1012 #endif
1013 
1014 	pr_debug("unregistering CPU %u\n", cpu);
1015 
1016 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1017 	data = per_cpu(cpufreq_cpu_data, cpu);
1018 
1019 	if (!data) {
1020 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1021 		unlock_policy_rwsem_write(cpu);
1022 		return -EINVAL;
1023 	}
1024 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1025 
1026 
1027 #ifdef CONFIG_SMP
1028 	/* if this isn't the CPU which is the parent of the kobj, we
1029 	 * only need to unlink, put and exit
1030 	 */
1031 	if (unlikely(cpu != data->cpu)) {
1032 		pr_debug("removing link\n");
1033 		cpumask_clear_cpu(cpu, data->cpus);
1034 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1035 		kobj = &sys_dev->kobj;
1036 		cpufreq_cpu_put(data);
1037 		unlock_policy_rwsem_write(cpu);
1038 		sysfs_remove_link(kobj, "cpufreq");
1039 		return 0;
1040 	}
1041 #endif
1042 
1043 #ifdef CONFIG_SMP
1044 
1045 #ifdef CONFIG_HOTPLUG_CPU
1046 	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
1047 			CPUFREQ_NAME_LEN);
1048 #endif
1049 
1050 	/* if we have other CPUs still registered, we need to unlink them,
1051 	 * or else wait_for_completion below will lock up. Clean the
1052 	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1053 	 * the sysfs links afterwards.
1054 	 */
1055 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1056 		for_each_cpu(j, data->cpus) {
1057 			if (j == cpu)
1058 				continue;
1059 			per_cpu(cpufreq_cpu_data, j) = NULL;
1060 		}
1061 	}
1062 
1063 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1064 
1065 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1066 		for_each_cpu(j, data->cpus) {
1067 			if (j == cpu)
1068 				continue;
1069 			pr_debug("removing link for cpu %u\n", j);
1070 #ifdef CONFIG_HOTPLUG_CPU
1071 			strncpy(per_cpu(cpufreq_cpu_governor, j),
1072 				data->governor->name, CPUFREQ_NAME_LEN);
1073 #endif
1074 			cpu_sys_dev = get_cpu_sysdev(j);
1075 			kobj = &cpu_sys_dev->kobj;
1076 			unlock_policy_rwsem_write(cpu);
1077 			sysfs_remove_link(kobj, "cpufreq");
1078 			lock_policy_rwsem_write(cpu);
1079 			cpufreq_cpu_put(data);
1080 		}
1081 	}
1082 #else
1083 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1084 #endif
1085 
1086 	if (cpufreq_driver->target)
1087 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1088 
1089 	kobj = &data->kobj;
1090 	cmp = &data->kobj_unregister;
1091 	unlock_policy_rwsem_write(cpu);
1092 	kobject_put(kobj);
1093 
1094 	/* we need to make sure that the underlying kobj is actually
1095 	 * not referenced anymore by anybody before we proceed with
1096 	 * unloading.
1097 	 */
1098 	pr_debug("waiting for dropping of refcount\n");
1099 	wait_for_completion(cmp);
1100 	pr_debug("wait complete\n");
1101 
1102 	lock_policy_rwsem_write(cpu);
1103 	if (cpufreq_driver->exit)
1104 		cpufreq_driver->exit(data);
1105 	unlock_policy_rwsem_write(cpu);
1106 
1107 #ifdef CONFIG_HOTPLUG_CPU
1108 	/* when the CPU which is the parent of the kobj is hotplugged
1109 	 * offline, check for siblings, and create cpufreq sysfs interface
1110 	 * and symlinks
1111 	 */
1112 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1113 		/* first sibling now owns the new sysfs dir */
1114 		cpumask_clear_cpu(cpu, data->cpus);
1115 		cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));
1116 
1117 		/* finally remove our own symlink */
1118 		lock_policy_rwsem_write(cpu);
1119 		__cpufreq_remove_dev(sys_dev);
1120 	}
1121 #endif
1122 
1123 	free_cpumask_var(data->related_cpus);
1124 	free_cpumask_var(data->cpus);
1125 	kfree(data);
1126 
1127 	return 0;
1128 }
1129 
1130 
1131 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1132 {
1133 	unsigned int cpu = sys_dev->id;
1134 	int retval;
1135 
1136 	if (cpu_is_offline(cpu))
1137 		return 0;
1138 
1139 	if (unlikely(lock_policy_rwsem_write(cpu)))
1140 		BUG();
1141 
1142 	retval = __cpufreq_remove_dev(sys_dev);
1143 	return retval;
1144 }
1145 
1146 
1147 static void handle_update(struct work_struct *work)
1148 {
1149 	struct cpufreq_policy *policy =
1150 		container_of(work, struct cpufreq_policy, update);
1151 	unsigned int cpu = policy->cpu;
1152 	pr_debug("handle_update for cpu %u called\n", cpu);
1153 	cpufreq_update_policy(cpu);
1154 }
1155 
1156 /**
1157  *	cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're in deep trouble.
1158  *	@cpu: cpu number
1159  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1160  *	@new_freq: CPU frequency the CPU actually runs at
1161  *
1162  *	We adjust to current frequency first, and need to clean up later.
1163  *	So either call cpufreq_update_policy() or schedule handle_update().
1164  */
1165 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1166 				unsigned int new_freq)
1167 {
1168 	struct cpufreq_freqs freqs;
1169 
1170 	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1171 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1172 
1173 	freqs.cpu = cpu;
1174 	freqs.old = old_freq;
1175 	freqs.new = new_freq;
1176 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1177 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1178 }
1179 
1180 
1181 /**
1182  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1183  * @cpu: CPU number
1184  *
1185  * This is the last known freq, without actually getting it from the driver.
1186  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1187  */
1188 unsigned int cpufreq_quick_get(unsigned int cpu)
1189 {
1190 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1191 	unsigned int ret_freq = 0;
1192 
1193 	if (policy) {
1194 		ret_freq = policy->cur;
1195 		cpufreq_cpu_put(policy);
1196 	}
1197 
1198 	return ret_freq;
1199 }
1200 EXPORT_SYMBOL(cpufreq_quick_get);
1201 
1202 
1203 static unsigned int __cpufreq_get(unsigned int cpu)
1204 {
1205 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1206 	unsigned int ret_freq = 0;
1207 
1208 	if (!cpufreq_driver->get)
1209 		return ret_freq;
1210 
1211 	ret_freq = cpufreq_driver->get(cpu);
1212 
1213 	if (ret_freq && policy->cur &&
1214 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1215 		/* verify no discrepancy between actual and
1216 		 * saved value exists */
1217 		if (unlikely(ret_freq != policy->cur)) {
1218 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1219 			schedule_work(&policy->update);
1220 		}
1221 	}
1222 
1223 	return ret_freq;
1224 }
1225 
1226 /**
1227  * cpufreq_get - get the current CPU frequency (in kHz)
1228  * @cpu: CPU number
1229  *
1230  * Get the current frequency of the CPU, as reported by the driver
1231  */
1232 unsigned int cpufreq_get(unsigned int cpu)
1233 {
1234 	unsigned int ret_freq = 0;
1235 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1236 
1237 	if (!policy)
1238 		goto out;
1239 
1240 	if (unlikely(lock_policy_rwsem_read(cpu)))
1241 		goto out_policy;
1242 
1243 	ret_freq = __cpufreq_get(cpu);
1244 
1245 	unlock_policy_rwsem_read(cpu);
1246 
1247 out_policy:
1248 	cpufreq_cpu_put(policy);
1249 out:
1250 	return ret_freq;
1251 }
1252 EXPORT_SYMBOL(cpufreq_get);
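/*
 * Example use of the two getters from other kernel code (a sketch):
 * cpufreq_quick_get() only returns the cached policy->cur, while
 * cpufreq_get() asks the driver and may therefore sleep.
 *
 *	unsigned int khz = cpufreq_quick_get(0);	// last known freq
 *	if (!khz)
 *		khz = cpufreq_get(0);			// query the driver
 */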
1253 
1254 static struct sysdev_driver cpufreq_sysdev_driver = {
1255 	.add		= cpufreq_add_dev,
1256 	.remove		= cpufreq_remove_dev,
1257 };
1258 
1259 
1260 /**
1261  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1262  *
1263  * This function is only executed for the boot processor.  The other CPUs
1264  * have been put offline by means of CPU hotplug.
1265  */
1266 static int cpufreq_bp_suspend(void)
1267 {
1268 	int ret = 0;
1269 
1270 	int cpu = smp_processor_id();
1271 	struct cpufreq_policy *cpu_policy;
1272 
1273 	pr_debug("suspending cpu %u\n", cpu);
1274 
1275 	/* If there's no policy for the boot CPU, we have nothing to do. */
1276 	cpu_policy = cpufreq_cpu_get(cpu);
1277 	if (!cpu_policy)
1278 		return 0;
1279 
1280 	if (cpufreq_driver->suspend) {
1281 		ret = cpufreq_driver->suspend(cpu_policy);
1282 		if (ret)
1283 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1284 					"step on CPU %u\n", cpu_policy->cpu);
1285 	}
1286 
1287 	cpufreq_cpu_put(cpu_policy);
1288 	return ret;
1289 }
1290 
1291 /**
1292  * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1293  *
1294  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1295  *	2.) schedule a call to cpufreq_update_policy() ASAP once interrupts
1296  *	    are restored. It will verify that the current freq is in sync
1297  *	    with what we believe it to be. This is a bit later than it
1298  *	    should be, but nonetheless it's better than calling
1299  *	    cpufreq_driver->get() here which might re-enable interrupts...
1300  *
1301  * This function is only executed for the boot CPU.  The other CPUs have not
1302  * been turned on yet.
1303  */
1304 static void cpufreq_bp_resume(void)
1305 {
1306 	int ret = 0;
1307 
1308 	int cpu = smp_processor_id();
1309 	struct cpufreq_policy *cpu_policy;
1310 
1311 	pr_debug("resuming cpu %u\n", cpu);
1312 
1313 	/* If there's no policy for the boot CPU, we have nothing to do. */
1314 	cpu_policy = cpufreq_cpu_get(cpu);
1315 	if (!cpu_policy)
1316 		return;
1317 
1318 	if (cpufreq_driver->resume) {
1319 		ret = cpufreq_driver->resume(cpu_policy);
1320 		if (ret) {
1321 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1322 					"step on CPU %u\n", cpu_policy->cpu);
1323 			goto fail;
1324 		}
1325 	}
1326 
1327 	schedule_work(&cpu_policy->update);
1328 
1329 fail:
1330 	cpufreq_cpu_put(cpu_policy);
1331 }
1332 
1333 static struct syscore_ops cpufreq_syscore_ops = {
1334 	.suspend	= cpufreq_bp_suspend,
1335 	.resume		= cpufreq_bp_resume,
1336 };
1337 
1338 
1339 /*********************************************************************
1340  *                     NOTIFIER LISTS INTERFACE                      *
1341  *********************************************************************/
1342 
1343 /**
1344  *	cpufreq_register_notifier - register a driver with cpufreq
1345  *	@nb: notifier function to register
1346  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1347  *
1348  *	Add a driver to one of two lists: either a list of drivers that
1349  *      are notified about clock rate changes (once before and once after
1350  *      the transition), or a list of drivers that are notified about
1351  *      changes in cpufreq policy.
1352  *
1353  *	This function may sleep, and has the same return conditions as
1354  *	blocking_notifier_chain_register.
1355  */
1356 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1357 {
1358 	int ret;
1359 
1360 	WARN_ON(!init_cpufreq_transition_notifier_list_called);
1361 
1362 	switch (list) {
1363 	case CPUFREQ_TRANSITION_NOTIFIER:
1364 		ret = srcu_notifier_chain_register(
1365 				&cpufreq_transition_notifier_list, nb);
1366 		break;
1367 	case CPUFREQ_POLICY_NOTIFIER:
1368 		ret = blocking_notifier_chain_register(
1369 				&cpufreq_policy_notifier_list, nb);
1370 		break;
1371 	default:
1372 		ret = -EINVAL;
1373 	}
1374 
1375 	return ret;
1376 }
1377 EXPORT_SYMBOL(cpufreq_register_notifier);
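/*
 * A typical transition notifier, as registered through the function
 * above (hypothetical client sketch; the my_* names are illustrative):
 *
 *	static int my_freq_notify(struct notifier_block *nb,
 *				  unsigned long state, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (state == CPUFREQ_POSTCHANGE)
 *			pr_info("cpu%u now at %u kHz\n",
 *				freqs->cpu, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_freq_nb = {
 *		.notifier_call = my_freq_notify,
 *	};
 *
 *	cpufreq_register_notifier(&my_freq_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */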
1378 
1379 
1380 /**
1381  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1382  *	@nb: notifier block to be unregistered
1383  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1384  *
1385  *	Remove a driver from the CPU frequency notifier list.
1386  *
1387  *	This function may sleep, and has the same return conditions as
1388  *	blocking_notifier_chain_unregister.
1389  */
1390 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1391 {
1392 	int ret;
1393 
1394 	switch (list) {
1395 	case CPUFREQ_TRANSITION_NOTIFIER:
1396 		ret = srcu_notifier_chain_unregister(
1397 				&cpufreq_transition_notifier_list, nb);
1398 		break;
1399 	case CPUFREQ_POLICY_NOTIFIER:
1400 		ret = blocking_notifier_chain_unregister(
1401 				&cpufreq_policy_notifier_list, nb);
1402 		break;
1403 	default:
1404 		ret = -EINVAL;
1405 	}
1406 
1407 	return ret;
1408 }
1409 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1410 
1411 
1412 /*********************************************************************
1413  *                              GOVERNORS                            *
1414  *********************************************************************/
1415 
1416 
1417 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1418 			    unsigned int target_freq,
1419 			    unsigned int relation)
1420 {
1421 	int retval = -EINVAL;
1422 
1423 	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1424 		target_freq, relation);
1425 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1426 		retval = cpufreq_driver->target(policy, target_freq, relation);
1427 
1428 	return retval;
1429 }
1430 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1431 
1432 int cpufreq_driver_target(struct cpufreq_policy *policy,
1433 			  unsigned int target_freq,
1434 			  unsigned int relation)
1435 {
1436 	int ret = -EINVAL;
1437 
1438 	policy = cpufreq_cpu_get(policy->cpu);
1439 	if (!policy)
1440 		goto no_policy;
1441 
1442 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1443 		goto fail;
1444 
1445 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1446 
1447 	unlock_policy_rwsem_write(policy->cpu);
1448 
1449 fail:
1450 	cpufreq_cpu_put(policy);
1451 no_policy:
1452 	return ret;
1453 }
1454 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
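/*
 * Governors request frequency changes through the wrappers above; a
 * governor that already holds the policy rwsem uses the __ variant
 * directly (sketch):
 *
 *	__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
 *
 * CPUFREQ_RELATION_L selects the lowest frequency at or above the
 * target, CPUFREQ_RELATION_H the highest frequency at or below it.
 */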
1455 
1456 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1457 {
1458 	int ret = 0;
1459 
1460 	policy = cpufreq_cpu_get(policy->cpu);
1461 	if (!policy)
1462 		return -EINVAL;
1463 
1464 	if (cpu_online(cpu) && cpufreq_driver->getavg)
1465 		ret = cpufreq_driver->getavg(policy, cpu);
1466 
1467 	cpufreq_cpu_put(policy);
1468 	return ret;
1469 }
1470 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1471 
1472 /*
1473  * Hand a governor event (e.g. CPUFREQ_GOV_LIMITS) to the policy's governor
1474  */
1475 
1476 static int __cpufreq_governor(struct cpufreq_policy *policy,
1477 					unsigned int event)
1478 {
1479 	int ret;
1480 
1481 	/* A fallback only needs to be defined when the default governor is
1482 	   known to have latency restrictions, like e.g. conservative or
1483 	   ondemand. Kconfig already ensures that this is the case.
1484 	*/
1485 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1486 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1487 #else
1488 	struct cpufreq_governor *gov = NULL;
1489 #endif
1490 
1491 	if (policy->governor->max_transition_latency &&
1492 	    policy->cpuinfo.transition_latency >
1493 	    policy->governor->max_transition_latency) {
1494 		if (!gov)
1495 			return -EINVAL;
1496 		else {
1497 			printk(KERN_WARNING "%s governor failed, HW transition"
1498 			       " latency too long, falling back"
1499 			       " to %s governor\n",
1500 			       policy->governor->name,
1501 			       gov->name);
1502 			policy->governor = gov;
1503 		}
1504 	}
1505 
1506 	if (!try_module_get(policy->governor->owner))
1507 		return -EINVAL;
1508 
1509 	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1510 						policy->cpu, event);
1511 	ret = policy->governor->governor(policy, event);
1512 
1513 	/* we keep one module reference alive for
1514 	 * each CPU governed by this policy */
1515 	if ((event != CPUFREQ_GOV_START) || ret)
1516 		module_put(policy->governor->owner);
1517 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1518 		module_put(policy->governor->owner);
1519 
1520 	return ret;
1521 }
1522 
1523 
1524 int cpufreq_register_governor(struct cpufreq_governor *governor)
1525 {
1526 	int err;
1527 
1528 	if (!governor)
1529 		return -EINVAL;
1530 
1531 	mutex_lock(&cpufreq_governor_mutex);
1532 
1533 	err = -EBUSY;
1534 	if (__find_governor(governor->name) == NULL) {
1535 		err = 0;
1536 		list_add(&governor->governor_list, &cpufreq_governor_list);
1537 	}
1538 
1539 	mutex_unlock(&cpufreq_governor_mutex);
1540 	return err;
1541 }
1542 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
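/*
 * Skeleton of a governor module using the registration call above
 * (hypothetical sketch; "mygov" and my_governor_fn are illustrative):
 *
 *	static int my_governor_fn(struct cpufreq_policy *policy,
 *				  unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:		// begin managing policy->cpu
 *		case CPUFREQ_GOV_STOP:		// stop managing it
 *		case CPUFREQ_GOV_LIMITS:	// policy->min/max changed
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor my_governor = {
 *		.name     = "mygov",
 *		.governor = my_governor_fn,
 *		.owner    = THIS_MODULE,
 *	};
 *
 *	cpufreq_register_governor(&my_governor);
 */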
1543 
1544 
1545 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1546 {
1547 #ifdef CONFIG_HOTPLUG_CPU
1548 	int cpu;
1549 #endif
1550 
1551 	if (!governor)
1552 		return;
1553 
1554 #ifdef CONFIG_HOTPLUG_CPU
1555 	for_each_present_cpu(cpu) {
1556 		if (cpu_online(cpu))
1557 			continue;
1558 		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1559 			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1560 	}
1561 #endif
1562 
1563 	mutex_lock(&cpufreq_governor_mutex);
1564 	list_del(&governor->governor_list);
1565 	mutex_unlock(&cpufreq_governor_mutex);
1566 	return;
1567 }
1568 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1569 
1570 
1571 
1572 /*********************************************************************
1573  *                          POLICY INTERFACE                         *
1574  *********************************************************************/
1575 
1576 /**
1577  * cpufreq_get_policy - get the current cpufreq_policy
1578  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1579  *	is written
1580  *
1581  * Reads the current cpufreq policy.
1582  */
1583 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1584 {
1585 	struct cpufreq_policy *cpu_policy;
1586 	if (!policy)
1587 		return -EINVAL;
1588 
1589 	cpu_policy = cpufreq_cpu_get(cpu);
1590 	if (!cpu_policy)
1591 		return -EINVAL;
1592 
1593 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1594 
1595 	cpufreq_cpu_put(cpu_policy);
1596 	return 0;
1597 }
1598 EXPORT_SYMBOL(cpufreq_get_policy);
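/*
 * Example use of cpufreq_get_policy() (a sketch): it copies the live
 * policy into a caller-provided buffer, so the snapshot can then be
 * inspected without holding any cpufreq lock:
 *
 *	struct cpufreq_policy pol;
 *
 *	if (!cpufreq_get_policy(&pol, cpu))
 *		pr_info("cpu%u: %u..%u kHz\n", cpu, pol.min, pol.max);
 */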
1599 
1600 
1601 /*
1602  * data   : current policy.
1603  * policy : policy to be set.
1604  */
1605 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1606 				struct cpufreq_policy *policy)
1607 {
1608 	int ret = 0;
1609 
1610 	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1611 		policy->min, policy->max);
1612 
1613 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1614 				sizeof(struct cpufreq_cpuinfo));
1615 
1616 	if (policy->min > data->max || policy->max < data->min) {
1617 		ret = -EINVAL;
1618 		goto error_out;
1619 	}
1620 
1621 	/* verify the cpu speed can be set within this limit */
1622 	ret = cpufreq_driver->verify(policy);
1623 	if (ret)
1624 		goto error_out;
1625 
1626 	/* adjust if necessary - all reasons */
1627 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1628 			CPUFREQ_ADJUST, policy);
1629 
1630 	/* adjust if necessary - hardware incompatibility*/
1631 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1632 			CPUFREQ_INCOMPATIBLE, policy);
1633 
1634 	/* verify the cpu speed can be set within this limit,
1635 	   which might be different to the first one */
1636 	ret = cpufreq_driver->verify(policy);
1637 	if (ret)
1638 		goto error_out;
1639 
1640 	/* notification of the new policy */
1641 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1642 			CPUFREQ_NOTIFY, policy);
1643 
1644 	data->min = policy->min;
1645 	data->max = policy->max;
1646 
1647 	pr_debug("new min and max freqs are %u - %u kHz\n",
1648 					data->min, data->max);
1649 
1650 	if (cpufreq_driver->setpolicy) {
1651 		data->policy = policy->policy;
1652 		pr_debug("setting range\n");
1653 		ret = cpufreq_driver->setpolicy(policy);
1654 	} else {
1655 		if (policy->governor != data->governor) {
1656 			/* save old, working values */
1657 			struct cpufreq_governor *old_gov = data->governor;
1658 
1659 			pr_debug("governor switch\n");
1660 
1661 			/* end old governor */
1662 			if (data->governor)
1663 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1664 
1665 			/* start new governor */
1666 			data->governor = policy->governor;
1667 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1668 				/* new governor failed, so re-start old one */
1669 				pr_debug("starting governor %s failed\n",
1670 							data->governor->name);
1671 				if (old_gov) {
1672 					data->governor = old_gov;
1673 					__cpufreq_governor(data,
1674 							   CPUFREQ_GOV_START);
1675 				}
1676 				ret = -EINVAL;
1677 				goto error_out;
1678 			}
1679 			/* might be a policy change, too, so fall through */
1680 		}
1681 		pr_debug("governor: change or update limits\n");
1682 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1683 	}
1684 
1685 error_out:
1686 	return ret;
1687 }
1688 
1689 /**
1690  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1691  *	@cpu: CPU which shall be re-evaluated
1692  *
1693  *	Useful for policy notifiers which have different necessities
1694  *	at different times.
1695  */
1696 int cpufreq_update_policy(unsigned int cpu)
1697 {
1698 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1699 	struct cpufreq_policy policy;
1700 	int ret;
1701 
1702 	if (!data) {
1703 		ret = -ENODEV;
1704 		goto no_policy;
1705 	}
1706 
1707 	if (unlikely(lock_policy_rwsem_write(cpu))) {
1708 		ret = -EINVAL;
1709 		goto fail;
1710 	}
1711 
1712 	pr_debug("updating policy for CPU %u\n", cpu);
1713 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1714 	policy.min = data->user_policy.min;
1715 	policy.max = data->user_policy.max;
1716 	policy.policy = data->user_policy.policy;
1717 	policy.governor = data->user_policy.governor;
1718 
1719 	/* BIOS might change freq behind our back
1720 	  -> ask driver for current freq and notify governors about a change */
1721 	if (cpufreq_driver->get) {
1722 		policy.cur = cpufreq_driver->get(cpu);
1723 		if (!data->cur) {
1724 			pr_debug("Driver did not initialize current freq");
1725 			data->cur = policy.cur;
1726 		} else {
1727 			if (data->cur != policy.cur)
1728 				cpufreq_out_of_sync(cpu, data->cur,
1729 								policy.cur);
1730 		}
1731 	}
1732 
1733 	ret = __cpufreq_set_policy(data, &policy);
1734 
1735 	unlock_policy_rwsem_write(cpu);
1736 
1737 fail:
1738 	cpufreq_cpu_put(data);
1739 no_policy:
1740 	return ret;
1741 }
1742 EXPORT_SYMBOL(cpufreq_update_policy);
1743 
1744 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1745 					unsigned long action, void *hcpu)
1746 {
1747 	unsigned int cpu = (unsigned long)hcpu;
1748 	struct sys_device *sys_dev;
1749 
1750 	sys_dev = get_cpu_sysdev(cpu);
1751 	if (sys_dev) {
1752 		switch (action) {
1753 		case CPU_ONLINE:
1754 		case CPU_ONLINE_FROZEN:
1755 			cpufreq_add_dev(sys_dev);
1756 			break;
1757 		case CPU_DOWN_PREPARE:
1758 		case CPU_DOWN_PREPARE_FROZEN:
1759 			if (unlikely(lock_policy_rwsem_write(cpu)))
1760 				BUG();
1761 
1762 			__cpufreq_remove_dev(sys_dev);
1763 			break;
1764 		case CPU_DOWN_FAILED:
1765 		case CPU_DOWN_FAILED_FROZEN:
1766 			cpufreq_add_dev(sys_dev);
1767 			break;
1768 		}
1769 	}
1770 	return NOTIFY_OK;
1771 }
1772 
1773 static struct notifier_block __refdata cpufreq_cpu_notifier = {
1774     .notifier_call = cpufreq_cpu_callback,
1775 };
1776 
1777 /*********************************************************************
1778  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1779  *********************************************************************/
1780 
1781 /**
1782  * cpufreq_register_driver - register a CPU Frequency driver
1783  * @driver_data: A struct cpufreq_driver containing the values
1784  * submitted by the CPU Frequency driver.
1785  *
1786  *   Registers a CPU Frequency driver to this core code. This code
1787  * returns zero on success, -EBUSY when another driver got here first
1788  * (and isn't unregistered in the meantime).
1789  *
1790  */
1791 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1792 {
1793 	unsigned long flags;
1794 	int ret;
1795 
1796 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1797 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1798 		return -EINVAL;
1799 
1800 	pr_debug("trying to register driver %s\n", driver_data->name);
1801 
1802 	if (driver_data->setpolicy)
1803 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1804 
1805 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1806 	if (cpufreq_driver) {
1807 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1808 		return -EBUSY;
1809 	}
1810 	cpufreq_driver = driver_data;
1811 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1812 
1813 	ret = sysdev_driver_register(&cpu_sysdev_class,
1814 					&cpufreq_sysdev_driver);
1815 	if (ret)
1816 		goto err_null_driver;
1817 
1818 	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1819 		int i;
1820 		ret = -ENODEV;
1821 
1822 		/* check for at least one working CPU */
1823 		for (i = 0; i < nr_cpu_ids; i++)
1824 			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1825 				ret = 0;
1826 				break;
1827 			}
1828 
1829 		/* if all ->init() calls failed, unregister */
1830 		if (ret) {
1831 			pr_debug("no CPU initialized for driver %s\n",
1832 							driver_data->name);
1833 			goto err_sysdev_unreg;
1834 		}
1835 	}
1836 
1837 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
1838 	pr_debug("driver %s up and running\n", driver_data->name);
1839 
1840 	return 0;
1841 err_sysdev_unreg:
1842 	sysdev_driver_unregister(&cpu_sysdev_class,
1843 			&cpufreq_sysdev_driver);
1844 err_null_driver:
1845 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1846 	cpufreq_driver = NULL;
1847 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1848 	return ret;
1849 }
1850 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
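/*
 * Minimal shape of a scaling driver built around the call above
 * (hypothetical sketch; the my_* callbacks are illustrative and would
 * be implemented by the platform):
 *
 *	static struct cpufreq_driver my_cpufreq_driver = {
 *		.name   = "mydrv",
 *		.owner  = THIS_MODULE,
 *		.init   = my_cpu_init,	// fill policy->cpuinfo, min, max
 *		.verify = my_verify,	// clamp a policy to HW limits
 *		.target = my_target,	// actually switch the frequency
 *		.get    = my_get,	// read the current frequency
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return cpufreq_register_driver(&my_cpufreq_driver);
 *	}
 */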
1851 
1852 
1853 /**
1854  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1855  *
1856  *    Unregister the current CPUFreq driver. Only call this if you have
1857  * the right to do so, i.e. if you have succeeded in initialising before!
1858  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1859  * currently not initialised.
1860  */
1861 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1862 {
1863 	unsigned long flags;
1864 
1865 	if (!cpufreq_driver || (driver != cpufreq_driver))
1866 		return -EINVAL;
1867 
1868 	pr_debug("unregistering driver %s\n", driver->name);
1869 
1870 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1871 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1872 
1873 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1874 	cpufreq_driver = NULL;
1875 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1876 
1877 	return 0;
1878 }
1879 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1880 
1881 static int __init cpufreq_core_init(void)
1882 {
1883 	int cpu;
1884 
1885 	for_each_possible_cpu(cpu) {
1886 		per_cpu(cpufreq_policy_cpu, cpu) = -1;
1887 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1888 	}
1889 
1890 	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
1891 						&cpu_sysdev_class.kset.kobj);
1892 	BUG_ON(!cpufreq_global_kobject);
1893 	register_syscore_ops(&cpufreq_syscore_ops);
1894 
1895 	return 0;
1896 }
1897 core_initcall(cpufreq_core_init);
1898