xref: /linux/drivers/cpuidle/cpuidle.c (revision d229807f669ba3dea9f64467ee965051c4366aed)
1 /*
2  * cpuidle.c - core cpuidle infrastructure
3  *
4  * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
5  *               Shaohua Li <shaohua.li@intel.com>
6  *               Adam Belay <abelay@novell.com>
7  *
8  * This code is licenced under the GPL.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/mutex.h>
13 #include <linux/sched.h>
14 #include <linux/notifier.h>
15 #include <linux/pm_qos.h>
16 #include <linux/cpu.h>
17 #include <linux/cpuidle.h>
18 #include <linux/ktime.h>
19 #include <linux/hrtimer.h>
20 #include <linux/module.h>
21 #include <trace/events/power.h>
22 
23 #include "cpuidle.h"
24 
/* Per-CPU pointer to the cpuidle device registered for that CPU. */
DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);

/* Serializes device registration, enable/disable and handler install. */
DEFINE_MUTEX(cpuidle_lock);
/* Every device that passed __cpuidle_register_device() is linked here. */
LIST_HEAD(cpuidle_detected_devices);

static int enabled_devices;		/* count of currently enabled devices */
static int off __read_mostly;		/* non-zero: cpuidle globally disabled */
static int initialized __read_mostly;	/* non-zero: idle handler is live */
33 
/**
 * cpuidle_disabled - report whether cpuidle is globally disabled
 *
 * Returns non-zero once disable_cpuidle() has run or the "off" module
 * parameter was set on the command line.
 */
int cpuidle_disabled(void)
{
	return off;
}
/**
 * disable_cpuidle - globally disable cpuidle
 *
 * After this, cpuidle_idle_call() bails out with -ENODEV.  Nothing in
 * this file ever clears the flag again.
 */
void disable_cpuidle(void)
{
	off = 1;
}
42 
/*
 * cpuidle_kick_cpus - force every CPU out of its current idle state.
 *
 * Called after "initialized" is cleared so no CPU keeps executing the
 * old idle handler.  SMP architectures without a cpu_idle_wait()
 * implementation cannot do this safely, hence the hard build error.
 */
#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
static void cpuidle_kick_cpus(void)
{
	cpu_idle_wait();
}
#elif defined(CONFIG_SMP)
# error "Arch needs cpu_idle_wait() equivalent here"
#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT && !CONFIG_SMP */
static void cpuidle_kick_cpus(void) {}	/* UP: no other CPUs to kick */
#endif
53 
/* Forward declaration: needed by cpuidle_enable_device() below. */
static int __cpuidle_register_device(struct cpuidle_device *dev);
55 
56 /**
57  * cpuidle_idle_call - the main idle loop
58  *
59  * NOTE: no locks or semaphores should be used here
60  * return non-zero on failure
61  */
62 int cpuidle_idle_call(void)
63 {
64 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
65 	struct cpuidle_state *target_state;
66 	int next_state;
67 
68 	if (off)
69 		return -ENODEV;
70 
71 	if (!initialized)
72 		return -ENODEV;
73 
74 	/* check if the device is ready */
75 	if (!dev || !dev->enabled)
76 		return -EBUSY;
77 
78 #if 0
79 	/* shows regressions, re-enable for 2.6.29 */
80 	/*
81 	 * run any timers that can be run now, at this point
82 	 * before calculating the idle duration etc.
83 	 */
84 	hrtimer_peek_ahead_timers();
85 #endif
86 
87 	/*
88 	 * Call the device's prepare function before calling the
89 	 * governor's select function.  ->prepare gives the device's
90 	 * cpuidle driver a chance to update any dynamic information
91 	 * of its cpuidle states for the current idle period, e.g.
92 	 * state availability, latencies, residencies, etc.
93 	 */
94 	if (dev->prepare)
95 		dev->prepare(dev);
96 
97 	/* ask the governor for the next state */
98 	next_state = cpuidle_curr_governor->select(dev);
99 	if (need_resched()) {
100 		local_irq_enable();
101 		return 0;
102 	}
103 
104 	target_state = &dev->states[next_state];
105 
106 	/* enter the state and update stats */
107 	dev->last_state = target_state;
108 
109 	trace_power_start(POWER_CSTATE, next_state, dev->cpu);
110 	trace_cpu_idle(next_state, dev->cpu);
111 
112 	dev->last_residency = target_state->enter(dev, target_state);
113 
114 	trace_power_end(dev->cpu);
115 	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
116 
117 	if (dev->last_state)
118 		target_state = dev->last_state;
119 
120 	target_state->time += (unsigned long long)dev->last_residency;
121 	target_state->usage++;
122 
123 	/* give the governor an opportunity to reflect on the outcome */
124 	if (cpuidle_curr_governor->reflect)
125 		cpuidle_curr_governor->reflect(dev);
126 
127 	return 0;
128 }
129 
130 /**
131  * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
132  */
133 void cpuidle_install_idle_handler(void)
134 {
135 	if (enabled_devices) {
136 		/* Make sure all changes finished before we switch to new idle */
137 		smp_wmb();
138 		initialized = 1;
139 	}
140 }
141 
142 /**
143  * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
144  */
145 void cpuidle_uninstall_idle_handler(void)
146 {
147 	if (enabled_devices) {
148 		initialized = 0;
149 		cpuidle_kick_cpus();
150 	}
151 }
152 
153 /**
154  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
155  */
156 void cpuidle_pause_and_lock(void)
157 {
158 	mutex_lock(&cpuidle_lock);
159 	cpuidle_uninstall_idle_handler();
160 }
161 
162 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
163 
164 /**
165  * cpuidle_resume_and_unlock - resumes CPUIDLE operation
166  */
167 void cpuidle_resume_and_unlock(void)
168 {
169 	cpuidle_install_idle_handler();
170 	mutex_unlock(&cpuidle_lock);
171 }
172 
173 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
174 
#ifdef CONFIG_ARCH_HAS_CPU_RELAX
/*
 * poll_idle - ->enter() callback for the core-provided polling state.
 *
 * Busy-waits on cpu_relax() with interrupts enabled until need_resched()
 * becomes true, then returns the elapsed time in microseconds, clamped
 * so the cast to int below cannot overflow.
 */
static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
{
	ktime_t	t1, t2;
	s64 diff;
	int ret;

	t1 = ktime_get();
	local_irq_enable();
	while (!need_resched())
		cpu_relax();

	t2 = ktime_get();
	diff = ktime_to_us(ktime_sub(t2, t1));
	if (diff > INT_MAX)
		diff = INT_MAX;

	ret = (int) diff;
	return ret;
}

/*
 * poll_idle_init - install the "POLL" state in slot 0 of the device.
 * Zero latency/residency mark it as the shallowest state; power_usage
 * of -1 keeps it above the -2, -3, ... defaults assigned to deeper
 * states in __cpuidle_register_device().
 */
static void poll_idle_init(struct cpuidle_device *dev)
{
	struct cpuidle_state *state = &dev->states[0];

	cpuidle_set_statedata(state, NULL);

	snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
	state->exit_latency = 0;
	state->target_residency = 0;
	state->power_usage = -1;
	state->flags = 0;
	state->enter = poll_idle;
}
#else
static void poll_idle_init(struct cpuidle_device *dev) {}
#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
213 
214 /**
215  * cpuidle_enable_device - enables idle PM for a CPU
216  * @dev: the CPU
217  *
218  * This function must be called between cpuidle_pause_and_lock and
219  * cpuidle_resume_and_unlock when used externally.
220  */
221 int cpuidle_enable_device(struct cpuidle_device *dev)
222 {
223 	int ret, i;
224 
225 	if (dev->enabled)
226 		return 0;
227 	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
228 		return -EIO;
229 	if (!dev->state_count)
230 		return -EINVAL;
231 
232 	if (dev->registered == 0) {
233 		ret = __cpuidle_register_device(dev);
234 		if (ret)
235 			return ret;
236 	}
237 
238 	poll_idle_init(dev);
239 
240 	if ((ret = cpuidle_add_state_sysfs(dev)))
241 		return ret;
242 
243 	if (cpuidle_curr_governor->enable &&
244 	    (ret = cpuidle_curr_governor->enable(dev)))
245 		goto fail_sysfs;
246 
247 	for (i = 0; i < dev->state_count; i++) {
248 		dev->states[i].usage = 0;
249 		dev->states[i].time = 0;
250 	}
251 	dev->last_residency = 0;
252 	dev->last_state = NULL;
253 
254 	smp_wmb();
255 
256 	dev->enabled = 1;
257 
258 	enabled_devices++;
259 	return 0;
260 
261 fail_sysfs:
262 	cpuidle_remove_state_sysfs(dev);
263 
264 	return ret;
265 }
266 
267 EXPORT_SYMBOL_GPL(cpuidle_enable_device);
268 
269 /**
270  * cpuidle_disable_device - disables idle PM for a CPU
271  * @dev: the CPU
272  *
273  * This function must be called between cpuidle_pause_and_lock and
274  * cpuidle_resume_and_unlock when used externally.
275  */
276 void cpuidle_disable_device(struct cpuidle_device *dev)
277 {
278 	if (!dev->enabled)
279 		return;
280 	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
281 		return;
282 
283 	dev->enabled = 0;
284 
285 	if (cpuidle_curr_governor->disable)
286 		cpuidle_curr_governor->disable(dev);
287 
288 	cpuidle_remove_state_sysfs(dev);
289 	enabled_devices--;
290 }
291 
292 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
293 
294 /**
295  * __cpuidle_register_device - internal register function called before register
296  * and enable routines
297  * @dev: the cpu
298  *
299  * cpuidle_lock mutex must be held before this is called
300  */
301 static int __cpuidle_register_device(struct cpuidle_device *dev)
302 {
303 	int ret;
304 	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
305 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
306 
307 	if (!sys_dev)
308 		return -EINVAL;
309 	if (!try_module_get(cpuidle_driver->owner))
310 		return -EINVAL;
311 
312 	init_completion(&dev->kobj_unregister);
313 
314 	/*
315 	 * cpuidle driver should set the dev->power_specified bit
316 	 * before registering the device if the driver provides
317 	 * power_usage numbers.
318 	 *
319 	 * For those devices whose ->power_specified is not set,
320 	 * we fill in power_usage with decreasing values as the
321 	 * cpuidle code has an implicit assumption that state Cn
322 	 * uses less power than C(n-1).
323 	 *
324 	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
325 	 * an power value of -1.  So we use -2, -3, etc, for other
326 	 * c-states.
327 	 */
328 	if (!dev->power_specified) {
329 		int i;
330 		for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++)
331 			dev->states[i].power_usage = -1 - i;
332 	}
333 
334 	per_cpu(cpuidle_devices, dev->cpu) = dev;
335 	list_add(&dev->device_list, &cpuidle_detected_devices);
336 	if ((ret = cpuidle_add_sysfs(sys_dev))) {
337 		module_put(cpuidle_driver->owner);
338 		return ret;
339 	}
340 
341 	dev->registered = 1;
342 	return 0;
343 }
344 
345 /**
346  * cpuidle_register_device - registers a CPU's idle PM feature
347  * @dev: the cpu
348  */
349 int cpuidle_register_device(struct cpuidle_device *dev)
350 {
351 	int ret;
352 
353 	mutex_lock(&cpuidle_lock);
354 
355 	if ((ret = __cpuidle_register_device(dev))) {
356 		mutex_unlock(&cpuidle_lock);
357 		return ret;
358 	}
359 
360 	cpuidle_enable_device(dev);
361 	cpuidle_install_idle_handler();
362 
363 	mutex_unlock(&cpuidle_lock);
364 
365 	return 0;
366 
367 }
368 
369 EXPORT_SYMBOL_GPL(cpuidle_register_device);
370 
371 /**
372  * cpuidle_unregister_device - unregisters a CPU's idle PM feature
373  * @dev: the cpu
374  */
375 void cpuidle_unregister_device(struct cpuidle_device *dev)
376 {
377 	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
378 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
379 
380 	if (dev->registered == 0)
381 		return;
382 
383 	cpuidle_pause_and_lock();
384 
385 	cpuidle_disable_device(dev);
386 
387 	cpuidle_remove_sysfs(sys_dev);
388 	list_del(&dev->device_list);
389 	wait_for_completion(&dev->kobj_unregister);
390 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
391 
392 	cpuidle_resume_and_unlock();
393 
394 	module_put(cpuidle_driver->owner);
395 }
396 
397 EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
398 
#ifdef CONFIG_SMP

/* IPI payload: the interrupt itself is the wakeup, so nothing to do. */
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.  This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int cpuidle_latency_notify(struct notifier_block *b,
		unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 1);
	return NOTIFY_OK;
}

static struct notifier_block cpuidle_latency_notifier = {
	.notifier_call = cpuidle_latency_notify,
};

/* Hook the notifier into the CPU/DMA latency PM QoS class. */
static inline void latency_notifier_init(struct notifier_block *n)
{
	pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
}

#else /* CONFIG_SMP */

/* UP: latency changes never need to kick other CPUs. */
#define latency_notifier_init(x) do { } while (0)

#endif /* CONFIG_SMP */
433 
434 /**
435  * cpuidle_init - core initializer
436  */
437 static int __init cpuidle_init(void)
438 {
439 	int ret;
440 
441 	if (cpuidle_disabled())
442 		return -ENODEV;
443 
444 	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
445 	if (ret)
446 		return ret;
447 
448 	latency_notifier_init(&cpuidle_latency_notifier);
449 
450 	return 0;
451 }
452 
/* "off" is read-only at runtime: /sys/module/cpuidle/parameters/off */
module_param(off, int, 0444);
core_initcall(cpuidle_init);
455