xref: /linux/drivers/cpuidle/cpuidle.c (revision b9ccfda293ee6fca9a89a1584f0900e0627b975e)
1 /*
2  * cpuidle.c - core cpuidle infrastructure
3  *
4  * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
5  *               Shaohua Li <shaohua.li@intel.com>
6  *               Adam Belay <abelay@novell.com>
7  *
8  * This code is licenced under the GPL.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/mutex.h>
13 #include <linux/sched.h>
14 #include <linux/notifier.h>
15 #include <linux/pm_qos.h>
16 #include <linux/cpu.h>
17 #include <linux/cpuidle.h>
18 #include <linux/ktime.h>
19 #include <linux/hrtimer.h>
20 #include <linux/module.h>
21 #include <trace/events/power.h>
22 
23 #include "cpuidle.h"
24 
25 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
26 
27 DEFINE_MUTEX(cpuidle_lock);
28 LIST_HEAD(cpuidle_detected_devices);
29 
30 static int enabled_devices;
31 static int off __read_mostly;
32 static int initialized __read_mostly;
33 
34 int cpuidle_disabled(void)
35 {
36 	return off;
37 }
38 void disable_cpuidle(void)
39 {
40 	off = 1;
41 }
42 
43 static int __cpuidle_register_device(struct cpuidle_device *dev);
44 
45 static inline int cpuidle_enter(struct cpuidle_device *dev,
46 				struct cpuidle_driver *drv, int index)
47 {
48 	struct cpuidle_state *target_state = &drv->states[index];
49 	return target_state->enter(dev, drv, index);
50 }
51 
52 static inline int cpuidle_enter_tk(struct cpuidle_device *dev,
53 			       struct cpuidle_driver *drv, int index)
54 {
55 	return cpuidle_wrap_enter(dev, drv, index, cpuidle_enter);
56 }
57 
/* Signature shared by cpuidle_enter() and cpuidle_enter_tk(). */
typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev,
			       struct cpuidle_driver *drv, int index);

/*
 * Entry routine used by cpuidle_idle_call(); chosen in
 * cpuidle_enable_device() from the driver's en_core_tk_irqen setting.
 */
static cpuidle_enter_t cpuidle_enter_ops;
62 
63 /**
64  * cpuidle_play_dead - cpu off-lining
65  *
66  * Returns in case of an error or no driver
67  */
68 int cpuidle_play_dead(void)
69 {
70 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
71 	struct cpuidle_driver *drv = cpuidle_get_driver();
72 	int i, dead_state = -1;
73 	int power_usage = -1;
74 
75 	if (!drv)
76 		return -ENODEV;
77 
78 	/* Find lowest-power state that supports long-term idle */
79 	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
80 		struct cpuidle_state *s = &drv->states[i];
81 
82 		if (s->power_usage < power_usage && s->enter_dead) {
83 			power_usage = s->power_usage;
84 			dead_state = i;
85 		}
86 	}
87 
88 	if (dead_state != -1)
89 		return drv->states[dead_state].enter_dead(dev, dead_state);
90 
91 	return -ENODEV;
92 }
93 
94 /**
95  * cpuidle_idle_call - the main idle loop
96  *
97  * NOTE: no locks or semaphores should be used here
98  * return non-zero on failure
99  */
100 int cpuidle_idle_call(void)
101 {
102 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
103 	struct cpuidle_driver *drv = cpuidle_get_driver();
104 	int next_state, entered_state;
105 
106 	if (off)
107 		return -ENODEV;
108 
109 	if (!initialized)
110 		return -ENODEV;
111 
112 	/* check if the device is ready */
113 	if (!dev || !dev->enabled)
114 		return -EBUSY;
115 
116 #if 0
117 	/* shows regressions, re-enable for 2.6.29 */
118 	/*
119 	 * run any timers that can be run now, at this point
120 	 * before calculating the idle duration etc.
121 	 */
122 	hrtimer_peek_ahead_timers();
123 #endif
124 
125 	/* ask the governor for the next state */
126 	next_state = cpuidle_curr_governor->select(drv, dev);
127 	if (need_resched()) {
128 		local_irq_enable();
129 		return 0;
130 	}
131 
132 	trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
133 	trace_cpu_idle_rcuidle(next_state, dev->cpu);
134 
135 	entered_state = cpuidle_enter_ops(dev, drv, next_state);
136 
137 	trace_power_end_rcuidle(dev->cpu);
138 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
139 
140 	if (entered_state >= 0) {
141 		/* Update cpuidle counters */
142 		/* This can be moved to within driver enter routine
143 		 * but that results in multiple copies of same code.
144 		 */
145 		dev->states_usage[entered_state].time +=
146 				(unsigned long long)dev->last_residency;
147 		dev->states_usage[entered_state].usage++;
148 	} else {
149 		dev->last_residency = 0;
150 	}
151 
152 	/* give the governor an opportunity to reflect on the outcome */
153 	if (cpuidle_curr_governor->reflect)
154 		cpuidle_curr_governor->reflect(dev, entered_state);
155 
156 	return 0;
157 }
158 
159 /**
160  * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
161  */
162 void cpuidle_install_idle_handler(void)
163 {
164 	if (enabled_devices) {
165 		/* Make sure all changes finished before we switch to new idle */
166 		smp_wmb();
167 		initialized = 1;
168 	}
169 }
170 
171 /**
172  * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
173  */
174 void cpuidle_uninstall_idle_handler(void)
175 {
176 	if (enabled_devices) {
177 		initialized = 0;
178 		kick_all_cpus_sync();
179 	}
180 }
181 
182 /**
183  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
184  */
185 void cpuidle_pause_and_lock(void)
186 {
187 	mutex_lock(&cpuidle_lock);
188 	cpuidle_uninstall_idle_handler();
189 }
190 
191 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
192 
193 /**
194  * cpuidle_resume_and_unlock - resumes CPUIDLE operation
195  */
196 void cpuidle_resume_and_unlock(void)
197 {
198 	cpuidle_install_idle_handler();
199 	mutex_unlock(&cpuidle_lock);
200 }
201 
202 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
203 
204 /* Currently used in suspend/resume path to suspend cpuidle */
205 void cpuidle_pause(void)
206 {
207 	mutex_lock(&cpuidle_lock);
208 	cpuidle_uninstall_idle_handler();
209 	mutex_unlock(&cpuidle_lock);
210 }
211 
212 /* Currently used in suspend/resume path to resume cpuidle */
213 void cpuidle_resume(void)
214 {
215 	mutex_lock(&cpuidle_lock);
216 	cpuidle_install_idle_handler();
217 	mutex_unlock(&cpuidle_lock);
218 }
219 
220 /**
221  * cpuidle_wrap_enter - performs timekeeping and irqen around enter function
222  * @dev: pointer to a valid cpuidle_device object
223  * @drv: pointer to a valid cpuidle_driver object
224  * @index: index of the target cpuidle state.
225  */
226 int cpuidle_wrap_enter(struct cpuidle_device *dev,
227 				struct cpuidle_driver *drv, int index,
228 				int (*enter)(struct cpuidle_device *dev,
229 					struct cpuidle_driver *drv, int index))
230 {
231 	ktime_t time_start, time_end;
232 	s64 diff;
233 
234 	time_start = ktime_get();
235 
236 	index = enter(dev, drv, index);
237 
238 	time_end = ktime_get();
239 
240 	local_irq_enable();
241 
242 	diff = ktime_to_us(ktime_sub(time_end, time_start));
243 	if (diff > INT_MAX)
244 		diff = INT_MAX;
245 
246 	dev->last_residency = (int) diff;
247 
248 	return index;
249 }
250 
251 #ifdef CONFIG_ARCH_HAS_CPU_RELAX
252 static int poll_idle(struct cpuidle_device *dev,
253 		struct cpuidle_driver *drv, int index)
254 {
255 	ktime_t	t1, t2;
256 	s64 diff;
257 
258 	t1 = ktime_get();
259 	local_irq_enable();
260 	while (!need_resched())
261 		cpu_relax();
262 
263 	t2 = ktime_get();
264 	diff = ktime_to_us(ktime_sub(t2, t1));
265 	if (diff > INT_MAX)
266 		diff = INT_MAX;
267 
268 	dev->last_residency = (int) diff;
269 
270 	return index;
271 }
272 
273 static void poll_idle_init(struct cpuidle_driver *drv)
274 {
275 	struct cpuidle_state *state = &drv->states[0];
276 
277 	snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
278 	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
279 	state->exit_latency = 0;
280 	state->target_residency = 0;
281 	state->power_usage = -1;
282 	state->flags = 0;
283 	state->enter = poll_idle;
284 	state->disabled = false;
285 }
286 #else
/* CONFIG_ARCH_HAS_CPU_RELAX is not set: no polling state is set up. */
static void poll_idle_init(struct cpuidle_driver *drv)
{
}
288 #endif /* CONFIG_ARCH_HAS_CPU_RELAX */
289 
290 /**
291  * cpuidle_enable_device - enables idle PM for a CPU
292  * @dev: the CPU
293  *
294  * This function must be called between cpuidle_pause_and_lock and
295  * cpuidle_resume_and_unlock when used externally.
296  */
297 int cpuidle_enable_device(struct cpuidle_device *dev)
298 {
299 	int ret, i;
300 	struct cpuidle_driver *drv = cpuidle_get_driver();
301 
302 	if (dev->enabled)
303 		return 0;
304 	if (!drv || !cpuidle_curr_governor)
305 		return -EIO;
306 	if (!dev->state_count)
307 		dev->state_count = drv->state_count;
308 
309 	if (dev->registered == 0) {
310 		ret = __cpuidle_register_device(dev);
311 		if (ret)
312 			return ret;
313 	}
314 
315 	cpuidle_enter_ops = drv->en_core_tk_irqen ?
316 		cpuidle_enter_tk : cpuidle_enter;
317 
318 	poll_idle_init(drv);
319 
320 	if ((ret = cpuidle_add_state_sysfs(dev)))
321 		return ret;
322 
323 	if (cpuidle_curr_governor->enable &&
324 	    (ret = cpuidle_curr_governor->enable(drv, dev)))
325 		goto fail_sysfs;
326 
327 	for (i = 0; i < dev->state_count; i++) {
328 		dev->states_usage[i].usage = 0;
329 		dev->states_usage[i].time = 0;
330 	}
331 	dev->last_residency = 0;
332 
333 	smp_wmb();
334 
335 	dev->enabled = 1;
336 
337 	enabled_devices++;
338 	return 0;
339 
340 fail_sysfs:
341 	cpuidle_remove_state_sysfs(dev);
342 
343 	return ret;
344 }
345 
346 EXPORT_SYMBOL_GPL(cpuidle_enable_device);
347 
348 /**
349  * cpuidle_disable_device - disables idle PM for a CPU
350  * @dev: the CPU
351  *
352  * This function must be called between cpuidle_pause_and_lock and
353  * cpuidle_resume_and_unlock when used externally.
354  */
355 void cpuidle_disable_device(struct cpuidle_device *dev)
356 {
357 	if (!dev->enabled)
358 		return;
359 	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
360 		return;
361 
362 	dev->enabled = 0;
363 
364 	if (cpuidle_curr_governor->disable)
365 		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
366 
367 	cpuidle_remove_state_sysfs(dev);
368 	enabled_devices--;
369 }
370 
371 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
372 
373 /**
374  * __cpuidle_register_device - internal register function called before register
375  * and enable routines
376  * @dev: the cpu
377  *
378  * cpuidle_lock mutex must be held before this is called
379  */
380 static int __cpuidle_register_device(struct cpuidle_device *dev)
381 {
382 	int ret;
383 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
384 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
385 
386 	if (!dev)
387 		return -EINVAL;
388 	if (!try_module_get(cpuidle_driver->owner))
389 		return -EINVAL;
390 
391 	init_completion(&dev->kobj_unregister);
392 
393 	per_cpu(cpuidle_devices, dev->cpu) = dev;
394 	list_add(&dev->device_list, &cpuidle_detected_devices);
395 	if ((ret = cpuidle_add_sysfs(cpu_dev))) {
396 		module_put(cpuidle_driver->owner);
397 		return ret;
398 	}
399 
400 	dev->registered = 1;
401 	return 0;
402 }
403 
404 /**
405  * cpuidle_register_device - registers a CPU's idle PM feature
406  * @dev: the cpu
407  */
408 int cpuidle_register_device(struct cpuidle_device *dev)
409 {
410 	int ret;
411 
412 	mutex_lock(&cpuidle_lock);
413 
414 	if ((ret = __cpuidle_register_device(dev))) {
415 		mutex_unlock(&cpuidle_lock);
416 		return ret;
417 	}
418 
419 	cpuidle_enable_device(dev);
420 	cpuidle_install_idle_handler();
421 
422 	mutex_unlock(&cpuidle_lock);
423 
424 	return 0;
425 
426 }
427 
428 EXPORT_SYMBOL_GPL(cpuidle_register_device);
429 
430 /**
431  * cpuidle_unregister_device - unregisters a CPU's idle PM feature
432  * @dev: the cpu
433  */
434 void cpuidle_unregister_device(struct cpuidle_device *dev)
435 {
436 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
437 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
438 
439 	if (dev->registered == 0)
440 		return;
441 
442 	cpuidle_pause_and_lock();
443 
444 	cpuidle_disable_device(dev);
445 
446 	cpuidle_remove_sysfs(cpu_dev);
447 	list_del(&dev->device_list);
448 	wait_for_completion(&dev->kobj_unregister);
449 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
450 
451 	cpuidle_resume_and_unlock();
452 
453 	module_put(cpuidle_driver->owner);
454 }
455 
456 EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
457 
458 #ifdef CONFIG_SMP
459 
460 static void smp_callback(void *v)
461 {
462 	/* we already woke the CPU up, nothing more to do */
463 }
464 
465 /*
466  * This function gets called when a part of the kernel has a new latency
467  * requirement.  This means we need to get all processors out of their C-state,
468  * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
469  * wakes them all right up.
470  */
471 static int cpuidle_latency_notify(struct notifier_block *b,
472 		unsigned long l, void *v)
473 {
474 	smp_call_function(smp_callback, NULL, 1);
475 	return NOTIFY_OK;
476 }
477 
478 static struct notifier_block cpuidle_latency_notifier = {
479 	.notifier_call = cpuidle_latency_notify,
480 };
481 
482 static inline void latency_notifier_init(struct notifier_block *n)
483 {
484 	pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
485 }
486 
487 #else /* CONFIG_SMP */
488 
489 #define latency_notifier_init(x) do { } while (0)
490 
491 #endif /* CONFIG_SMP */
492 
493 /**
494  * cpuidle_init - core initializer
495  */
496 static int __init cpuidle_init(void)
497 {
498 	int ret;
499 
500 	if (cpuidle_disabled())
501 		return -ENODEV;
502 
503 	ret = cpuidle_add_interface(cpu_subsys.dev_root);
504 	if (ret)
505 		return ret;
506 
507 	latency_notifier_init(&cpuidle_latency_notifier);
508 
509 	return 0;
510 }
511 
512 module_param(off, int, 0444);
513 core_initcall(cpuidle_init);
514