xref: /linux/drivers/cpufreq/cpufreq_ondemand.c (revision 9ce7677cfd7cd871adb457c80bea3b581b839641)
/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor.  The default polling frequency is 1000 times the
 * transition latency of the processor.  The governor will work on any
 * processor with a transition latency <= 10 ms, using an appropriate
 * sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us (microseconds).
 */
static unsigned int				def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measurement */
#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)
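
/*
 * Worked example (illustrative only, not part of the original source;
 * assumes HZ=250): for a CPU with a transition latency of 10 us,
 * def_sampling_rate starts out as 10 * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER
 * = 10,000 us.  MIN_STAT_SAMPLING_RATE is 2 * jiffies_to_usecs(10) =
 * 80,000 us at HZ=250, so the governor raises def_sampling_rate to
 * 80,000 us when it starts.  The sysfs tunable is then bounded by
 * MIN_SAMPLING_RATE = 40,000 us and MAX_SAMPLING_RATE = 40,000,000 us.
 */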

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;
	unsigned int		prev_cpu_idle_down;
	unsigned int		enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DECLARE_MUTEX	(dbs_sem);
static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
};

static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return	kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		(!dbs_tuners_ins.ignore_nice ?
		  kstat_cpu(cpu).cpustat.nice :
		  0);
}
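
/*
 * Worked example for get_cpu_idle_time() above (illustrative, not part
 * of the original source): with ignore_nice == 0, a CPU whose cpustat
 * counters read idle = 500, iowait = 20 and nice = 30 ticks yields 550;
 * with ignore_nice == 1 the nice time is treated as busy time and the
 * result is 520.
 */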

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)					\
static struct freq_attr _name =					\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice, ignore_nice);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	down(&dbs_sem);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		up(&dbs_sem);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&ignore_nice.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};
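
/*
 * Note (added for illustration; the path shown is the usual cpufreq
 * sysfs location and may differ by kernel configuration): the group
 * above is registered against each policy's kobject in
 * CPUFREQ_GOV_START, so the tunables appear as e.g.
 *
 *   /sys/devices/system/cpu/cpu0/cpufreq/ondemand/sampling_rate
 *   /sys/devices/system/cpu/cpu0/cpufreq/ondemand/up_threshold
 *
 * Writing "echo 90 > .../ondemand/up_threshold" makes the governor wait
 * for more than 90% busy time before jumping to the maximum frequency.
 */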

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, total_ticks;
	unsigned int freq_next;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	/*
	 * Every sampling_rate we check whether the current idle time is
	 * less than 20% (default); if it is, we try to increase the
	 * frequency.  Every sampling_rate * sampling_down_factor we look
	 * for the lowest frequency which can sustain the load while
	 * keeping idle time over 30%.  If such a frequency exists, we try
	 * to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of the current frequency.
	 */

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

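	/*
	 * Worked example (illustrative, not part of the original source;
	 * assumes sampling_rate = 80,000 us, HZ = 250 and the default
	 * up_threshold of 80): the sampling window is
	 * usecs_to_jiffies(80,000) = 20 ticks, so up_idle_ticks =
	 * (100 - 80) * 20 = 400.  Since idle_ticks was scaled by 100
	 * above, the branch below fires when the least-idle CPU in the
	 * policy was idle for fewer than 4 of those 20 ticks, i.e. less
	 * than 20% of the window.
	 */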
	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
					j_dbs_info->prev_cpu_idle_up;
		}
		/* if we are already at full speed then break out early */
		if (policy->cur == policy->max)
			return;

		__cpufreq_driver_target(policy, policy->max,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	down_skip[cpu] = 0;
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/* Compute how many ticks there are between two measurements */
	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	total_ticks = usecs_to_jiffies(freq_down_sampling_rate);

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy.  To be
	 * safe, we aim 10 percentage points below the threshold.
	 */
	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
	freq_next = (freq_next * policy->cur) /
			(dbs_tuners_ins.up_threshold - 10);

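	/*
	 * Worked example (illustrative, not part of the original source):
	 * if the busiest CPU was busy for half of the down-sampling
	 * window, freq_next is first 50, then 50 * policy->cur / (80 - 10)
	 * with the default up_threshold, i.e. roughly 71% of the current
	 * frequency.  That is below the 95% cut-off checked next, so the
	 * governor requests the lowest supported frequency at or above
	 * that target (CPUFREQ_RELATION_L).
	 */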
	if (freq_next <= ((policy->cur * 95) / 100))
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}

static void do_dbs_timer(void *data)
{
	int i;
	down(&dbs_sem);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
			return -EINVAL;

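		/*
		 * cpuinfo.transition_latency is in ns while
		 * TRANSITION_LATENCY_LIMIT is in us, hence the factor
		 * of 1000 in the comparison below.
		 */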
		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer-scheduled work when this governor is
		 * used for the first time.
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate = latency *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_tuners_ins.ignore_nice = 0;

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer-scheduled work when this governor is no
		 * longer used by any CPU.
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);

		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "ondemand",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);