/*
 * drivers/cpufreq/cpufreq_conservative.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *           (C) 2004 Alexander Clouter <alex-kernel@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names short and simple.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. Default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10mS, using an appropriate sampling rate.
 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
 * this governor will not work.
 * All times here are in uS.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;	/* idle time seen at the last "up" evaluation */
	unsigned int		prev_cpu_idle_down;	/* idle time seen at the last "down" evaluation */
	unsigned int		enable;			/* sampling enabled for this CPU */
	unsigned int		down_skip;		/* samples since the last "down" evaluation */
	unsigned int		requested_freq;		/* frequency last requested by this governor */
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DEFINE_MUTEX (dbs_mutex);
static DECLARE_WORK (dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		down_threshold;
	unsigned int		ignore_nice;
	unsigned int		freq_step;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold		= DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice		= 0,
	.freq_step		= 5,
};

static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return	kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		( dbs_tuners_ins.ignore_nice ?
		  kstat_cpu(cpu).cpustat.nice :
		  0);
}
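/*
 * The tunables below are exported through sysfs in the "conservative"
 * attribute group created against each policy's kobject; on a typical
 * sysfs layout that is, for example,
 * /sys/devices/system/cpu/cpu0/cpufreq/conservative/ (path shown for
 * illustration only).
 */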
/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf (buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf (buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);
	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_down_factor = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > 100 || input < 0 ||
			input <= dbs_tuners_ins.down_threshold) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > 100 || input < 0 ||
			input >= dbs_tuners_ins.up_threshold) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.down_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf (buf, "%u", &input);
	if ( ret != 1 )
		return -EINVAL;

	if ( input > 1 )
		input = 1;

	mutex_lock(&dbs_mutex);
	if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	mutex_unlock(&dbs_mutex);

	return count;
}
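/*
 * freq_step is the size of each frequency change, expressed as a
 * percentage of policy->max.  A value of 0 stops frequency reductions
 * (dbs_check_cpu() bails out of the decrease path), while the increase
 * path falls back to a small fixed step if the percentage rounds down
 * to zero.
 */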
static ssize_t store_freq_step(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf (buf, "%u", &input);

	if ( ret != 1 )
		return -EINVAL;

	if ( input > 100 )
		input = 100;

	/* no need to test here if freq_step is zero as the user might actually
	 * want this, they would be crazy though :) */
	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.freq_step = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(freq_step);

static struct attribute * dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice_load.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};

/************************** sysfs end ************************/
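/*
 * dbs_check_cpu() is invoked from do_dbs_timer() for every online CPU.
 * Both the periodic work and the sysfs store handlers run under
 * dbs_mutex, so the tuners are stable while a sample is evaluated.
 */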
static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
	unsigned int tmp_idle_ticks, total_idle_ticks;
	unsigned int freq_step;
	unsigned int freq_down_sampling_rate;
	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	struct cpufreq_policy *policy;

	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;

	/*
	 * The default safe range is 20% to 80%
	 * Every sampling_rate, we check
	 *	- If current idle time is less than 20%, then we try to
	 *	  increase frequency
	 * Every sampling_rate*sampling_down_factor, we check
	 *	- If current idle time is more than 80%, then we try to
	 *	  decrease frequency
	 *
	 * Frequency increases and decreases happen in steps of
	 * freq_step percent (5% by default) of the maximum frequency.
	 */

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;

	/* Check for frequency increase */
	total_idle_ticks = get_cpu_idle_time(cpu);
	tmp_idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_up;
	this_dbs_info->prev_cpu_idle_up = total_idle_ticks;

	if (tmp_idle_ticks < idle_ticks)
		idle_ticks = tmp_idle_ticks;

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		this_dbs_info->down_skip = 0;
		this_dbs_info->prev_cpu_idle_down =
			this_dbs_info->prev_cpu_idle_up;

		/* if we are already at full speed then break out early */
		if (this_dbs_info->requested_freq == policy->max)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		this_dbs_info->requested_freq += freq_step;
		if (this_dbs_info->requested_freq > policy->max)
			this_dbs_info->requested_freq = policy->max;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	this_dbs_info->down_skip++;
	if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor)
		return;

	/* Check for frequency decrease */
	total_idle_ticks = this_dbs_info->prev_cpu_idle_up;
	tmp_idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_down;
	this_dbs_info->prev_cpu_idle_down = total_idle_ticks;

	if (tmp_idle_ticks < idle_ticks)
		idle_ticks = tmp_idle_ticks;

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	this_dbs_info->down_skip = 0;

	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
		usecs_to_jiffies(freq_down_sampling_rate);

	if (idle_ticks > down_idle_ticks) {
		/*
		 * if we are already at the lowest speed then break out early
		 * or if we 'cannot' reduce the speed as the user might want
		 * freq_step to be zero
		 */
		if (this_dbs_info->requested_freq == policy->min
				|| dbs_tuners_ins.freq_step == 0)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		this_dbs_info->requested_freq -= freq_step;
		if (this_dbs_info->requested_freq < policy->min)
			this_dbs_info->requested_freq = policy->min;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
			CPUFREQ_RELATION_H);
		return;
	}
}
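/*
 * The work below reschedules itself every sampling_rate microseconds
 * (converted to jiffies) and evaluates every online CPU; frequency
 * reductions are additionally deferred by down_skip until
 * sampling_down_factor samples have passed (see dbs_check_cpu() above).
 */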
static void do_dbs_timer(void *data)
{
	int i;
	mutex_lock(&dbs_mutex);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	mutex_unlock(&dbs_mutex);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}
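/*
 * Governor callback: CPUFREQ_GOV_START enables sampling for a policy
 * (arming the shared work when the first policy starts using this
 * governor), CPUFREQ_GOV_STOP tears it down again, and
 * CPUFREQ_GOV_LIMITS clamps the current frequency into the new policy
 * range.
 */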
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		mutex_lock(&dbs_mutex);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		this_dbs_info->down_skip = 0;
		this_dbs_info->requested_freq = policy->cur;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate = 10 * latency *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;

			dbs_timer_init();
		}

		mutex_unlock(&dbs_mutex);
		break;

	case CPUFREQ_GOV_STOP:
		mutex_lock(&dbs_mutex);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer schedule work when this governor
		 * is no longer in use on any CPU
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		mutex_unlock(&dbs_mutex);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&dbs_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&dbs_mutex);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "conservative",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR ("Alexander Clouter <alex-kernel@digriz.org.uk>");
MODULE_DESCRIPTION ("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE ("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);
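/*
 * Example usage (assuming a cpufreq-capable platform with sysfs mounted
 * and cpu0 present; paths shown for illustration only):
 *   # echo conservative > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *   # echo 90 > /sys/devices/system/cpu/cpu0/cpufreq/conservative/up_threshold
 */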