/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy *cur_policy;
	unsigned int prev_cpu_idle_up;
	unsigned int prev_cpu_idle_down;
	unsigned int enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DEFINE_MUTEX(dbs_mutex);
static DECLARE_WORK(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice = 0,
};

static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		(dbs_tuners_ins.ignore_nice ?
		 kstat_cpu(cpu).cpustat.nice : 0);
}
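/*
 * The cpustat counters summed above are treated as scheduler ticks
 * (jiffies): dbs_check_cpu() below compares the per-window idle deltas
 * against the sampling interval converted with usecs_to_jiffies(), so
 * both sides of the comparison are in the same unit.
 */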
/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_down_factor = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	mutex_unlock(&dbs_mutex);

	return count;
}
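/*
 * The tunables defined below are exported through sysfs while the governor
 * is active; on a typical system they appear under the policy's cpufreq
 * directory, e.g.:
 *
 *	echo 90 > /sys/devices/system/cpu/cpu0/cpufreq/ondemand/up_threshold
 *	echo 1  > /sys/devices/system/cpu/cpu0/cpufreq/ondemand/ignore_nice_load
 *
 * (exact paths depend on the sysfs layout of the running kernel)
 */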
#define define_one_rw(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, total_ticks;
	unsigned int freq_next;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	/*
	 * Every sampling_rate, we check whether the current idle time is
	 * less than 20% (default); if it is, we try to increase the
	 * frequency. Every sampling_rate*sampling_down_factor, we look for
	 * the lowest frequency which can sustain the load while keeping
	 * idle time over 30%. If such a frequency exists, we try to
	 * decrease to it.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens in minimum steps of
	 * 5% (default) of the current frequency.
	 */

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
		usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
				j_dbs_info->prev_cpu_idle_up;
		}
		/* if we are already at full speed then break out early */
		if (policy->cur == policy->max)
			return;

		__cpufreq_driver_target(policy, policy->max,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	down_skip[cpu] = 0;
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;
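	/*
	 * Example of the calculation below: with up_threshold = 80 (default)
	 * and a measured busy fraction of 40% over the
	 * sampling_rate * sampling_down_factor window,
	 * freq_next = 40 * policy->cur / (80 - 10), i.e. roughly 57% of the
	 * current frequency, so the projected load at the new frequency
	 * stays close to 70%, comfortably below the up threshold.
	 */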
	/* Compute how many ticks there are between two measurements */
	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	total_ticks = usecs_to_jiffies(freq_down_sampling_rate);

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we aim 10 percentage points under the threshold.
	 */
	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
	freq_next = (freq_next * policy->cur) /
		(dbs_tuners_ins.up_threshold - 10);

	if (freq_next < policy->min)
		freq_next = policy->min;

	if (freq_next <= ((policy->cur * 95) / 100))
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}

static void do_dbs_timer(void *data)
{
	int i;
	mutex_lock(&dbs_mutex);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	mutex_unlock(&dbs_mutex);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000)) {
			printk(KERN_WARNING "ondemand governor failed to load "
			       "due to too long transition latency\n");
			return -EINVAL;
		}
		if (this_dbs_info->enable) /* Already enabled */
			break;

		mutex_lock(&dbs_mutex);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down =
				j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time.
		 */
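		/*
		 * Example: for a CPU whose driver reports a 100 us
		 * (100000 ns) transition latency, the default sampling rate
		 * becomes 100 * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER =
		 * 100000 us, i.e. the load is re-evaluated roughly every
		 * 100 ms (unless that falls below MIN_STAT_SAMPLING_RATE,
		 * in which case the floor is used instead).
		 */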
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate = latency *
				DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_timer_init();
		}

		mutex_unlock(&dbs_mutex);
		break;

	case CPUFREQ_GOV_STOP:
		mutex_lock(&dbs_mutex);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer schedule work when the last CPU
		 * stops using this governor.
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		mutex_unlock(&dbs_mutex);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&dbs_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&dbs_mutex);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "ondemand",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);
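/*
 * Usage note: once this governor is registered (built in or loaded as a
 * module), it can typically be selected per policy at run time, e.g.:
 *
 *	echo ondemand > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */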