/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                     Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10mS, using an appropriate sampling rate.
 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
 * this governor will not work.
 * All times here are in uS.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy *cur_policy;
	unsigned int prev_cpu_idle_up;
	unsigned int prev_cpu_idle_down;
	unsigned int enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DECLARE_MUTEX(dbs_sem);
static DECLARE_WORK(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
};

/* idle + iowait time; nice time counts as idle unless ignore_nice is set */
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		(!dbs_tuners_ins.ignore_nice ?
			kstat_cpu(cpu).cpustat.nice :
			0);
}

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice, ignore_nice);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_ignore_nice(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	down(&dbs_sem);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		up(&dbs_sem);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&ignore_nice.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, total_ticks;
	unsigned int freq_next;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	/*
	 * Every sampling_rate, we check if the current idle time is less
	 * than 20% (default); if it is, we try to increase the frequency.
	 * Every sampling_rate*sampling_down_factor, we look for the lowest
	 * frequency which can sustain the load while keeping idle time over
	 * 30%. If such a frequency exists, we try to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of current frequency.
	 */

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
					j_dbs_info->prev_cpu_idle_up;
		}
		/* if we are already at full speed then break out early */
		if (policy->cur == policy->max)
			return;

		__cpufreq_driver_target(policy, policy->max,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	down_skip[cpu] = 0;
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/* Compute how many ticks there are between two measurements */
	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	total_ticks =
		usecs_to_jiffies(freq_down_sampling_rate);

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we aim 10 points under the threshold.
	 */
	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
	freq_next = (freq_next * policy->cur) /
			(dbs_tuners_ins.up_threshold - 10);

	if (freq_next <= ((policy->cur * 95) / 100))
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}

/* Periodic work: evaluate every online CPU, then re-arm the delayed work */
static void do_dbs_timer(void *data)
{
	int i;
	down(&dbs_sem);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS.
			   Convert it to uS first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate = latency *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_tuners_ins.ignore_nice = 0;

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer schedule work when this governor
		 * is used for the last time
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);

		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "ondemand",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);