1 /* 2 * drivers/cpufreq/cpufreq_conservative.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/init.h> 17 #include <linux/cpufreq.h> 18 #include <linux/cpu.h> 19 #include <linux/jiffies.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/mutex.h> 22 #include <linux/hrtimer.h> 23 #include <linux/tick.h> 24 #include <linux/ktime.h> 25 #include <linux/sched.h> 26 27 /* 28 * dbs is used in this file as a shortform for demandbased switching 29 * It helps to keep variable names smaller, simpler 30 */ 31 32 #define DEF_FREQUENCY_UP_THRESHOLD (80) 33 #define DEF_FREQUENCY_DOWN_THRESHOLD (20) 34 35 /* 36 * The polling frequency of this governor depends on the capability of 37 * the processor. Default polling frequency is 1000 times the transition 38 * latency of the processor. The governor will work on any processor with 39 * transition latency <= 10mS, using appropriate sampling 40 * rate. 41 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) 42 * this governor will not work. 43 * All times here are in uS. 44 */ 45 #define MIN_SAMPLING_RATE_RATIO (2) 46 47 static unsigned int min_sampling_rate; 48 49 #define LATENCY_MULTIPLIER (1000) 50 #define MIN_LATENCY_MULTIPLIER (100) 51 #define DEF_SAMPLING_DOWN_FACTOR (1) 52 #define MAX_SAMPLING_DOWN_FACTOR (10) 53 #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) 54 55 static void do_dbs_timer(struct work_struct *work); 56 57 struct cpu_dbs_info_s { 58 cputime64_t prev_cpu_idle; 59 cputime64_t prev_cpu_wall; 60 cputime64_t prev_cpu_nice; 61 struct cpufreq_policy *cur_policy; 62 struct delayed_work work; 63 unsigned int down_skip; 64 unsigned int requested_freq; 65 int cpu; 66 /* 67 * percpu mutex that serializes governor limit change with 68 * do_dbs_timer invocation. We do not want do_dbs_timer to run 69 * when user is changing the governor or limits. 70 */ 71 struct mutex timer_mutex; 72 }; 73 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 74 75 static unsigned int dbs_enable; /* number of CPUs using this policy */ 76 77 /* 78 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on 79 * different CPUs. It protects dbs_enable in governor start/stop. 80 */ 81 static DEFINE_MUTEX(dbs_mutex); 82 83 static struct workqueue_struct *kconservative_wq; 84 85 static struct dbs_tuners { 86 unsigned int sampling_rate; 87 unsigned int sampling_down_factor; 88 unsigned int up_threshold; 89 unsigned int down_threshold; 90 unsigned int ignore_nice; 91 unsigned int freq_step; 92 } dbs_tuners_ins = { 93 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 94 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, 95 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 96 .ignore_nice = 0, 97 .freq_step = 5, 98 }; 99 100 static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, 101 cputime64_t *wall) 102 { 103 cputime64_t idle_time; 104 cputime64_t cur_wall_time; 105 cputime64_t busy_time; 106 107 cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); 108 busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, 109 kstat_cpu(cpu).cpustat.system); 110 111 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); 112 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); 113 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); 114 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); 115 116 idle_time = cputime64_sub(cur_wall_time, busy_time); 117 if (wall) 118 *wall = cur_wall_time; 119 120 return idle_time; 121 } 122 123 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) 124 { 125 u64 idle_time = get_cpu_idle_time_us(cpu, wall); 126 127 if (idle_time == -1ULL) 128 return get_cpu_idle_time_jiffy(cpu, wall); 129 130 return idle_time; 131 } 132 133 /* keep track of frequency transitions */ 134 static int 135 dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 136 void *data) 137 { 138 struct cpufreq_freqs *freq = data; 139 struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, 140 freq->cpu); 141 142 struct cpufreq_policy *policy; 143 144 policy = this_dbs_info->cur_policy; 145 146 /* 147 * we only care if our internally tracked freq moves outside 148 * the 'valid' ranges of freqency available to us otherwise 149 * we do not change it 150 */ 151 if (this_dbs_info->requested_freq > policy->max 152 || this_dbs_info->requested_freq < policy->min) 153 this_dbs_info->requested_freq = freq->new; 154 155 return 0; 156 } 157 158 static struct notifier_block dbs_cpufreq_notifier_block = { 159 .notifier_call = dbs_cpufreq_notifier 160 }; 161 162 /************************** sysfs interface ************************/ 163 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) 164 { 165 printk_once(KERN_INFO "CPUFREQ: conservative sampling_rate_max " 166 "sysfs file is deprecated - used by: %s\n", current->comm); 167 return sprintf(buf, "%u\n", -1U); 168 } 169 170 static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) 171 { 172 return sprintf(buf, "%u\n", min_sampling_rate); 173 } 174 175 #define define_one_ro(_name) \ 176 static struct freq_attr _name = \ 177 __ATTR(_name, 0444, show_##_name, NULL) 178 179 define_one_ro(sampling_rate_max); 180 define_one_ro(sampling_rate_min); 181 182 /* cpufreq_conservative Governor Tunables */ 183 #define show_one(file_name, object) \ 184 static ssize_t show_##file_name \ 185 (struct cpufreq_policy *unused, char *buf) \ 186 { \ 187 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 188 } 189 show_one(sampling_rate, sampling_rate); 190 show_one(sampling_down_factor, sampling_down_factor); 191 show_one(up_threshold, up_threshold); 192 show_one(down_threshold, down_threshold); 193 show_one(ignore_nice_load, ignore_nice); 194 show_one(freq_step, freq_step); 195 196 static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, 197 const char *buf, size_t count) 198 { 199 unsigned int input; 200 int ret; 201 ret = sscanf(buf, "%u", &input); 202 203 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 204 return -EINVAL; 205 206 mutex_lock(&dbs_mutex); 207 dbs_tuners_ins.sampling_down_factor = input; 208 mutex_unlock(&dbs_mutex); 209 210 return count; 211 } 212 213 static ssize_t store_sampling_rate(struct cpufreq_policy *unused, 214 const char *buf, size_t count) 215 { 216 unsigned int input; 217 int ret; 218 ret = sscanf(buf, "%u", &input); 219 220 if (ret != 1) 221 return -EINVAL; 222 223 mutex_lock(&dbs_mutex); 224 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); 225 mutex_unlock(&dbs_mutex); 226 227 return count; 228 } 229 230 static ssize_t store_up_threshold(struct cpufreq_policy *unused, 231 const char *buf, size_t count) 232 { 233 unsigned int input; 234 int ret; 235 ret = sscanf(buf, "%u", &input); 236 237 mutex_lock(&dbs_mutex); 238 if (ret != 1 || input > 100 || 239 input <= dbs_tuners_ins.down_threshold) { 240 mutex_unlock(&dbs_mutex); 241 return -EINVAL; 242 } 243 244 dbs_tuners_ins.up_threshold = input; 245 mutex_unlock(&dbs_mutex); 246 247 return count; 248 } 249 250 static ssize_t store_down_threshold(struct cpufreq_policy *unused, 251 const char *buf, size_t count) 252 { 253 unsigned int input; 254 int ret; 255 ret = sscanf(buf, "%u", &input); 256 257 mutex_lock(&dbs_mutex); 258 /* cannot be lower than 11 otherwise freq will not fall */ 259 if (ret != 1 || input < 11 || input > 100 || 260 input >= dbs_tuners_ins.up_threshold) { 261 mutex_unlock(&dbs_mutex); 262 return -EINVAL; 263 } 264 265 dbs_tuners_ins.down_threshold = input; 266 mutex_unlock(&dbs_mutex); 267 268 return count; 269 } 270 271 static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, 272 const char *buf, size_t count) 273 { 274 unsigned int input; 275 int ret; 276 277 unsigned int j; 278 279 ret = sscanf(buf, "%u", &input); 280 if (ret != 1) 281 return -EINVAL; 282 283 if (input > 1) 284 input = 1; 285 286 mutex_lock(&dbs_mutex); 287 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ 288 mutex_unlock(&dbs_mutex); 289 return count; 290 } 291 dbs_tuners_ins.ignore_nice = input; 292 293 /* we need to re-evaluate prev_cpu_idle */ 294 for_each_online_cpu(j) { 295 struct cpu_dbs_info_s *dbs_info; 296 dbs_info = &per_cpu(cpu_dbs_info, j); 297 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 298 &dbs_info->prev_cpu_wall); 299 if (dbs_tuners_ins.ignore_nice) 300 dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; 301 } 302 mutex_unlock(&dbs_mutex); 303 304 return count; 305 } 306 307 static ssize_t store_freq_step(struct cpufreq_policy *policy, 308 const char *buf, size_t count) 309 { 310 unsigned int input; 311 int ret; 312 ret = sscanf(buf, "%u", &input); 313 314 if (ret != 1) 315 return -EINVAL; 316 317 if (input > 100) 318 input = 100; 319 320 /* no need to test here if freq_step is zero as the user might actually 321 * want this, they would be crazy though :) */ 322 mutex_lock(&dbs_mutex); 323 dbs_tuners_ins.freq_step = input; 324 mutex_unlock(&dbs_mutex); 325 326 return count; 327 } 328 329 #define define_one_rw(_name) \ 330 static struct freq_attr _name = \ 331 __ATTR(_name, 0644, show_##_name, store_##_name) 332 333 define_one_rw(sampling_rate); 334 define_one_rw(sampling_down_factor); 335 define_one_rw(up_threshold); 336 define_one_rw(down_threshold); 337 define_one_rw(ignore_nice_load); 338 define_one_rw(freq_step); 339 340 static struct attribute *dbs_attributes[] = { 341 &sampling_rate_max.attr, 342 &sampling_rate_min.attr, 343 &sampling_rate.attr, 344 &sampling_down_factor.attr, 345 &up_threshold.attr, 346 &down_threshold.attr, 347 &ignore_nice_load.attr, 348 &freq_step.attr, 349 NULL 350 }; 351 352 static struct attribute_group dbs_attr_group = { 353 .attrs = dbs_attributes, 354 .name = "conservative", 355 }; 356 357 /************************** sysfs end ************************/ 358 359 static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 360 { 361 unsigned int load = 0; 362 unsigned int freq_target; 363 364 struct cpufreq_policy *policy; 365 unsigned int j; 366 367 policy = this_dbs_info->cur_policy; 368 369 /* 370 * Every sampling_rate, we check, if current idle time is less 371 * than 20% (default), then we try to increase frequency 372 * Every sampling_rate*sampling_down_factor, we check, if current 373 * idle time is more than 80%, then we try to decrease frequency 374 * 375 * Any frequency increase takes it to the maximum frequency. 376 * Frequency reduction happens at minimum steps of 377 * 5% (default) of maximum frequency 378 */ 379 380 /* Get Absolute Load */ 381 for_each_cpu(j, policy->cpus) { 382 struct cpu_dbs_info_s *j_dbs_info; 383 cputime64_t cur_wall_time, cur_idle_time; 384 unsigned int idle_time, wall_time; 385 386 j_dbs_info = &per_cpu(cpu_dbs_info, j); 387 388 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); 389 390 wall_time = (unsigned int) cputime64_sub(cur_wall_time, 391 j_dbs_info->prev_cpu_wall); 392 j_dbs_info->prev_cpu_wall = cur_wall_time; 393 394 idle_time = (unsigned int) cputime64_sub(cur_idle_time, 395 j_dbs_info->prev_cpu_idle); 396 j_dbs_info->prev_cpu_idle = cur_idle_time; 397 398 if (dbs_tuners_ins.ignore_nice) { 399 cputime64_t cur_nice; 400 unsigned long cur_nice_jiffies; 401 402 cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, 403 j_dbs_info->prev_cpu_nice); 404 /* 405 * Assumption: nice time between sampling periods will 406 * be less than 2^32 jiffies for 32 bit sys 407 */ 408 cur_nice_jiffies = (unsigned long) 409 cputime64_to_jiffies64(cur_nice); 410 411 j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; 412 idle_time += jiffies_to_usecs(cur_nice_jiffies); 413 } 414 415 if (unlikely(!wall_time || wall_time < idle_time)) 416 continue; 417 418 load = 100 * (wall_time - idle_time) / wall_time; 419 } 420 421 /* 422 * break out if we 'cannot' reduce the speed as the user might 423 * want freq_step to be zero 424 */ 425 if (dbs_tuners_ins.freq_step == 0) 426 return; 427 428 /* Check for frequency increase */ 429 if (load > dbs_tuners_ins.up_threshold) { 430 this_dbs_info->down_skip = 0; 431 432 /* if we are already at full speed then break out early */ 433 if (this_dbs_info->requested_freq == policy->max) 434 return; 435 436 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 437 438 /* max freq cannot be less than 100. But who knows.... */ 439 if (unlikely(freq_target == 0)) 440 freq_target = 5; 441 442 this_dbs_info->requested_freq += freq_target; 443 if (this_dbs_info->requested_freq > policy->max) 444 this_dbs_info->requested_freq = policy->max; 445 446 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 447 CPUFREQ_RELATION_H); 448 return; 449 } 450 451 /* 452 * The optimal frequency is the frequency that is the lowest that 453 * can support the current CPU usage without triggering the up 454 * policy. To be safe, we focus 10 points under the threshold. 455 */ 456 if (load < (dbs_tuners_ins.down_threshold - 10)) { 457 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 458 459 this_dbs_info->requested_freq -= freq_target; 460 if (this_dbs_info->requested_freq < policy->min) 461 this_dbs_info->requested_freq = policy->min; 462 463 /* 464 * if we cannot reduce the frequency anymore, break out early 465 */ 466 if (policy->cur == policy->min) 467 return; 468 469 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 470 CPUFREQ_RELATION_H); 471 return; 472 } 473 } 474 475 static void do_dbs_timer(struct work_struct *work) 476 { 477 struct cpu_dbs_info_s *dbs_info = 478 container_of(work, struct cpu_dbs_info_s, work.work); 479 unsigned int cpu = dbs_info->cpu; 480 481 /* We want all CPUs to do sampling nearly on same jiffy */ 482 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 483 484 delay -= jiffies % delay; 485 486 mutex_lock(&dbs_info->timer_mutex); 487 488 dbs_check_cpu(dbs_info); 489 490 queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); 491 mutex_unlock(&dbs_info->timer_mutex); 492 } 493 494 static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 495 { 496 /* We want all CPUs to do sampling nearly on same jiffy */ 497 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 498 delay -= jiffies % delay; 499 500 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 501 queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, 502 delay); 503 } 504 505 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 506 { 507 cancel_delayed_work_sync(&dbs_info->work); 508 } 509 510 static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 511 unsigned int event) 512 { 513 unsigned int cpu = policy->cpu; 514 struct cpu_dbs_info_s *this_dbs_info; 515 unsigned int j; 516 int rc; 517 518 this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 519 520 switch (event) { 521 case CPUFREQ_GOV_START: 522 if ((!cpu_online(cpu)) || (!policy->cur)) 523 return -EINVAL; 524 525 mutex_lock(&dbs_mutex); 526 527 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); 528 if (rc) { 529 mutex_unlock(&dbs_mutex); 530 return rc; 531 } 532 533 for_each_cpu(j, policy->cpus) { 534 struct cpu_dbs_info_s *j_dbs_info; 535 j_dbs_info = &per_cpu(cpu_dbs_info, j); 536 j_dbs_info->cur_policy = policy; 537 538 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 539 &j_dbs_info->prev_cpu_wall); 540 if (dbs_tuners_ins.ignore_nice) { 541 j_dbs_info->prev_cpu_nice = 542 kstat_cpu(j).cpustat.nice; 543 } 544 } 545 this_dbs_info->down_skip = 0; 546 this_dbs_info->requested_freq = policy->cur; 547 548 mutex_init(&this_dbs_info->timer_mutex); 549 dbs_enable++; 550 /* 551 * Start the timerschedule work, when this governor 552 * is used for first time 553 */ 554 if (dbs_enable == 1) { 555 unsigned int latency; 556 /* policy latency is in nS. Convert it to uS first */ 557 latency = policy->cpuinfo.transition_latency / 1000; 558 if (latency == 0) 559 latency = 1; 560 561 /* 562 * conservative does not implement micro like ondemand 563 * governor, thus we are bound to jiffes/HZ 564 */ 565 min_sampling_rate = 566 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 567 /* Bring kernel and HW constraints together */ 568 min_sampling_rate = max(min_sampling_rate, 569 MIN_LATENCY_MULTIPLIER * latency); 570 dbs_tuners_ins.sampling_rate = 571 max(min_sampling_rate, 572 latency * LATENCY_MULTIPLIER); 573 574 cpufreq_register_notifier( 575 &dbs_cpufreq_notifier_block, 576 CPUFREQ_TRANSITION_NOTIFIER); 577 } 578 mutex_unlock(&dbs_mutex); 579 580 dbs_timer_init(this_dbs_info); 581 582 break; 583 584 case CPUFREQ_GOV_STOP: 585 dbs_timer_exit(this_dbs_info); 586 587 mutex_lock(&dbs_mutex); 588 sysfs_remove_group(&policy->kobj, &dbs_attr_group); 589 dbs_enable--; 590 mutex_destroy(&this_dbs_info->timer_mutex); 591 592 /* 593 * Stop the timerschedule work, when this governor 594 * is used for first time 595 */ 596 if (dbs_enable == 0) 597 cpufreq_unregister_notifier( 598 &dbs_cpufreq_notifier_block, 599 CPUFREQ_TRANSITION_NOTIFIER); 600 601 mutex_unlock(&dbs_mutex); 602 603 break; 604 605 case CPUFREQ_GOV_LIMITS: 606 mutex_lock(&this_dbs_info->timer_mutex); 607 if (policy->max < this_dbs_info->cur_policy->cur) 608 __cpufreq_driver_target( 609 this_dbs_info->cur_policy, 610 policy->max, CPUFREQ_RELATION_H); 611 else if (policy->min > this_dbs_info->cur_policy->cur) 612 __cpufreq_driver_target( 613 this_dbs_info->cur_policy, 614 policy->min, CPUFREQ_RELATION_L); 615 mutex_unlock(&this_dbs_info->timer_mutex); 616 617 break; 618 } 619 return 0; 620 } 621 622 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 623 static 624 #endif 625 struct cpufreq_governor cpufreq_gov_conservative = { 626 .name = "conservative", 627 .governor = cpufreq_governor_dbs, 628 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 629 .owner = THIS_MODULE, 630 }; 631 632 static int __init cpufreq_gov_dbs_init(void) 633 { 634 int err; 635 636 kconservative_wq = create_workqueue("kconservative"); 637 if (!kconservative_wq) { 638 printk(KERN_ERR "Creation of kconservative failed\n"); 639 return -EFAULT; 640 } 641 642 err = cpufreq_register_governor(&cpufreq_gov_conservative); 643 if (err) 644 destroy_workqueue(kconservative_wq); 645 646 return err; 647 } 648 649 static void __exit cpufreq_gov_dbs_exit(void) 650 { 651 cpufreq_unregister_governor(&cpufreq_gov_conservative); 652 destroy_workqueue(kconservative_wq); 653 } 654 655 656 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); 657 MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " 658 "Low Latency Frequency Transition capable processors " 659 "optimised for use in a battery environment"); 660 MODULE_LICENSE("GPL"); 661 662 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 663 fs_initcall(cpufreq_gov_dbs_init); 664 #else 665 module_init(cpufreq_gov_dbs_init); 666 #endif 667 module_exit(cpufreq_gov_dbs_exit); 668