1 /* 2 * drivers/cpufreq/cpufreq_ondemand.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License version 2 as 10 * published by the Free Software Foundation. 11 */ 12 13 #include <linux/kernel.h> 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/cpufreq.h> 17 #include <linux/cpu.h> 18 #include <linux/jiffies.h> 19 #include <linux/kernel_stat.h> 20 #include <linux/mutex.h> 21 #include <linux/hrtimer.h> 22 #include <linux/tick.h> 23 #include <linux/ktime.h> 24 #include <linux/sched.h> 25 26 /* 27 * dbs is used in this file as a shortform for demandbased switching 28 * It helps to keep variable names smaller, simpler 29 */ 30 31 #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) 32 #define DEF_FREQUENCY_UP_THRESHOLD (80) 33 #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) 34 #define MICRO_FREQUENCY_UP_THRESHOLD (95) 35 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) 36 #define MIN_FREQUENCY_UP_THRESHOLD (11) 37 #define MAX_FREQUENCY_UP_THRESHOLD (100) 38 39 /* 40 * The polling frequency of this governor depends on the capability of 41 * the processor. Default polling frequency is 1000 times the transition 42 * latency of the processor. The governor will work on any processor with 43 * transition latency <= 10mS, using appropriate sampling 44 * rate. 45 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) 46 * this governor will not work. 47 * All times here are in uS. 48 */ 49 #define MIN_SAMPLING_RATE_RATIO (2) 50 51 static unsigned int min_sampling_rate; 52 53 #define LATENCY_MULTIPLIER (1000) 54 #define MIN_LATENCY_MULTIPLIER (100) 55 #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) 56 57 static void do_dbs_timer(struct work_struct *work); 58 static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 59 unsigned int event); 60 61 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 62 static 63 #endif 64 struct cpufreq_governor cpufreq_gov_ondemand = { 65 .name = "ondemand", 66 .governor = cpufreq_governor_dbs, 67 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 68 .owner = THIS_MODULE, 69 }; 70 71 /* Sampling types */ 72 enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; 73 74 struct cpu_dbs_info_s { 75 cputime64_t prev_cpu_idle; 76 cputime64_t prev_cpu_iowait; 77 cputime64_t prev_cpu_wall; 78 cputime64_t prev_cpu_nice; 79 struct cpufreq_policy *cur_policy; 80 struct delayed_work work; 81 struct cpufreq_frequency_table *freq_table; 82 unsigned int freq_lo; 83 unsigned int freq_lo_jiffies; 84 unsigned int freq_hi_jiffies; 85 int cpu; 86 unsigned int sample_type:1; 87 /* 88 * percpu mutex that serializes governor limit change with 89 * do_dbs_timer invocation. We do not want do_dbs_timer to run 90 * when user is changing the governor or limits. 91 */ 92 struct mutex timer_mutex; 93 }; 94 static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); 95 96 static unsigned int dbs_enable; /* number of CPUs using this policy */ 97 98 /* 99 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on 100 * different CPUs. It protects dbs_enable in governor start/stop. 101 */ 102 static DEFINE_MUTEX(dbs_mutex); 103 104 static struct workqueue_struct *kondemand_wq; 105 106 static struct dbs_tuners { 107 unsigned int sampling_rate; 108 unsigned int up_threshold; 109 unsigned int down_differential; 110 unsigned int ignore_nice; 111 unsigned int powersave_bias; 112 unsigned int io_is_busy; 113 } dbs_tuners_ins = { 114 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 115 .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, 116 .ignore_nice = 0, 117 .powersave_bias = 0, 118 }; 119 120 static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, 121 cputime64_t *wall) 122 { 123 cputime64_t idle_time; 124 cputime64_t cur_wall_time; 125 cputime64_t busy_time; 126 127 cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); 128 busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, 129 kstat_cpu(cpu).cpustat.system); 130 131 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); 132 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); 133 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); 134 busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); 135 136 idle_time = cputime64_sub(cur_wall_time, busy_time); 137 if (wall) 138 *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); 139 140 return (cputime64_t)jiffies_to_usecs(idle_time); 141 } 142 143 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) 144 { 145 u64 idle_time = get_cpu_idle_time_us(cpu, wall); 146 147 if (idle_time == -1ULL) 148 return get_cpu_idle_time_jiffy(cpu, wall); 149 150 return idle_time; 151 } 152 153 static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) 154 { 155 u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); 156 157 if (iowait_time == -1ULL) 158 return 0; 159 160 return iowait_time; 161 } 162 163 /* 164 * Find right freq to be set now with powersave_bias on. 165 * Returns the freq_hi to be used right now and will set freq_hi_jiffies, 166 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 167 */ 168 static unsigned int powersave_bias_target(struct cpufreq_policy *policy, 169 unsigned int freq_next, 170 unsigned int relation) 171 { 172 unsigned int freq_req, freq_reduc, freq_avg; 173 unsigned int freq_hi, freq_lo; 174 unsigned int index = 0; 175 unsigned int jiffies_total, jiffies_hi, jiffies_lo; 176 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 177 policy->cpu); 178 179 if (!dbs_info->freq_table) { 180 dbs_info->freq_lo = 0; 181 dbs_info->freq_lo_jiffies = 0; 182 return freq_next; 183 } 184 185 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, 186 relation, &index); 187 freq_req = dbs_info->freq_table[index].frequency; 188 freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; 189 freq_avg = freq_req - freq_reduc; 190 191 /* Find freq bounds for freq_avg in freq_table */ 192 index = 0; 193 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, 194 CPUFREQ_RELATION_H, &index); 195 freq_lo = dbs_info->freq_table[index].frequency; 196 index = 0; 197 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, 198 CPUFREQ_RELATION_L, &index); 199 freq_hi = dbs_info->freq_table[index].frequency; 200 201 /* Find out how long we have to be in hi and lo freqs */ 202 if (freq_hi == freq_lo) { 203 dbs_info->freq_lo = 0; 204 dbs_info->freq_lo_jiffies = 0; 205 return freq_lo; 206 } 207 jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 208 jiffies_hi = (freq_avg - freq_lo) * jiffies_total; 209 jiffies_hi += ((freq_hi - freq_lo) / 2); 210 jiffies_hi /= (freq_hi - freq_lo); 211 jiffies_lo = jiffies_total - jiffies_hi; 212 dbs_info->freq_lo = freq_lo; 213 dbs_info->freq_lo_jiffies = jiffies_lo; 214 dbs_info->freq_hi_jiffies = jiffies_hi; 215 return freq_hi; 216 } 217 218 static void ondemand_powersave_bias_init_cpu(int cpu) 219 { 220 struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 221 dbs_info->freq_table = cpufreq_frequency_get_table(cpu); 222 dbs_info->freq_lo = 0; 223 } 224 225 static void ondemand_powersave_bias_init(void) 226 { 227 int i; 228 for_each_online_cpu(i) { 229 ondemand_powersave_bias_init_cpu(i); 230 } 231 } 232 233 /************************** sysfs interface ************************/ 234 235 static ssize_t show_sampling_rate_max(struct kobject *kobj, 236 struct attribute *attr, char *buf) 237 { 238 printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " 239 "sysfs file is deprecated - used by: %s\n", current->comm); 240 return sprintf(buf, "%u\n", -1U); 241 } 242 243 static ssize_t show_sampling_rate_min(struct kobject *kobj, 244 struct attribute *attr, char *buf) 245 { 246 return sprintf(buf, "%u\n", min_sampling_rate); 247 } 248 249 define_one_global_ro(sampling_rate_max); 250 define_one_global_ro(sampling_rate_min); 251 252 /* cpufreq_ondemand Governor Tunables */ 253 #define show_one(file_name, object) \ 254 static ssize_t show_##file_name \ 255 (struct kobject *kobj, struct attribute *attr, char *buf) \ 256 { \ 257 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 258 } 259 show_one(sampling_rate, sampling_rate); 260 show_one(io_is_busy, io_is_busy); 261 show_one(up_threshold, up_threshold); 262 show_one(ignore_nice_load, ignore_nice); 263 show_one(powersave_bias, powersave_bias); 264 265 /*** delete after deprecation time ***/ 266 267 #define DEPRECATION_MSG(file_name) \ 268 printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ 269 "interface is deprecated - " #file_name "\n"); 270 271 #define show_one_old(file_name) \ 272 static ssize_t show_##file_name##_old \ 273 (struct cpufreq_policy *unused, char *buf) \ 274 { \ 275 printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ 276 "interface is deprecated - " #file_name "\n"); \ 277 return show_##file_name(NULL, NULL, buf); \ 278 } 279 show_one_old(sampling_rate); 280 show_one_old(up_threshold); 281 show_one_old(ignore_nice_load); 282 show_one_old(powersave_bias); 283 show_one_old(sampling_rate_min); 284 show_one_old(sampling_rate_max); 285 286 cpufreq_freq_attr_ro_old(sampling_rate_min); 287 cpufreq_freq_attr_ro_old(sampling_rate_max); 288 289 /*** delete after deprecation time ***/ 290 291 static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, 292 const char *buf, size_t count) 293 { 294 unsigned int input; 295 int ret; 296 ret = sscanf(buf, "%u", &input); 297 if (ret != 1) 298 return -EINVAL; 299 300 mutex_lock(&dbs_mutex); 301 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); 302 mutex_unlock(&dbs_mutex); 303 304 return count; 305 } 306 307 static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, 308 const char *buf, size_t count) 309 { 310 unsigned int input; 311 int ret; 312 313 ret = sscanf(buf, "%u", &input); 314 if (ret != 1) 315 return -EINVAL; 316 317 mutex_lock(&dbs_mutex); 318 dbs_tuners_ins.io_is_busy = !!input; 319 mutex_unlock(&dbs_mutex); 320 321 return count; 322 } 323 324 static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, 325 const char *buf, size_t count) 326 { 327 unsigned int input; 328 int ret; 329 ret = sscanf(buf, "%u", &input); 330 331 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || 332 input < MIN_FREQUENCY_UP_THRESHOLD) { 333 return -EINVAL; 334 } 335 336 mutex_lock(&dbs_mutex); 337 dbs_tuners_ins.up_threshold = input; 338 mutex_unlock(&dbs_mutex); 339 340 return count; 341 } 342 343 static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, 344 const char *buf, size_t count) 345 { 346 unsigned int input; 347 int ret; 348 349 unsigned int j; 350 351 ret = sscanf(buf, "%u", &input); 352 if (ret != 1) 353 return -EINVAL; 354 355 if (input > 1) 356 input = 1; 357 358 mutex_lock(&dbs_mutex); 359 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ 360 mutex_unlock(&dbs_mutex); 361 return count; 362 } 363 dbs_tuners_ins.ignore_nice = input; 364 365 /* we need to re-evaluate prev_cpu_idle */ 366 for_each_online_cpu(j) { 367 struct cpu_dbs_info_s *dbs_info; 368 dbs_info = &per_cpu(od_cpu_dbs_info, j); 369 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 370 &dbs_info->prev_cpu_wall); 371 if (dbs_tuners_ins.ignore_nice) 372 dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; 373 374 } 375 mutex_unlock(&dbs_mutex); 376 377 return count; 378 } 379 380 static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, 381 const char *buf, size_t count) 382 { 383 unsigned int input; 384 int ret; 385 ret = sscanf(buf, "%u", &input); 386 387 if (ret != 1) 388 return -EINVAL; 389 390 if (input > 1000) 391 input = 1000; 392 393 mutex_lock(&dbs_mutex); 394 dbs_tuners_ins.powersave_bias = input; 395 ondemand_powersave_bias_init(); 396 mutex_unlock(&dbs_mutex); 397 398 return count; 399 } 400 401 define_one_global_rw(sampling_rate); 402 define_one_global_rw(io_is_busy); 403 define_one_global_rw(up_threshold); 404 define_one_global_rw(ignore_nice_load); 405 define_one_global_rw(powersave_bias); 406 407 static struct attribute *dbs_attributes[] = { 408 &sampling_rate_max.attr, 409 &sampling_rate_min.attr, 410 &sampling_rate.attr, 411 &up_threshold.attr, 412 &ignore_nice_load.attr, 413 &powersave_bias.attr, 414 &io_is_busy.attr, 415 NULL 416 }; 417 418 static struct attribute_group dbs_attr_group = { 419 .attrs = dbs_attributes, 420 .name = "ondemand", 421 }; 422 423 /*** delete after deprecation time ***/ 424 425 #define write_one_old(file_name) \ 426 static ssize_t store_##file_name##_old \ 427 (struct cpufreq_policy *unused, const char *buf, size_t count) \ 428 { \ 429 printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs " \ 430 "interface is deprecated - " #file_name "\n"); \ 431 return store_##file_name(NULL, NULL, buf, count); \ 432 } 433 write_one_old(sampling_rate); 434 write_one_old(up_threshold); 435 write_one_old(ignore_nice_load); 436 write_one_old(powersave_bias); 437 438 cpufreq_freq_attr_rw_old(sampling_rate); 439 cpufreq_freq_attr_rw_old(up_threshold); 440 cpufreq_freq_attr_rw_old(ignore_nice_load); 441 cpufreq_freq_attr_rw_old(powersave_bias); 442 443 static struct attribute *dbs_attributes_old[] = { 444 &sampling_rate_max_old.attr, 445 &sampling_rate_min_old.attr, 446 &sampling_rate_old.attr, 447 &up_threshold_old.attr, 448 &ignore_nice_load_old.attr, 449 &powersave_bias_old.attr, 450 NULL 451 }; 452 453 static struct attribute_group dbs_attr_group_old = { 454 .attrs = dbs_attributes_old, 455 .name = "ondemand", 456 }; 457 458 /*** delete after deprecation time ***/ 459 460 /************************** sysfs end ************************/ 461 462 static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 463 { 464 unsigned int max_load_freq; 465 466 struct cpufreq_policy *policy; 467 unsigned int j; 468 469 this_dbs_info->freq_lo = 0; 470 policy = this_dbs_info->cur_policy; 471 472 /* 473 * Every sampling_rate, we check, if current idle time is less 474 * than 20% (default), then we try to increase frequency 475 * Every sampling_rate, we look for a the lowest 476 * frequency which can sustain the load while keeping idle time over 477 * 30%. If such a frequency exist, we try to decrease to this frequency. 478 * 479 * Any frequency increase takes it to the maximum frequency. 480 * Frequency reduction happens at minimum steps of 481 * 5% (default) of current frequency 482 */ 483 484 /* Get Absolute Load - in terms of freq */ 485 max_load_freq = 0; 486 487 for_each_cpu(j, policy->cpus) { 488 struct cpu_dbs_info_s *j_dbs_info; 489 cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; 490 unsigned int idle_time, wall_time, iowait_time; 491 unsigned int load, load_freq; 492 int freq_avg; 493 494 j_dbs_info = &per_cpu(od_cpu_dbs_info, j); 495 496 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); 497 cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); 498 499 wall_time = (unsigned int) cputime64_sub(cur_wall_time, 500 j_dbs_info->prev_cpu_wall); 501 j_dbs_info->prev_cpu_wall = cur_wall_time; 502 503 idle_time = (unsigned int) cputime64_sub(cur_idle_time, 504 j_dbs_info->prev_cpu_idle); 505 j_dbs_info->prev_cpu_idle = cur_idle_time; 506 507 iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, 508 j_dbs_info->prev_cpu_iowait); 509 j_dbs_info->prev_cpu_iowait = cur_iowait_time; 510 511 if (dbs_tuners_ins.ignore_nice) { 512 cputime64_t cur_nice; 513 unsigned long cur_nice_jiffies; 514 515 cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, 516 j_dbs_info->prev_cpu_nice); 517 /* 518 * Assumption: nice time between sampling periods will 519 * be less than 2^32 jiffies for 32 bit sys 520 */ 521 cur_nice_jiffies = (unsigned long) 522 cputime64_to_jiffies64(cur_nice); 523 524 j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; 525 idle_time += jiffies_to_usecs(cur_nice_jiffies); 526 } 527 528 /* 529 * For the purpose of ondemand, waiting for disk IO is an 530 * indication that you're performance critical, and not that 531 * the system is actually idle. So subtract the iowait time 532 * from the cpu idle time. 533 */ 534 535 if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) 536 idle_time -= iowait_time; 537 538 if (unlikely(!wall_time || wall_time < idle_time)) 539 continue; 540 541 load = 100 * (wall_time - idle_time) / wall_time; 542 543 freq_avg = __cpufreq_driver_getavg(policy, j); 544 if (freq_avg <= 0) 545 freq_avg = policy->cur; 546 547 load_freq = load * freq_avg; 548 if (load_freq > max_load_freq) 549 max_load_freq = load_freq; 550 } 551 552 /* Check for frequency increase */ 553 if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { 554 /* if we are already at full speed then break out early */ 555 if (!dbs_tuners_ins.powersave_bias) { 556 if (policy->cur == policy->max) 557 return; 558 559 __cpufreq_driver_target(policy, policy->max, 560 CPUFREQ_RELATION_H); 561 } else { 562 int freq = powersave_bias_target(policy, policy->max, 563 CPUFREQ_RELATION_H); 564 __cpufreq_driver_target(policy, freq, 565 CPUFREQ_RELATION_L); 566 } 567 return; 568 } 569 570 /* Check for frequency decrease */ 571 /* if we cannot reduce the frequency anymore, break out early */ 572 if (policy->cur == policy->min) 573 return; 574 575 /* 576 * The optimal frequency is the frequency that is the lowest that 577 * can support the current CPU usage without triggering the up 578 * policy. To be safe, we focus 10 points under the threshold. 579 */ 580 if (max_load_freq < 581 (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * 582 policy->cur) { 583 unsigned int freq_next; 584 freq_next = max_load_freq / 585 (dbs_tuners_ins.up_threshold - 586 dbs_tuners_ins.down_differential); 587 588 if (freq_next < policy->min) 589 freq_next = policy->min; 590 591 if (!dbs_tuners_ins.powersave_bias) { 592 __cpufreq_driver_target(policy, freq_next, 593 CPUFREQ_RELATION_L); 594 } else { 595 int freq = powersave_bias_target(policy, freq_next, 596 CPUFREQ_RELATION_L); 597 __cpufreq_driver_target(policy, freq, 598 CPUFREQ_RELATION_L); 599 } 600 } 601 } 602 603 static void do_dbs_timer(struct work_struct *work) 604 { 605 struct cpu_dbs_info_s *dbs_info = 606 container_of(work, struct cpu_dbs_info_s, work.work); 607 unsigned int cpu = dbs_info->cpu; 608 int sample_type = dbs_info->sample_type; 609 610 /* We want all CPUs to do sampling nearly on same jiffy */ 611 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 612 613 delay -= jiffies % delay; 614 mutex_lock(&dbs_info->timer_mutex); 615 616 /* Common NORMAL_SAMPLE setup */ 617 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 618 if (!dbs_tuners_ins.powersave_bias || 619 sample_type == DBS_NORMAL_SAMPLE) { 620 dbs_check_cpu(dbs_info); 621 if (dbs_info->freq_lo) { 622 /* Setup timer for SUB_SAMPLE */ 623 dbs_info->sample_type = DBS_SUB_SAMPLE; 624 delay = dbs_info->freq_hi_jiffies; 625 } 626 } else { 627 __cpufreq_driver_target(dbs_info->cur_policy, 628 dbs_info->freq_lo, CPUFREQ_RELATION_H); 629 } 630 queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); 631 mutex_unlock(&dbs_info->timer_mutex); 632 } 633 634 static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 635 { 636 /* We want all CPUs to do sampling nearly on same jiffy */ 637 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 638 delay -= jiffies % delay; 639 640 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 641 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 642 queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, 643 delay); 644 } 645 646 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 647 { 648 cancel_delayed_work_sync(&dbs_info->work); 649 } 650 651 /* 652 * Not all CPUs want IO time to be accounted as busy; this dependson how 653 * efficient idling at a higher frequency/voltage is. 654 * Pavel Machek says this is not so for various generations of AMD and old 655 * Intel systems. 656 * Mike Chan (androidlcom) calis this is also not true for ARM. 657 * Because of this, whitelist specific known (series) of CPUs by default, and 658 * leave all others up to the user. 659 */ 660 static int should_io_be_busy(void) 661 { 662 #if defined(CONFIG_X86) 663 /* 664 * For Intel, Core 2 (model 15) andl later have an efficient idle. 665 */ 666 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 667 boot_cpu_data.x86 == 6 && 668 boot_cpu_data.x86_model >= 15) 669 return 1; 670 #endif 671 return 0; 672 } 673 674 static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 675 unsigned int event) 676 { 677 unsigned int cpu = policy->cpu; 678 struct cpu_dbs_info_s *this_dbs_info; 679 unsigned int j; 680 int rc; 681 682 this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 683 684 switch (event) { 685 case CPUFREQ_GOV_START: 686 if ((!cpu_online(cpu)) || (!policy->cur)) 687 return -EINVAL; 688 689 mutex_lock(&dbs_mutex); 690 691 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old); 692 if (rc) { 693 mutex_unlock(&dbs_mutex); 694 return rc; 695 } 696 697 dbs_enable++; 698 for_each_cpu(j, policy->cpus) { 699 struct cpu_dbs_info_s *j_dbs_info; 700 j_dbs_info = &per_cpu(od_cpu_dbs_info, j); 701 j_dbs_info->cur_policy = policy; 702 703 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 704 &j_dbs_info->prev_cpu_wall); 705 if (dbs_tuners_ins.ignore_nice) { 706 j_dbs_info->prev_cpu_nice = 707 kstat_cpu(j).cpustat.nice; 708 } 709 } 710 this_dbs_info->cpu = cpu; 711 ondemand_powersave_bias_init_cpu(cpu); 712 /* 713 * Start the timerschedule work, when this governor 714 * is used for first time 715 */ 716 if (dbs_enable == 1) { 717 unsigned int latency; 718 719 rc = sysfs_create_group(cpufreq_global_kobject, 720 &dbs_attr_group); 721 if (rc) { 722 mutex_unlock(&dbs_mutex); 723 return rc; 724 } 725 726 /* policy latency is in nS. Convert it to uS first */ 727 latency = policy->cpuinfo.transition_latency / 1000; 728 if (latency == 0) 729 latency = 1; 730 /* Bring kernel and HW constraints together */ 731 min_sampling_rate = max(min_sampling_rate, 732 MIN_LATENCY_MULTIPLIER * latency); 733 dbs_tuners_ins.sampling_rate = 734 max(min_sampling_rate, 735 latency * LATENCY_MULTIPLIER); 736 dbs_tuners_ins.io_is_busy = should_io_be_busy(); 737 } 738 mutex_unlock(&dbs_mutex); 739 740 mutex_init(&this_dbs_info->timer_mutex); 741 dbs_timer_init(this_dbs_info); 742 break; 743 744 case CPUFREQ_GOV_STOP: 745 dbs_timer_exit(this_dbs_info); 746 747 mutex_lock(&dbs_mutex); 748 sysfs_remove_group(&policy->kobj, &dbs_attr_group_old); 749 mutex_destroy(&this_dbs_info->timer_mutex); 750 dbs_enable--; 751 mutex_unlock(&dbs_mutex); 752 if (!dbs_enable) 753 sysfs_remove_group(cpufreq_global_kobject, 754 &dbs_attr_group); 755 756 break; 757 758 case CPUFREQ_GOV_LIMITS: 759 mutex_lock(&this_dbs_info->timer_mutex); 760 if (policy->max < this_dbs_info->cur_policy->cur) 761 __cpufreq_driver_target(this_dbs_info->cur_policy, 762 policy->max, CPUFREQ_RELATION_H); 763 else if (policy->min > this_dbs_info->cur_policy->cur) 764 __cpufreq_driver_target(this_dbs_info->cur_policy, 765 policy->min, CPUFREQ_RELATION_L); 766 mutex_unlock(&this_dbs_info->timer_mutex); 767 break; 768 } 769 return 0; 770 } 771 772 static int __init cpufreq_gov_dbs_init(void) 773 { 774 int err; 775 cputime64_t wall; 776 u64 idle_time; 777 int cpu = get_cpu(); 778 779 idle_time = get_cpu_idle_time_us(cpu, &wall); 780 put_cpu(); 781 if (idle_time != -1ULL) { 782 /* Idle micro accounting is supported. Use finer thresholds */ 783 dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 784 dbs_tuners_ins.down_differential = 785 MICRO_FREQUENCY_DOWN_DIFFERENTIAL; 786 /* 787 * In no_hz/micro accounting case we set the minimum frequency 788 * not depending on HZ, but fixed (very low). The deferred 789 * timer might skip some samples if idle/sleeping as needed. 790 */ 791 min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; 792 } else { 793 /* For correct statistics, we need 10 ticks for each measure */ 794 min_sampling_rate = 795 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 796 } 797 798 kondemand_wq = create_workqueue("kondemand"); 799 if (!kondemand_wq) { 800 printk(KERN_ERR "Creation of kondemand failed\n"); 801 return -EFAULT; 802 } 803 err = cpufreq_register_governor(&cpufreq_gov_ondemand); 804 if (err) 805 destroy_workqueue(kondemand_wq); 806 807 return err; 808 } 809 810 static void __exit cpufreq_gov_dbs_exit(void) 811 { 812 cpufreq_unregister_governor(&cpufreq_gov_ondemand); 813 destroy_workqueue(kondemand_wq); 814 } 815 816 817 MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); 818 MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); 819 MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " 820 "Low Latency Frequency Transition capable processors"); 821 MODULE_LICENSE("GPL"); 822 823 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 824 fs_initcall(cpufreq_gov_dbs_init); 825 #else 826 module_init(cpufreq_gov_dbs_init); 827 #endif 828 module_exit(cpufreq_gov_dbs_exit); 829