1 /* 2 * drivers/cpufreq/cpufreq_ondemand.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License version 2 as 10 * published by the Free Software Foundation. 11 */ 12 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/cpu.h> 16 #include <linux/percpu-defs.h> 17 #include <linux/slab.h> 18 #include <linux/tick.h> 19 #include "cpufreq_governor.h" 20 21 /* On-demand governor macros */ 22 #define DEF_FREQUENCY_UP_THRESHOLD (80) 23 #define DEF_SAMPLING_DOWN_FACTOR (1) 24 #define MAX_SAMPLING_DOWN_FACTOR (100000) 25 #define MICRO_FREQUENCY_UP_THRESHOLD (95) 26 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) 27 #define MIN_FREQUENCY_UP_THRESHOLD (11) 28 #define MAX_FREQUENCY_UP_THRESHOLD (100) 29 30 static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); 31 32 static struct od_ops od_ops; 33 34 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 35 static struct cpufreq_governor cpufreq_gov_ondemand; 36 #endif 37 38 static unsigned int default_powersave_bias; 39 40 static void ondemand_powersave_bias_init_cpu(int cpu) 41 { 42 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 43 44 dbs_info->freq_table = cpufreq_frequency_get_table(cpu); 45 dbs_info->freq_lo = 0; 46 } 47 48 /* 49 * Not all CPUs want IO time to be accounted as busy; this depends on how 50 * efficient idling at a higher frequency/voltage is. 51 * Pavel Machek says this is not so for various generations of AMD and old 52 * Intel systems. 53 * Mike Chan (android.com) claims this is also not true for ARM. 54 * Because of this, whitelist specific known (series) of CPUs by default, and 55 * leave all others up to the user. 56 */ 57 static int should_io_be_busy(void) 58 { 59 #if defined(CONFIG_X86) 60 /* 61 * For Intel, Core 2 (model 15) and later have an efficient idle. 62 */ 63 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 64 boot_cpu_data.x86 == 6 && 65 boot_cpu_data.x86_model >= 15) 66 return 1; 67 #endif 68 return 0; 69 } 70 71 /* 72 * Find right freq to be set now with powersave_bias on. 73 * Returns the freq_hi to be used right now and will set freq_hi_jiffies, 74 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 75 */ 76 static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, 77 unsigned int freq_next, unsigned int relation) 78 { 79 unsigned int freq_req, freq_reduc, freq_avg; 80 unsigned int freq_hi, freq_lo; 81 unsigned int index = 0; 82 unsigned int jiffies_total, jiffies_hi, jiffies_lo; 83 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 84 policy->cpu); 85 struct dbs_data *dbs_data = policy->governor_data; 86 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 87 88 if (!dbs_info->freq_table) { 89 dbs_info->freq_lo = 0; 90 dbs_info->freq_lo_jiffies = 0; 91 return freq_next; 92 } 93 94 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, 95 relation, &index); 96 freq_req = dbs_info->freq_table[index].frequency; 97 freq_reduc = freq_req * od_tuners->powersave_bias / 1000; 98 freq_avg = freq_req - freq_reduc; 99 100 /* Find freq bounds for freq_avg in freq_table */ 101 index = 0; 102 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, 103 CPUFREQ_RELATION_H, &index); 104 freq_lo = dbs_info->freq_table[index].frequency; 105 index = 0; 106 cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, 107 CPUFREQ_RELATION_L, &index); 108 freq_hi = dbs_info->freq_table[index].frequency; 109 110 /* Find out how long we have to be in hi and lo freqs */ 111 if (freq_hi == freq_lo) { 112 dbs_info->freq_lo = 0; 113 dbs_info->freq_lo_jiffies = 0; 114 return freq_lo; 115 } 116 jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); 117 jiffies_hi = (freq_avg - freq_lo) * jiffies_total; 118 jiffies_hi += ((freq_hi - freq_lo) / 2); 119 jiffies_hi /= (freq_hi - freq_lo); 120 jiffies_lo = jiffies_total - jiffies_hi; 121 dbs_info->freq_lo = freq_lo; 122 dbs_info->freq_lo_jiffies = jiffies_lo; 123 dbs_info->freq_hi_jiffies = jiffies_hi; 124 return freq_hi; 125 } 126 127 static void ondemand_powersave_bias_init(void) 128 { 129 int i; 130 for_each_online_cpu(i) { 131 ondemand_powersave_bias_init_cpu(i); 132 } 133 } 134 135 static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) 136 { 137 struct dbs_data *dbs_data = policy->governor_data; 138 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 139 140 if (od_tuners->powersave_bias) 141 freq = od_ops.powersave_bias_target(policy, freq, 142 CPUFREQ_RELATION_H); 143 else if (policy->cur == policy->max) 144 return; 145 146 __cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ? 147 CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); 148 } 149 150 /* 151 * Every sampling_rate, we check, if current idle time is less than 20% 152 * (default), then we try to increase frequency. Else, we adjust the frequency 153 * proportional to load. 154 */ 155 static void od_check_cpu(int cpu, unsigned int load) 156 { 157 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 158 struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; 159 struct dbs_data *dbs_data = policy->governor_data; 160 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 161 162 dbs_info->freq_lo = 0; 163 164 /* Check for frequency increase */ 165 if (load > od_tuners->up_threshold) { 166 /* If switching to max speed, apply sampling_down_factor */ 167 if (policy->cur < policy->max) 168 dbs_info->rate_mult = 169 od_tuners->sampling_down_factor; 170 dbs_freq_increase(policy, policy->max); 171 } else { 172 /* Calculate the next frequency proportional to load */ 173 unsigned int freq_next, min_f, max_f; 174 175 min_f = policy->cpuinfo.min_freq; 176 max_f = policy->cpuinfo.max_freq; 177 freq_next = min_f + load * (max_f - min_f) / 100; 178 179 /* No longer fully busy, reset rate_mult */ 180 dbs_info->rate_mult = 1; 181 182 if (!od_tuners->powersave_bias) { 183 __cpufreq_driver_target(policy, freq_next, 184 CPUFREQ_RELATION_C); 185 return; 186 } 187 188 freq_next = od_ops.powersave_bias_target(policy, freq_next, 189 CPUFREQ_RELATION_L); 190 __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); 191 } 192 } 193 194 static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, 195 struct dbs_data *dbs_data, bool modify_all) 196 { 197 struct cpufreq_policy *policy = cdbs->shared->policy; 198 unsigned int cpu = policy->cpu; 199 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 200 cpu); 201 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 202 int delay = 0, sample_type = dbs_info->sample_type; 203 204 if (!modify_all) 205 goto max_delay; 206 207 /* Common NORMAL_SAMPLE setup */ 208 dbs_info->sample_type = OD_NORMAL_SAMPLE; 209 if (sample_type == OD_SUB_SAMPLE) { 210 delay = dbs_info->freq_lo_jiffies; 211 __cpufreq_driver_target(policy, dbs_info->freq_lo, 212 CPUFREQ_RELATION_H); 213 } else { 214 dbs_check_cpu(dbs_data, cpu); 215 if (dbs_info->freq_lo) { 216 /* Setup timer for SUB_SAMPLE */ 217 dbs_info->sample_type = OD_SUB_SAMPLE; 218 delay = dbs_info->freq_hi_jiffies; 219 } 220 } 221 222 max_delay: 223 if (!delay) 224 delay = delay_for_sampling_rate(od_tuners->sampling_rate 225 * dbs_info->rate_mult); 226 227 return delay; 228 } 229 230 /************************** sysfs interface ************************/ 231 static struct common_dbs_data od_dbs_cdata; 232 233 /** 234 * update_sampling_rate - update sampling rate effective immediately if needed. 235 * @new_rate: new sampling rate 236 * 237 * If new rate is smaller than the old, simply updating 238 * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the 239 * original sampling_rate was 1 second and the requested new sampling rate is 10 240 * ms because the user needs immediate reaction from ondemand governor, but not 241 * sure if higher frequency will be required or not, then, the governor may 242 * change the sampling rate too late; up to 1 second later. Thus, if we are 243 * reducing the sampling rate, we need to make the new value effective 244 * immediately. 245 */ 246 static void update_sampling_rate(struct dbs_data *dbs_data, 247 unsigned int new_rate) 248 { 249 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 250 int cpu; 251 252 od_tuners->sampling_rate = new_rate = max(new_rate, 253 dbs_data->min_sampling_rate); 254 255 for_each_online_cpu(cpu) { 256 struct cpufreq_policy *policy; 257 struct od_cpu_dbs_info_s *dbs_info; 258 unsigned long next_sampling, appointed_at; 259 260 policy = cpufreq_cpu_get(cpu); 261 if (!policy) 262 continue; 263 if (policy->governor != &cpufreq_gov_ondemand) { 264 cpufreq_cpu_put(policy); 265 continue; 266 } 267 dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 268 cpufreq_cpu_put(policy); 269 270 mutex_lock(&dbs_info->cdbs.shared->timer_mutex); 271 272 if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { 273 mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); 274 continue; 275 } 276 277 next_sampling = jiffies + usecs_to_jiffies(new_rate); 278 appointed_at = dbs_info->cdbs.dwork.timer.expires; 279 280 if (time_before(next_sampling, appointed_at)) { 281 282 mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); 283 cancel_delayed_work_sync(&dbs_info->cdbs.dwork); 284 mutex_lock(&dbs_info->cdbs.shared->timer_mutex); 285 286 gov_queue_work(dbs_data, policy, 287 usecs_to_jiffies(new_rate), true); 288 289 } 290 mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); 291 } 292 } 293 294 static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 295 size_t count) 296 { 297 unsigned int input; 298 int ret; 299 ret = sscanf(buf, "%u", &input); 300 if (ret != 1) 301 return -EINVAL; 302 303 update_sampling_rate(dbs_data, input); 304 return count; 305 } 306 307 static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, 308 size_t count) 309 { 310 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 311 unsigned int input; 312 int ret; 313 unsigned int j; 314 315 ret = sscanf(buf, "%u", &input); 316 if (ret != 1) 317 return -EINVAL; 318 od_tuners->io_is_busy = !!input; 319 320 /* we need to re-evaluate prev_cpu_idle */ 321 for_each_online_cpu(j) { 322 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 323 j); 324 dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, 325 &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); 326 } 327 return count; 328 } 329 330 static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, 331 size_t count) 332 { 333 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 334 unsigned int input; 335 int ret; 336 ret = sscanf(buf, "%u", &input); 337 338 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || 339 input < MIN_FREQUENCY_UP_THRESHOLD) { 340 return -EINVAL; 341 } 342 343 od_tuners->up_threshold = input; 344 return count; 345 } 346 347 static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, 348 const char *buf, size_t count) 349 { 350 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 351 unsigned int input, j; 352 int ret; 353 ret = sscanf(buf, "%u", &input); 354 355 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 356 return -EINVAL; 357 od_tuners->sampling_down_factor = input; 358 359 /* Reset down sampling multiplier in case it was active */ 360 for_each_online_cpu(j) { 361 struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 362 j); 363 dbs_info->rate_mult = 1; 364 } 365 return count; 366 } 367 368 static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, 369 const char *buf, size_t count) 370 { 371 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 372 unsigned int input; 373 int ret; 374 375 unsigned int j; 376 377 ret = sscanf(buf, "%u", &input); 378 if (ret != 1) 379 return -EINVAL; 380 381 if (input > 1) 382 input = 1; 383 384 if (input == od_tuners->ignore_nice_load) { /* nothing to do */ 385 return count; 386 } 387 od_tuners->ignore_nice_load = input; 388 389 /* we need to re-evaluate prev_cpu_idle */ 390 for_each_online_cpu(j) { 391 struct od_cpu_dbs_info_s *dbs_info; 392 dbs_info = &per_cpu(od_cpu_dbs_info, j); 393 dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, 394 &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); 395 if (od_tuners->ignore_nice_load) 396 dbs_info->cdbs.prev_cpu_nice = 397 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 398 399 } 400 return count; 401 } 402 403 static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, 404 size_t count) 405 { 406 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 407 unsigned int input; 408 int ret; 409 ret = sscanf(buf, "%u", &input); 410 411 if (ret != 1) 412 return -EINVAL; 413 414 if (input > 1000) 415 input = 1000; 416 417 od_tuners->powersave_bias = input; 418 ondemand_powersave_bias_init(); 419 return count; 420 } 421 422 show_store_one(od, sampling_rate); 423 show_store_one(od, io_is_busy); 424 show_store_one(od, up_threshold); 425 show_store_one(od, sampling_down_factor); 426 show_store_one(od, ignore_nice_load); 427 show_store_one(od, powersave_bias); 428 declare_show_sampling_rate_min(od); 429 430 gov_sys_pol_attr_rw(sampling_rate); 431 gov_sys_pol_attr_rw(io_is_busy); 432 gov_sys_pol_attr_rw(up_threshold); 433 gov_sys_pol_attr_rw(sampling_down_factor); 434 gov_sys_pol_attr_rw(ignore_nice_load); 435 gov_sys_pol_attr_rw(powersave_bias); 436 gov_sys_pol_attr_ro(sampling_rate_min); 437 438 static struct attribute *dbs_attributes_gov_sys[] = { 439 &sampling_rate_min_gov_sys.attr, 440 &sampling_rate_gov_sys.attr, 441 &up_threshold_gov_sys.attr, 442 &sampling_down_factor_gov_sys.attr, 443 &ignore_nice_load_gov_sys.attr, 444 &powersave_bias_gov_sys.attr, 445 &io_is_busy_gov_sys.attr, 446 NULL 447 }; 448 449 static struct attribute_group od_attr_group_gov_sys = { 450 .attrs = dbs_attributes_gov_sys, 451 .name = "ondemand", 452 }; 453 454 static struct attribute *dbs_attributes_gov_pol[] = { 455 &sampling_rate_min_gov_pol.attr, 456 &sampling_rate_gov_pol.attr, 457 &up_threshold_gov_pol.attr, 458 &sampling_down_factor_gov_pol.attr, 459 &ignore_nice_load_gov_pol.attr, 460 &powersave_bias_gov_pol.attr, 461 &io_is_busy_gov_pol.attr, 462 NULL 463 }; 464 465 static struct attribute_group od_attr_group_gov_pol = { 466 .attrs = dbs_attributes_gov_pol, 467 .name = "ondemand", 468 }; 469 470 /************************** sysfs end ************************/ 471 472 static int od_init(struct dbs_data *dbs_data, bool notify) 473 { 474 struct od_dbs_tuners *tuners; 475 u64 idle_time; 476 int cpu; 477 478 tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); 479 if (!tuners) { 480 pr_err("%s: kzalloc failed\n", __func__); 481 return -ENOMEM; 482 } 483 484 cpu = get_cpu(); 485 idle_time = get_cpu_idle_time_us(cpu, NULL); 486 put_cpu(); 487 if (idle_time != -1ULL) { 488 /* Idle micro accounting is supported. Use finer thresholds */ 489 tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 490 /* 491 * In nohz/micro accounting case we set the minimum frequency 492 * not depending on HZ, but fixed (very low). The deferred 493 * timer might skip some samples if idle/sleeping as needed. 494 */ 495 dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; 496 } else { 497 tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 498 499 /* For correct statistics, we need 10 ticks for each measure */ 500 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * 501 jiffies_to_usecs(10); 502 } 503 504 tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 505 tuners->ignore_nice_load = 0; 506 tuners->powersave_bias = default_powersave_bias; 507 tuners->io_is_busy = should_io_be_busy(); 508 509 dbs_data->tuners = tuners; 510 return 0; 511 } 512 513 static void od_exit(struct dbs_data *dbs_data, bool notify) 514 { 515 kfree(dbs_data->tuners); 516 } 517 518 define_get_cpu_dbs_routines(od_cpu_dbs_info); 519 520 static struct od_ops od_ops = { 521 .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, 522 .powersave_bias_target = generic_powersave_bias_target, 523 .freq_increase = dbs_freq_increase, 524 }; 525 526 static struct common_dbs_data od_dbs_cdata = { 527 .governor = GOV_ONDEMAND, 528 .attr_group_gov_sys = &od_attr_group_gov_sys, 529 .attr_group_gov_pol = &od_attr_group_gov_pol, 530 .get_cpu_cdbs = get_cpu_cdbs, 531 .get_cpu_dbs_info_s = get_cpu_dbs_info_s, 532 .gov_dbs_timer = od_dbs_timer, 533 .gov_check_cpu = od_check_cpu, 534 .gov_ops = &od_ops, 535 .init = od_init, 536 .exit = od_exit, 537 .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), 538 }; 539 540 static void od_set_powersave_bias(unsigned int powersave_bias) 541 { 542 struct cpufreq_policy *policy; 543 struct dbs_data *dbs_data; 544 struct od_dbs_tuners *od_tuners; 545 unsigned int cpu; 546 cpumask_t done; 547 548 default_powersave_bias = powersave_bias; 549 cpumask_clear(&done); 550 551 get_online_cpus(); 552 for_each_online_cpu(cpu) { 553 struct cpu_common_dbs_info *shared; 554 555 if (cpumask_test_cpu(cpu, &done)) 556 continue; 557 558 shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; 559 if (!shared) 560 continue; 561 562 policy = shared->policy; 563 cpumask_or(&done, &done, policy->cpus); 564 565 if (policy->governor != &cpufreq_gov_ondemand) 566 continue; 567 568 dbs_data = policy->governor_data; 569 od_tuners = dbs_data->tuners; 570 od_tuners->powersave_bias = default_powersave_bias; 571 } 572 put_online_cpus(); 573 } 574 575 void od_register_powersave_bias_handler(unsigned int (*f) 576 (struct cpufreq_policy *, unsigned int, unsigned int), 577 unsigned int powersave_bias) 578 { 579 od_ops.powersave_bias_target = f; 580 od_set_powersave_bias(powersave_bias); 581 } 582 EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler); 583 584 void od_unregister_powersave_bias_handler(void) 585 { 586 od_ops.powersave_bias_target = generic_powersave_bias_target; 587 od_set_powersave_bias(0); 588 } 589 EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); 590 591 static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, 592 unsigned int event) 593 { 594 return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); 595 } 596 597 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 598 static 599 #endif 600 struct cpufreq_governor cpufreq_gov_ondemand = { 601 .name = "ondemand", 602 .governor = od_cpufreq_governor_dbs, 603 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 604 .owner = THIS_MODULE, 605 }; 606 607 static int __init cpufreq_gov_dbs_init(void) 608 { 609 return cpufreq_register_governor(&cpufreq_gov_ondemand); 610 } 611 612 static void __exit cpufreq_gov_dbs_exit(void) 613 { 614 cpufreq_unregister_governor(&cpufreq_gov_ondemand); 615 } 616 617 MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); 618 MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); 619 MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " 620 "Low Latency Frequency Transition capable processors"); 621 MODULE_LICENSE("GPL"); 622 623 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 624 fs_initcall(cpufreq_gov_dbs_init); 625 #else 626 module_init(cpufreq_gov_dbs_init); 627 #endif 628 module_exit(cpufreq_gov_dbs_exit); 629