1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * drivers/cpufreq/cpufreq_ondemand.c 4 * 5 * Copyright (C) 2001 Russell King 6 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 7 * Jun Nakajima <jun.nakajima@intel.com> 8 */ 9 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/cpu.h> 13 #include <linux/percpu-defs.h> 14 #include <linux/slab.h> 15 #include <linux/tick.h> 16 #include <linux/sched/cpufreq.h> 17 18 #include "cpufreq_ondemand.h" 19 20 /* On-demand governor macros */ 21 #define DEF_FREQUENCY_UP_THRESHOLD (80) 22 #define DEF_SAMPLING_DOWN_FACTOR (1) 23 #define MAX_SAMPLING_DOWN_FACTOR (100000) 24 #define MICRO_FREQUENCY_UP_THRESHOLD (95) 25 #define MIN_FREQUENCY_UP_THRESHOLD (1) 26 #define MAX_FREQUENCY_UP_THRESHOLD (100) 27 28 static struct od_ops od_ops; 29 30 static unsigned int default_powersave_bias; 31 32 /* 33 * Find right freq to be set now with powersave_bias on. 34 * Returns the freq_hi to be used right now and will set freq_hi_delay_us, 35 * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. 36 */ 37 static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, 38 unsigned int freq_next, unsigned int relation) 39 { 40 unsigned int freq_req, freq_reduc, freq_avg; 41 unsigned int freq_hi, freq_lo; 42 unsigned int index; 43 unsigned int delay_hi_us; 44 struct policy_dbs_info *policy_dbs = policy->governor_data; 45 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 46 struct dbs_data *dbs_data = policy_dbs->dbs_data; 47 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 48 struct cpufreq_frequency_table *freq_table = policy->freq_table; 49 50 if (!freq_table) { 51 dbs_info->freq_lo = 0; 52 dbs_info->freq_lo_delay_us = 0; 53 return freq_next; 54 } 55 56 index = cpufreq_frequency_table_target(policy, freq_next, policy->min, 57 policy->max, relation); 58 freq_req = freq_table[index].frequency; 59 freq_reduc = freq_req * od_tuners->powersave_bias / 1000; 60 freq_avg = freq_req - freq_reduc; 61 62 /* Find freq bounds for freq_avg in freq_table */ 63 index = cpufreq_table_find_index_h(policy, freq_avg, 64 relation & CPUFREQ_RELATION_E); 65 freq_lo = freq_table[index].frequency; 66 index = cpufreq_table_find_index_l(policy, freq_avg, 67 relation & CPUFREQ_RELATION_E); 68 freq_hi = freq_table[index].frequency; 69 70 /* Find out how long we have to be in hi and lo freqs */ 71 if (freq_hi == freq_lo) { 72 dbs_info->freq_lo = 0; 73 dbs_info->freq_lo_delay_us = 0; 74 return freq_lo; 75 } 76 delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; 77 delay_hi_us += (freq_hi - freq_lo) / 2; 78 delay_hi_us /= freq_hi - freq_lo; 79 dbs_info->freq_hi_delay_us = delay_hi_us; 80 dbs_info->freq_lo = freq_lo; 81 dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; 82 return freq_hi; 83 } 84 85 static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) 86 { 87 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); 88 89 dbs_info->freq_lo = 0; 90 } 91 92 static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) 93 { 94 struct policy_dbs_info *policy_dbs = policy->governor_data; 95 struct dbs_data *dbs_data = policy_dbs->dbs_data; 96 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 97 98 if (od_tuners->powersave_bias) 99 freq = od_ops.powersave_bias_target(policy, freq, 100 CPUFREQ_RELATION_HE); 101 else if (policy->cur == policy->max) 102 return; 103 104 __cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ? 105 CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE); 106 } 107 108 /* 109 * Every sampling_rate, we check, if current idle time is less than 20% 110 * (default), then we try to increase frequency. Else, we adjust the frequency 111 * proportional to load. 112 */ 113 static void od_update(struct cpufreq_policy *policy) 114 { 115 struct policy_dbs_info *policy_dbs = policy->governor_data; 116 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 117 struct dbs_data *dbs_data = policy_dbs->dbs_data; 118 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 119 unsigned int load = dbs_update(policy); 120 121 dbs_info->freq_lo = 0; 122 123 /* Check for frequency increase */ 124 if (load > dbs_data->up_threshold) { 125 /* If switching to max speed, apply sampling_down_factor */ 126 if (policy->cur < policy->max) 127 policy_dbs->rate_mult = dbs_data->sampling_down_factor; 128 dbs_freq_increase(policy, policy->max); 129 } else { 130 /* Calculate the next frequency proportional to load */ 131 unsigned int freq_next, min_f, max_f; 132 133 min_f = policy->cpuinfo.min_freq; 134 max_f = policy->cpuinfo.max_freq; 135 freq_next = min_f + load * (max_f - min_f) / 100; 136 137 /* No longer fully busy, reset rate_mult */ 138 policy_dbs->rate_mult = 1; 139 140 if (od_tuners->powersave_bias) 141 freq_next = od_ops.powersave_bias_target(policy, 142 freq_next, 143 CPUFREQ_RELATION_LE); 144 145 __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE); 146 } 147 } 148 149 static unsigned int od_dbs_update(struct cpufreq_policy *policy) 150 { 151 struct policy_dbs_info *policy_dbs = policy->governor_data; 152 struct dbs_data *dbs_data = policy_dbs->dbs_data; 153 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 154 int sample_type = dbs_info->sample_type; 155 156 /* Common NORMAL_SAMPLE setup */ 157 dbs_info->sample_type = OD_NORMAL_SAMPLE; 158 /* 159 * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore 160 * it then. 161 */ 162 if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { 163 __cpufreq_driver_target(policy, dbs_info->freq_lo, 164 CPUFREQ_RELATION_HE); 165 return dbs_info->freq_lo_delay_us; 166 } 167 168 od_update(policy); 169 170 if (dbs_info->freq_lo) { 171 /* Setup SUB_SAMPLE */ 172 dbs_info->sample_type = OD_SUB_SAMPLE; 173 return dbs_info->freq_hi_delay_us; 174 } 175 176 return dbs_data->sampling_rate * policy_dbs->rate_mult; 177 } 178 179 /************************** sysfs interface ************************/ 180 static struct dbs_governor od_dbs_gov; 181 182 static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf, 183 size_t count) 184 { 185 struct dbs_data *dbs_data = to_dbs_data(attr_set); 186 unsigned int input; 187 int ret; 188 189 ret = sscanf(buf, "%u", &input); 190 if (ret != 1) 191 return -EINVAL; 192 dbs_data->io_is_busy = !!input; 193 194 /* we need to re-evaluate prev_cpu_idle */ 195 gov_update_cpu_data(dbs_data); 196 197 return count; 198 } 199 200 static ssize_t up_threshold_store(struct gov_attr_set *attr_set, 201 const char *buf, size_t count) 202 { 203 struct dbs_data *dbs_data = to_dbs_data(attr_set); 204 unsigned int input; 205 int ret; 206 ret = sscanf(buf, "%u", &input); 207 208 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || 209 input < MIN_FREQUENCY_UP_THRESHOLD) { 210 return -EINVAL; 211 } 212 213 dbs_data->up_threshold = input; 214 return count; 215 } 216 217 static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, 218 const char *buf, size_t count) 219 { 220 struct dbs_data *dbs_data = to_dbs_data(attr_set); 221 struct policy_dbs_info *policy_dbs; 222 unsigned int input; 223 int ret; 224 ret = sscanf(buf, "%u", &input); 225 226 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 227 return -EINVAL; 228 229 dbs_data->sampling_down_factor = input; 230 231 /* Reset down sampling multiplier in case it was active */ 232 list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { 233 /* 234 * Doing this without locking might lead to using different 235 * rate_mult values in od_update() and od_dbs_update(). 236 */ 237 mutex_lock(&policy_dbs->update_mutex); 238 policy_dbs->rate_mult = 1; 239 mutex_unlock(&policy_dbs->update_mutex); 240 } 241 242 return count; 243 } 244 245 static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, 246 const char *buf, size_t count) 247 { 248 struct dbs_data *dbs_data = to_dbs_data(attr_set); 249 unsigned int input; 250 int ret; 251 252 ret = sscanf(buf, "%u", &input); 253 if (ret != 1) 254 return -EINVAL; 255 256 if (input > 1) 257 input = 1; 258 259 if (input == dbs_data->ignore_nice_load) { /* nothing to do */ 260 return count; 261 } 262 dbs_data->ignore_nice_load = input; 263 264 /* we need to re-evaluate prev_cpu_idle */ 265 gov_update_cpu_data(dbs_data); 266 267 return count; 268 } 269 270 static ssize_t powersave_bias_store(struct gov_attr_set *attr_set, 271 const char *buf, size_t count) 272 { 273 struct dbs_data *dbs_data = to_dbs_data(attr_set); 274 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 275 struct policy_dbs_info *policy_dbs; 276 unsigned int input; 277 int ret; 278 ret = sscanf(buf, "%u", &input); 279 280 if (ret != 1) 281 return -EINVAL; 282 283 if (input > 1000) 284 input = 1000; 285 286 od_tuners->powersave_bias = input; 287 288 list_for_each_entry(policy_dbs, &attr_set->policy_list, list) 289 ondemand_powersave_bias_init(policy_dbs->policy); 290 291 return count; 292 } 293 294 gov_show_one_common(sampling_rate); 295 gov_show_one_common(up_threshold); 296 gov_show_one_common(sampling_down_factor); 297 gov_show_one_common(ignore_nice_load); 298 gov_show_one_common(io_is_busy); 299 gov_show_one(od, powersave_bias); 300 301 gov_attr_rw(sampling_rate); 302 gov_attr_rw(io_is_busy); 303 gov_attr_rw(up_threshold); 304 gov_attr_rw(sampling_down_factor); 305 gov_attr_rw(ignore_nice_load); 306 gov_attr_rw(powersave_bias); 307 308 static struct attribute *od_attrs[] = { 309 &sampling_rate.attr, 310 &up_threshold.attr, 311 &sampling_down_factor.attr, 312 &ignore_nice_load.attr, 313 &powersave_bias.attr, 314 &io_is_busy.attr, 315 NULL 316 }; 317 ATTRIBUTE_GROUPS(od); 318 319 /************************** sysfs end ************************/ 320 321 static struct policy_dbs_info *od_alloc(void) 322 { 323 struct od_policy_dbs_info *dbs_info; 324 325 dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); 326 return dbs_info ? &dbs_info->policy_dbs : NULL; 327 } 328 329 static void od_free(struct policy_dbs_info *policy_dbs) 330 { 331 kfree(to_dbs_info(policy_dbs)); 332 } 333 334 static int od_init(struct dbs_data *dbs_data) 335 { 336 struct od_dbs_tuners *tuners; 337 u64 idle_time; 338 int cpu; 339 340 tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); 341 if (!tuners) 342 return -ENOMEM; 343 344 cpu = get_cpu(); 345 idle_time = get_cpu_idle_time_us(cpu, NULL); 346 put_cpu(); 347 if (idle_time != -1ULL) { 348 /* Idle micro accounting is supported. Use finer thresholds */ 349 dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 350 } else { 351 dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 352 } 353 354 dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 355 dbs_data->ignore_nice_load = 0; 356 tuners->powersave_bias = default_powersave_bias; 357 dbs_data->io_is_busy = od_should_io_be_busy(); 358 359 dbs_data->tuners = tuners; 360 return 0; 361 } 362 363 static void od_exit(struct dbs_data *dbs_data) 364 { 365 kfree(dbs_data->tuners); 366 } 367 368 static void od_start(struct cpufreq_policy *policy) 369 { 370 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); 371 372 dbs_info->sample_type = OD_NORMAL_SAMPLE; 373 ondemand_powersave_bias_init(policy); 374 } 375 376 static struct od_ops od_ops = { 377 .powersave_bias_target = generic_powersave_bias_target, 378 }; 379 380 static struct dbs_governor od_dbs_gov = { 381 .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), 382 .kobj_type = { .default_groups = od_groups }, 383 .gov_dbs_update = od_dbs_update, 384 .alloc = od_alloc, 385 .free = od_free, 386 .init = od_init, 387 .exit = od_exit, 388 .start = od_start, 389 }; 390 391 #define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov) 392 393 static void od_set_powersave_bias(unsigned int powersave_bias) 394 { 395 unsigned int cpu; 396 cpumask_var_t done; 397 398 if (!alloc_cpumask_var(&done, GFP_KERNEL)) 399 return; 400 401 default_powersave_bias = powersave_bias; 402 cpumask_clear(done); 403 404 cpus_read_lock(); 405 for_each_online_cpu(cpu) { 406 struct cpufreq_policy *policy; 407 struct policy_dbs_info *policy_dbs; 408 struct dbs_data *dbs_data; 409 struct od_dbs_tuners *od_tuners; 410 411 if (cpumask_test_cpu(cpu, done)) 412 continue; 413 414 policy = cpufreq_cpu_get_raw(cpu); 415 if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) 416 continue; 417 418 policy_dbs = policy->governor_data; 419 if (!policy_dbs) 420 continue; 421 422 cpumask_or(done, done, policy->cpus); 423 424 dbs_data = policy_dbs->dbs_data; 425 od_tuners = dbs_data->tuners; 426 od_tuners->powersave_bias = default_powersave_bias; 427 } 428 cpus_read_unlock(); 429 430 free_cpumask_var(done); 431 } 432 433 void od_register_powersave_bias_handler(unsigned int (*f) 434 (struct cpufreq_policy *, unsigned int, unsigned int), 435 unsigned int powersave_bias) 436 { 437 od_ops.powersave_bias_target = f; 438 od_set_powersave_bias(powersave_bias); 439 } 440 EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler); 441 442 void od_unregister_powersave_bias_handler(void) 443 { 444 od_ops.powersave_bias_target = generic_powersave_bias_target; 445 od_set_powersave_bias(0); 446 } 447 EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); 448 449 MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); 450 MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); 451 MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " 452 "Low Latency Frequency Transition capable processors"); 453 MODULE_LICENSE("GPL"); 454 455 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 456 struct cpufreq_governor *cpufreq_default_governor(void) 457 { 458 return &CPU_FREQ_GOV_ONDEMAND; 459 } 460 #endif 461 462 cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND); 463 cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND); 464