1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <asm/cputime.h> 20 #include <linux/cpufreq.h> 21 #include <linux/cpumask.h> 22 #include <linux/export.h> 23 #include <linux/kernel_stat.h> 24 #include <linux/mutex.h> 25 #include <linux/slab.h> 26 #include <linux/types.h> 27 #include <linux/workqueue.h> 28 #include <linux/cpu.h> 29 30 #include "cpufreq_governor.h" 31 32 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 33 { 34 if (have_governor_per_policy()) 35 return dbs_data->cdata->attr_group_gov_pol; 36 else 37 return dbs_data->cdata->attr_group_gov_sys; 38 } 39 40 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 41 { 42 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 43 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 44 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 45 struct cpufreq_policy *policy; 46 unsigned int max_load = 0; 47 unsigned int ignore_nice; 48 unsigned int j; 49 50 if (dbs_data->cdata->governor == GOV_ONDEMAND) 51 ignore_nice = od_tuners->ignore_nice; 52 else 53 ignore_nice = cs_tuners->ignore_nice; 54 55 policy = cdbs->cur_policy; 56 57 /* Get Absolute Load (in terms of freq for ondemand gov) */ 58 for_each_cpu(j, policy->cpus) { 59 struct cpu_dbs_common_info *j_cdbs; 60 u64 cur_wall_time, cur_idle_time; 61 unsigned int idle_time, wall_time; 62 unsigned int load; 63 int io_busy = 0; 64 65 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 66 67 /* 68 * For the purpose of ondemand, waiting for disk IO is 69 * an indication that you're performance critical, and 70 * not that the system is actually idle. So do not add 71 * the iowait time to the cpu idle time. 72 */ 73 if (dbs_data->cdata->governor == GOV_ONDEMAND) 74 io_busy = od_tuners->io_is_busy; 75 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 76 77 wall_time = (unsigned int) 78 (cur_wall_time - j_cdbs->prev_cpu_wall); 79 j_cdbs->prev_cpu_wall = cur_wall_time; 80 81 idle_time = (unsigned int) 82 (cur_idle_time - j_cdbs->prev_cpu_idle); 83 j_cdbs->prev_cpu_idle = cur_idle_time; 84 85 if (ignore_nice) { 86 u64 cur_nice; 87 unsigned long cur_nice_jiffies; 88 89 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 90 cdbs->prev_cpu_nice; 91 /* 92 * Assumption: nice time between sampling periods will 93 * be less than 2^32 jiffies for 32 bit sys 94 */ 95 cur_nice_jiffies = (unsigned long) 96 cputime64_to_jiffies64(cur_nice); 97 98 cdbs->prev_cpu_nice = 99 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 100 idle_time += jiffies_to_usecs(cur_nice_jiffies); 101 } 102 103 if (unlikely(!wall_time || wall_time < idle_time)) 104 continue; 105 106 load = 100 * (wall_time - idle_time) / wall_time; 107 108 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 109 int freq_avg = __cpufreq_driver_getavg(policy, j); 110 if (freq_avg <= 0) 111 freq_avg = policy->cur; 112 113 load *= freq_avg; 114 } 115 116 if (load > max_load) 117 max_load = load; 118 } 119 120 dbs_data->cdata->gov_check_cpu(cpu, max_load); 121 } 122 EXPORT_SYMBOL_GPL(dbs_check_cpu); 123 124 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 125 unsigned int delay) 126 { 127 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 128 129 mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); 130 } 131 132 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 133 unsigned int delay, bool all_cpus) 134 { 135 int i; 136 137 if (!all_cpus) { 138 __gov_queue_work(smp_processor_id(), dbs_data, delay); 139 } else { 140 get_online_cpus(); 141 for_each_cpu(i, policy->cpus) 142 __gov_queue_work(i, dbs_data, delay); 143 put_online_cpus(); 144 } 145 } 146 EXPORT_SYMBOL_GPL(gov_queue_work); 147 148 static inline void gov_cancel_work(struct dbs_data *dbs_data, 149 struct cpufreq_policy *policy) 150 { 151 struct cpu_dbs_common_info *cdbs; 152 int i; 153 154 for_each_cpu(i, policy->cpus) { 155 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 156 cancel_delayed_work_sync(&cdbs->work); 157 } 158 } 159 160 /* Will return if we need to evaluate cpu load again or not */ 161 bool need_load_eval(struct cpu_dbs_common_info *cdbs, 162 unsigned int sampling_rate) 163 { 164 if (policy_is_shared(cdbs->cur_policy)) { 165 ktime_t time_now = ktime_get(); 166 s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); 167 168 /* Do nothing if we recently have sampled */ 169 if (delta_us < (s64)(sampling_rate / 2)) 170 return false; 171 else 172 cdbs->time_stamp = time_now; 173 } 174 175 return true; 176 } 177 EXPORT_SYMBOL_GPL(need_load_eval); 178 179 static void set_sampling_rate(struct dbs_data *dbs_data, 180 unsigned int sampling_rate) 181 { 182 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 183 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 184 cs_tuners->sampling_rate = sampling_rate; 185 } else { 186 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 187 od_tuners->sampling_rate = sampling_rate; 188 } 189 } 190 191 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 192 struct common_dbs_data *cdata, unsigned int event) 193 { 194 struct dbs_data *dbs_data; 195 struct od_cpu_dbs_info_s *od_dbs_info = NULL; 196 struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; 197 struct od_ops *od_ops = NULL; 198 struct od_dbs_tuners *od_tuners = NULL; 199 struct cs_dbs_tuners *cs_tuners = NULL; 200 struct cpu_dbs_common_info *cpu_cdbs; 201 unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; 202 int io_busy = 0; 203 int rc; 204 205 if (have_governor_per_policy()) 206 dbs_data = policy->governor_data; 207 else 208 dbs_data = cdata->gdbs_data; 209 210 WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); 211 212 switch (event) { 213 case CPUFREQ_GOV_POLICY_INIT: 214 if (have_governor_per_policy()) { 215 WARN_ON(dbs_data); 216 } else if (dbs_data) { 217 dbs_data->usage_count++; 218 policy->governor_data = dbs_data; 219 return 0; 220 } 221 222 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 223 if (!dbs_data) { 224 pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); 225 return -ENOMEM; 226 } 227 228 dbs_data->cdata = cdata; 229 dbs_data->usage_count = 1; 230 rc = cdata->init(dbs_data); 231 if (rc) { 232 pr_err("%s: POLICY_INIT: init() failed\n", __func__); 233 kfree(dbs_data); 234 return rc; 235 } 236 237 if (!have_governor_per_policy()) 238 WARN_ON(cpufreq_get_global_kobject()); 239 240 rc = sysfs_create_group(get_governor_parent_kobj(policy), 241 get_sysfs_attr(dbs_data)); 242 if (rc) { 243 cdata->exit(dbs_data); 244 kfree(dbs_data); 245 return rc; 246 } 247 248 policy->governor_data = dbs_data; 249 250 /* policy latency is in nS. Convert it to uS first */ 251 latency = policy->cpuinfo.transition_latency / 1000; 252 if (latency == 0) 253 latency = 1; 254 255 /* Bring kernel and HW constraints together */ 256 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 257 MIN_LATENCY_MULTIPLIER * latency); 258 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 259 latency * LATENCY_MULTIPLIER)); 260 261 if ((cdata->governor == GOV_CONSERVATIVE) && 262 (!policy->governor->initialized)) { 263 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 264 265 cpufreq_register_notifier(cs_ops->notifier_block, 266 CPUFREQ_TRANSITION_NOTIFIER); 267 } 268 269 if (!have_governor_per_policy()) 270 cdata->gdbs_data = dbs_data; 271 272 return 0; 273 case CPUFREQ_GOV_POLICY_EXIT: 274 if (!--dbs_data->usage_count) { 275 sysfs_remove_group(get_governor_parent_kobj(policy), 276 get_sysfs_attr(dbs_data)); 277 278 if (!have_governor_per_policy()) 279 cpufreq_put_global_kobject(); 280 281 if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) && 282 (policy->governor->initialized == 1)) { 283 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 284 285 cpufreq_unregister_notifier(cs_ops->notifier_block, 286 CPUFREQ_TRANSITION_NOTIFIER); 287 } 288 289 cdata->exit(dbs_data); 290 kfree(dbs_data); 291 cdata->gdbs_data = NULL; 292 } 293 294 policy->governor_data = NULL; 295 return 0; 296 } 297 298 cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 299 300 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 301 cs_tuners = dbs_data->tuners; 302 cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 303 sampling_rate = cs_tuners->sampling_rate; 304 ignore_nice = cs_tuners->ignore_nice; 305 } else { 306 od_tuners = dbs_data->tuners; 307 od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 308 sampling_rate = od_tuners->sampling_rate; 309 ignore_nice = od_tuners->ignore_nice; 310 od_ops = dbs_data->cdata->gov_ops; 311 io_busy = od_tuners->io_is_busy; 312 } 313 314 switch (event) { 315 case CPUFREQ_GOV_START: 316 if (!policy->cur) 317 return -EINVAL; 318 319 mutex_lock(&dbs_data->mutex); 320 321 for_each_cpu(j, policy->cpus) { 322 struct cpu_dbs_common_info *j_cdbs = 323 dbs_data->cdata->get_cpu_cdbs(j); 324 325 j_cdbs->cpu = j; 326 j_cdbs->cur_policy = policy; 327 j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, 328 &j_cdbs->prev_cpu_wall, io_busy); 329 if (ignore_nice) 330 j_cdbs->prev_cpu_nice = 331 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 332 333 mutex_init(&j_cdbs->timer_mutex); 334 INIT_DEFERRABLE_WORK(&j_cdbs->work, 335 dbs_data->cdata->gov_dbs_timer); 336 } 337 338 /* 339 * conservative does not implement micro like ondemand 340 * governor, thus we are bound to jiffes/HZ 341 */ 342 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 343 cs_dbs_info->down_skip = 0; 344 cs_dbs_info->enable = 1; 345 cs_dbs_info->requested_freq = policy->cur; 346 } else { 347 od_dbs_info->rate_mult = 1; 348 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 349 od_ops->powersave_bias_init_cpu(cpu); 350 } 351 352 mutex_unlock(&dbs_data->mutex); 353 354 /* Initiate timer time stamp */ 355 cpu_cdbs->time_stamp = ktime_get(); 356 357 gov_queue_work(dbs_data, policy, 358 delay_for_sampling_rate(sampling_rate), true); 359 break; 360 361 case CPUFREQ_GOV_STOP: 362 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) 363 cs_dbs_info->enable = 0; 364 365 gov_cancel_work(dbs_data, policy); 366 367 mutex_lock(&dbs_data->mutex); 368 mutex_destroy(&cpu_cdbs->timer_mutex); 369 cpu_cdbs->cur_policy = NULL; 370 371 mutex_unlock(&dbs_data->mutex); 372 373 break; 374 375 case CPUFREQ_GOV_LIMITS: 376 mutex_lock(&cpu_cdbs->timer_mutex); 377 if (policy->max < cpu_cdbs->cur_policy->cur) 378 __cpufreq_driver_target(cpu_cdbs->cur_policy, 379 policy->max, CPUFREQ_RELATION_H); 380 else if (policy->min > cpu_cdbs->cur_policy->cur) 381 __cpufreq_driver_target(cpu_cdbs->cur_policy, 382 policy->min, CPUFREQ_RELATION_L); 383 dbs_check_cpu(dbs_data, cpu); 384 mutex_unlock(&cpu_cdbs->timer_mutex); 385 break; 386 } 387 return 0; 388 } 389 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 390