/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct work_struct work;
	struct mutex work_lock;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util;
	unsigned long max;
	u64 last_update;
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		if (sg_policy->next_freq == next_freq) {
			trace_cpu_frequency(policy->cur, smp_processor_id());
			return;
		}
		sg_policy->next_freq = next_freq;
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (next_freq == CPUFREQ_ENTRY_INVALID)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else if (sg_policy->next_freq != next_freq) {
		sg_policy->next_freq = next_freq;
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @policy: cpufreq policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 */
static unsigned int get_next_freq(struct cpufreq_policy *policy,
				  unsigned long util, unsigned long max)
{
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	return (freq + (freq >> 2)) * util / max;
}
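
/*
 * Worked example for get_next_freq() (illustrative values, not taken from
 * any particular platform): assume frequency-invariant utilization with
 * cpuinfo.max_freq = 1000000 kHz, util = 768 and max = 1024.  Then
 * freq + (freq >> 2) = 1250000, and 1250000 * 768 / 1024 = 937500 kHz,
 * which is 1.25 * max_freq * (util / max).  At the tipping point
 * util / max = 0.8 the formula yields exactly max_freq, so any utilization
 * above 80% of capacity requests a frequency beyond the current maximum
 * (which the cpufreq core then clamps to the policy limits).
 */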

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int next_f;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
			get_next_freq(policy, util, max);
	sugov_update_commit(sg_policy, time, next_f);
}

static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
					   unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int max_f = policy->cpuinfo.max_freq;
	u64 last_freq_update_time = sg_policy->last_freq_update_time;
	unsigned int j;

	if (util == ULONG_MAX)
		return max_f;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu;
		unsigned long j_util, j_max;
		s64 delta_ns;

		if (j == smp_processor_id())
			continue;

		j_sg_cpu = &per_cpu(sugov_cpu, j);
		/*
		 * If the CPU utilization was last updated before the previous
		 * frequency update and the time elapsed between the last update
		 * of the CPU utilization and the last frequency update is long
		 * enough, don't take the CPU into account as it probably is
		 * idle now.
		 */
		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC)
			continue;

		j_util = j_sg_cpu->util;
		if (j_util == ULONG_MAX)
			return max_f;

		j_max = j_sg_cpu->max;
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(policy, util, max);
}
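
/*
 * Example of the aggregation above (illustrative numbers): on a policy
 * shared by two CPUs, suppose the updating CPU reports util = 300,
 * max = 1024 and the other CPU last reported util = 500, max = 1024 within
 * the last tick.  Since 500 * 1024 > 1024 * 300, the other CPU's ratio
 * wins and the request becomes get_next_freq(policy, 500, 1024).  The
 * cross-multiplication j_util * max > j_max * util compares
 * j_util / j_max against util / max without an integer division.
 */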

static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_policy, util, max);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct work_struct *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
	schedule_work_on(smp_processor_id(), &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
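
/*
 * Usage sketch for the tunable above (the exact sysfs path depends on
 * whether the driver uses per-policy tunables; the global-tunables path is
 * shown and should be treated as an assumption for a given system):
 *
 *   # cat /sys/devices/system/cpu/cpufreq/schedutil/rate_limit_us
 *   # echo 20000 > /sys/devices/system/cpu/cpufreq/schedutil/rate_limit_us
 *
 * A write takes effect immediately: rate_limit_us_store() walks the
 * policy_list and updates freq_update_delay_ns for every policy attached
 * to this tunables object.
 */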

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	INIT_WORK(&sg_policy->work, sugov_work);
	mutex_init(&sg_policy->work_lock);
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	mutex_destroy(&sg_policy->work_lock);
	kfree(sg_policy);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	unsigned int lat;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy)
		return -ENOMEM;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto free_sg_policy;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto free_sg_policy;
	}

	tunables->rate_limit_us = LATENCY_MULTIPLIER;
	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
	if (lat)
		tunables->rate_limit_us *= lat;

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);

	cpufreq_enable_fast_switch(policy);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	pr_err("cpufreq: schedutil governor initialization failed (error %d)\n", ret);
	return ret;
}

static int sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	cpufreq_disable_fast_switch(policy);

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	return 0;
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		sg_cpu->sg_policy = sg_policy;
		if (policy_is_shared(policy)) {
			sg_cpu->util = ULONG_MAX;
			sg_cpu->max = 0;
			sg_cpu->last_update = 0;
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_shared);
		} else {
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_single);
		}
	}
	return 0;
}

static int sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	irq_work_sync(&sg_policy->irq_work);
	cancel_work_sync(&sg_policy->work);
	return 0;
}

static int sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);

		if (policy->max < policy->cur)
			__cpufreq_driver_target(policy, policy->max,
						CPUFREQ_RELATION_H);
		else if (policy->min > policy->cur)
			__cpufreq_driver_target(policy, policy->min,
						CPUFREQ_RELATION_L);

		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
	return 0;
}

static int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
{
	if (event == CPUFREQ_GOV_POLICY_INIT) {
		return sugov_init(policy);
	} else if (policy->governor_data) {
		switch (event) {
		case CPUFREQ_GOV_POLICY_EXIT:
			return sugov_exit(policy);
		case CPUFREQ_GOV_START:
			return sugov_start(policy);
		case CPUFREQ_GOV_STOP:
			return sugov_stop(policy);
		case CPUFREQ_GOV_LIMITS:
			return sugov_limits(policy);
		}
	}
	return -EINVAL;
}
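
/*
 * Note on the dispatch above: the cpufreq core is expected to invoke the
 * ->governor callback roughly in the order CPUFREQ_GOV_POLICY_INIT,
 * CPUFREQ_GOV_START, then zero or more CPUFREQ_GOV_LIMITS calls while the
 * governor runs, and finally CPUFREQ_GOV_STOP followed by
 * CPUFREQ_GOV_POLICY_EXIT.  Any event other than INIT that arrives before
 * sugov_init() has set policy->governor_data is rejected with -EINVAL.
 */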

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.governor = sugov_governor,
	.owner = THIS_MODULE,
};

static int __init sugov_module_init(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}

static void __exit sugov_module_exit(void)
{
	cpufreq_unregister_governor(&schedutil_gov);
}

MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}

fs_initcall(sugov_module_init);
#else
module_init(sugov_module_init);
#endif
module_exit(sugov_module_exit);
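
/*
 * Usage sketch (assumed sysfs path for a typical system): once registered,
 * the governor can be selected per policy from user space, e.g.:
 *
 *   # echo schedutil > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * With CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL set, fs_initcall() registers
 * the governor early enough in boot for it to become the default.
 */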