/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct work_struct work;
	struct mutex work_lock;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util;
	unsigned long max;
	u64 last_update;
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		if (sg_policy->next_freq == next_freq) {
			trace_cpu_frequency(policy->cur, smp_processor_id());
			return;
		}
		sg_policy->next_freq = next_freq;
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (next_freq == CPUFREQ_ENTRY_INVALID)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else if (sg_policy->next_freq != next_freq) {
		sg_policy->next_freq = next_freq;
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @policy: cpufreq policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 */
static unsigned int get_next_freq(struct cpufreq_policy *policy,
				  unsigned long util, unsigned long max)
{
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	return (freq + (freq >> 2)) * util / max;
}
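
/*
 * Worked example (illustrative only; the values are assumptions, not taken
 * from this file): with frequency-invariant utilization, a capacity scale
 * of 1024 and cpuinfo.max_freq = 2000000 kHz, a CPU reporting util = 512
 * is mapped to
 *
 *	(2000000 + 2000000 / 4) * 512 / 1024 = 1250000 kHz
 *
 * and the computed frequency reaches max_freq once util / max hits
 * 1 / 1.25 = 0.8, the tipping point described in the comment above.
 */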

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int next_f;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
			get_next_freq(policy, util, max);
	sugov_update_commit(sg_policy, time, next_f);
}

static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
					   unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int max_f = policy->cpuinfo.max_freq;
	u64 last_freq_update_time = sg_policy->last_freq_update_time;
	unsigned int j;

	if (util == ULONG_MAX)
		return max_f;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu;
		unsigned long j_util, j_max;
		s64 delta_ns;

		if (j == smp_processor_id())
			continue;

		j_sg_cpu = &per_cpu(sugov_cpu, j);
		/*
		 * If the CPU utilization was last updated before the previous
		 * frequency update and the time elapsed between the last update
		 * of the CPU utilization and the last frequency update is long
		 * enough, don't take the CPU into account as it probably is
		 * idle now.
		 */
		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC)
			continue;

		j_util = j_sg_cpu->util;
		if (j_util == ULONG_MAX)
			return max_f;

		j_max = j_sg_cpu->max;
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(policy, util, max);
}

static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_policy, util, max);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct work_struct *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
	schedule_work_on(smp_processor_id(), &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
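
/*
 * Usage sketch (illustrative; the exact sysfs paths are assumptions based
 * on the standard cpufreq layout, not taken from this file): once schedutil
 * is selected for a policy, rate_limit_us is expected to appear under the
 * governor's attribute directory, e.g.
 *
 *	# echo schedutil > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *	# echo 500 > /sys/devices/system/cpu/cpu0/cpufreq/schedutil/rate_limit_us
 *
 * Writes take effect immediately: rate_limit_us_store() above updates
 * freq_update_delay_ns for every policy attached to the tunables set, so
 * frequency updates are then rate-limited to one per 500 us.
 */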

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	INIT_WORK(&sg_policy->work, sugov_work);
	mutex_init(&sg_policy->work_lock);
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	mutex_destroy(&sg_policy->work_lock);
	kfree(sg_policy);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	unsigned int lat;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy)
		return -ENOMEM;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto free_sg_policy;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto free_sg_policy;
	}

	tunables->rate_limit_us = LATENCY_MULTIPLIER;
	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
	if (lat)
		tunables->rate_limit_us *= lat;

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

 out:
	mutex_unlock(&global_tunables_lock);

	cpufreq_enable_fast_switch(policy);
	return 0;

 fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

 free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static int sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	cpufreq_disable_fast_switch(policy);

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	return 0;
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		sg_cpu->sg_policy = sg_policy;
		if (policy_is_shared(policy)) {
			sg_cpu->util = ULONG_MAX;
			sg_cpu->max = 0;
			sg_cpu->last_update = 0;
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_shared);
		} else {
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_single);
		}
	}
	return 0;
}

static int sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	irq_work_sync(&sg_policy->irq_work);
	cancel_work_sync(&sg_policy->work);
	return 0;
}

static int sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);

		if (policy->max < policy->cur)
			__cpufreq_driver_target(policy, policy->max,
						CPUFREQ_RELATION_H);
		else if (policy->min > policy->cur)
			__cpufreq_driver_target(policy, policy->min,
						CPUFREQ_RELATION_L);

		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
	return 0;
}

static int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
{
	if (event == CPUFREQ_GOV_POLICY_INIT) {
		return sugov_init(policy);
	} else if (policy->governor_data) {
		switch (event) {
		case CPUFREQ_GOV_POLICY_EXIT:
			return sugov_exit(policy);
		case CPUFREQ_GOV_START:
			return sugov_start(policy);
		case CPUFREQ_GOV_STOP:
			return sugov_stop(policy);
		case CPUFREQ_GOV_LIMITS:
			return sugov_limits(policy);
		}
	}
	return -EINVAL;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.governor = sugov_governor,
	.owner = THIS_MODULE,
};

static int __init sugov_module_init(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}

static void __exit sugov_module_exit(void)
{
	cpufreq_unregister_governor(&schedutil_gov);
}

MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}

fs_initcall(sugov_module_init);
#else
module_init(sugov_module_init);
#endif
module_exit(sugov_module_exit);