// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* max CPU capacity, which is equal for all CPUs in freq. domain */
	unsigned long max;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool limits_changed;
	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	u64 last_update;

	unsigned long util;
	unsigned long bw_dl;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
		sg_policy->need_freq_update = true;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update)
		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
	else if (sg_policy->next_freq == next_freq)
		return false;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}

static void sugov_deferred_update(struct sugov_policy *sg_policy)
{
	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

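/*
 * Illustrative example of the rate limiting above (the numbers are
 * assumptions picked for illustration, not defaults): with rate_limit_us
 * set to 2000, sugov_start() computes freq_update_delay_ns = 2000 *
 * NSEC_PER_USEC = 2,000,000 ns.  An update arriving 1.5 ms after
 * last_freq_update_time is then ignored by sugov_should_update_freq(),
 * while one arriving 2.5 ms after it is acted upon.  Setting
 * limits_changed (from sugov_limits() or ignore_dl_rate_limit()) bypasses
 * the delay check on the next update.
 */
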
/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal to or greater than the
 * raw next_freq (as calculated above) is returned, subject to policy min/max
 * and cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	util = map_util_perf(util);
	freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

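/*
 * Worked example for get_next_freq() (the values are assumptions chosen
 * for illustration): on a frequency-invariant system with max = 1024,
 * cpuinfo.max_freq = 2,000,000 kHz and util = 512, map_util_perf() adds
 * the 25% margin (512 + 512/4 = 640) and map_util_freq() then returns
 * 2,000,000 * 640 / 1024 = 1,250,000 kHz, i.e. C * max_freq * util / max
 * with C = 1.25.  cpufreq_driver_resolve_freq() finally selects the lowest
 * driver-supported frequency at or above that raw value.
 */
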
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->bw_dl = cpu_bw_dl(rq);
	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
					  FREQUENCY_UTIL, NULL);
}

/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}

/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 *
 * A CPU running a task which woke up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is decreased by this function each time an
 * increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that frequently wait on IO, while
 * being more conservative on tasks that do only sporadic IO operations.
 */
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long boost;

	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	boost = sg_cpu->iowait_boost * sg_policy->max;
	boost >>= SCHED_CAPACITY_SHIFT;
	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
	if (sg_cpu->util < boost)
		sg_cpu->util = boost;
}

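/*
 * Illustrative boost lifetime (assuming SCHED_CAPACITY_SCALE is 1024, so
 * IOWAIT_BOOST_MIN is 128): successive IO wakeups, each within a tick of
 * the previous one, ramp iowait_boost through 128, 256, 512 and 1024 in
 * sugov_iowait_boost().  Once SCHED_CPUFREQ_IOWAIT requests stop, each
 * further update in sugov_iowait_apply() halves the value (512, 256, 128)
 * and then clears it when it would drop below IOWAIT_BOOST_MIN, while a
 * full tick of inactivity resets it immediately via sugov_iowait_reset().
 */
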
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
		sg_cpu->sg_policy->limits_changed = true;
}

static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
					      u64 time, unsigned int flags)
{
	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
		return false;

	sugov_get_util(sg_cpu);
	sugov_iowait_apply(sg_cpu, time);

	return true;
}

static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int cached_freq = sg_policy->cached_raw_freq;
	unsigned int next_f;

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
		next_f = sg_policy->next_freq;

		/* Restore cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = cached_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, next_f))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}

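/*
 * Illustrative behaviour of the busy-CPU filter above (the frequencies are
 * assumptions for illustration): if the previous next_freq was 1.8 GHz and
 * a new evaluation yields 1.2 GHz while sugov_cpu_is_busy() reports that
 * the CPU has not entered idle since the last update, the 1.8 GHz request
 * is kept and cached_raw_freq is restored; only an rq capped by uclamp_max
 * lets the lower frequency through.
 */
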
static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long prev_util = sg_cpu->util;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	/*
	 * Do not reduce the target performance level if the CPU has not been
	 * idle recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
				   map_util_perf(sg_cpu->util),
				   sg_policy->max);

	sg_cpu->sg_policy->last_freq_update_time = time;
}

static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);

		sugov_get_util(j_sg_cpu);
		sugov_iowait_apply(j_sg_cpu, time);

		util = max(j_sg_cpu->util, util);
	}

	return get_next_freq(sg_policy, util, sg_policy->max);
}

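/*
 * Example of the shared-policy aggregation above (the utilization values
 * are assumptions for illustration): for a policy spanning CPUs 0-3 with
 * boosted utilizations of 300, 700, 450 and 120, the loop settles on
 * util = 700, so the frequency of the whole domain follows the busiest
 * CPU rather than any kind of average.
 */
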
static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (!sugov_update_next_freq(sg_policy, time, next_f))
			goto unlock;

		if (sg_policy->policy->fast_switch_enabled)
			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
		else
			sugov_deferred_update(sg_policy);
	}
unlock:
	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock briefly to handle the case where
	 * sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to false
	 * here; otherwise we may miss queueing the new update.
	 *
	 * Note: If a work was queued after the update_lock is released,
	 * sugov_work() will just be called again by kthread_work code; and the
	 * request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attrs[] = {
	&rate_limit_us.attr,
	NULL
};
ATTRIBUTE_GROUPS(sugov);

static void sugov_tunables_free(struct kobject *kobj)
{
	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);

	kfree(to_sugov_tunables(attr_set));
}

static struct kobj_type sugov_tunables_ktype = {
	.default_groups = sugov_groups,
	.sysfs_ops = &governor_sysfs_ops,
	.release = &sugov_tunables_free,
};

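/*
 * The rate_limit_us tunable above is exposed through the cpufreq sysfs
 * hierarchy.  An illustrative session on a system with per-policy
 * tunables might look like:
 *
 *	# cat /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *	2000
 *	# echo 5000 > /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *
 * The store path updates freq_update_delay_ns for every policy attached to
 * the tunables object; without per-policy tunables a single "schedutil"
 * directory appears directly under /sys/devices/system/cpu/cpufreq/ instead.
 */
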
/********************** cpufreq governor interface *********************/

struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size = sizeof(struct sched_attr),
		.sched_policy = SCHED_DEADLINE,
		.sched_flags = SCHED_FLAG_SUGOV,
		.sched_nice = 0,
		.sched_priority = 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime = 1000000,
		.sched_deadline = 10000000,
		.sched_period = 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_clear_global_tunables(void)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_clear_global_tunables();

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_clear_global_tunables();

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
	unsigned int cpu = cpumask_first(policy->cpus);

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = 0;
	sg_policy->work_in_progress = false;
	sg_policy->limits_changed = false;
	sg_policy->cached_raw_freq = 0;
	sg_policy->max = arch_scale_cpu_capacity(cpu);

	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
	}

	if (policy_is_shared(policy))
		uu = sugov_update_shared;
	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
		uu = sugov_update_single_perf;
	else
		uu = sugov_update_single_freq;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->limits_changed = true;
}

struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

cpufreq_governor_init(schedutil_gov);

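/*
 * Once registered via cpufreq_governor_init() above (or selected as the
 * build-time default), schedutil can be chosen per policy through the
 * standard cpufreq interface, for example:
 *
 *	# echo schedutil > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * which ends up calling sugov_init() and sugov_start() for that policy.
 */
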
#ifdef CONFIG_ENERGY_MODEL
static void rebuild_sd_workfn(struct work_struct *work)
{
	rebuild_sched_domains_energy();
}
static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);

/*
 * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
 * on governor changes to make sure the scheduler knows about it.
 */
void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
				   struct cpufreq_governor *old_gov)
{
	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
		/*
		 * When called from the cpufreq_register_driver() path, the
		 * cpu_hotplug_lock is already held, so use a work item to
		 * avoid nested locking in rebuild_sched_domains().
		 */
		schedule_work(&rebuild_sd_work);
	}
}
#endif