1 /* 2 * CPUFreq governor based on scheduler-provided CPU utilization data. 3 * 4 * Copyright (C) 2016, Intel Corporation 5 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include "sched.h" 15 16 #include <trace/events/power.h> 17 18 struct sugov_tunables { 19 struct gov_attr_set attr_set; 20 unsigned int rate_limit_us; 21 }; 22 23 struct sugov_policy { 24 struct cpufreq_policy *policy; 25 26 struct sugov_tunables *tunables; 27 struct list_head tunables_hook; 28 29 raw_spinlock_t update_lock; /* For shared policies */ 30 u64 last_freq_update_time; 31 s64 freq_update_delay_ns; 32 unsigned int next_freq; 33 unsigned int cached_raw_freq; 34 35 /* The next fields are only needed if fast switch cannot be used: */ 36 struct irq_work irq_work; 37 struct kthread_work work; 38 struct mutex work_lock; 39 struct kthread_worker worker; 40 struct task_struct *thread; 41 bool work_in_progress; 42 43 bool need_freq_update; 44 }; 45 46 struct sugov_cpu { 47 struct update_util_data update_util; 48 struct sugov_policy *sg_policy; 49 unsigned int cpu; 50 51 bool iowait_boost_pending; 52 unsigned int iowait_boost; 53 unsigned int iowait_boost_max; 54 u64 last_update; 55 56 /* The fields below are only needed when sharing a policy: */ 57 unsigned long util_cfs; 58 unsigned long util_dl; 59 unsigned long max; 60 61 /* The field below is for single-CPU policies only: */ 62 #ifdef CONFIG_NO_HZ_COMMON 63 unsigned long saved_idle_calls; 64 #endif 65 }; 66 67 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); 68 69 /************************ Governor internals ***********************/ 70 71 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) 72 { 73 s64 delta_ns; 74 75 /* 76 * Since cpufreq_update_util() is called with rq->lock held for 77 * the @target_cpu, our per-CPU data is fully serialized. 78 * 79 * However, drivers cannot in general deal with cross-CPU 80 * requests, so while get_next_freq() will work, our 81 * sugov_update_commit() call may not for the fast switching platforms. 82 * 83 * Hence stop here for remote requests if they aren't supported 84 * by the hardware, as calculating the frequency is pointless if 85 * we cannot in fact act on it. 86 * 87 * For the slow switching platforms, the kthread is always scheduled on 88 * the right set of CPUs and any CPU can find the next frequency and 89 * schedule the kthread. 90 */ 91 if (sg_policy->policy->fast_switch_enabled && 92 !cpufreq_this_cpu_can_update(sg_policy->policy)) 93 return false; 94 95 if (unlikely(sg_policy->need_freq_update)) 96 return true; 97 98 delta_ns = time - sg_policy->last_freq_update_time; 99 100 return delta_ns >= sg_policy->freq_update_delay_ns; 101 } 102 103 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, 104 unsigned int next_freq) 105 { 106 if (sg_policy->next_freq == next_freq) 107 return false; 108 109 sg_policy->next_freq = next_freq; 110 sg_policy->last_freq_update_time = time; 111 112 return true; 113 } 114 115 static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, 116 unsigned int next_freq) 117 { 118 struct cpufreq_policy *policy = sg_policy->policy; 119 120 if (!sugov_update_next_freq(sg_policy, time, next_freq)) 121 return; 122 123 next_freq = cpufreq_driver_fast_switch(policy, next_freq); 124 if (!next_freq) 125 return; 126 127 policy->cur = next_freq; 128 trace_cpu_frequency(next_freq, smp_processor_id()); 129 } 130 131 static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time, 132 unsigned int next_freq) 133 { 134 if (!sugov_update_next_freq(sg_policy, time, next_freq)) 135 return; 136 137 if (!sg_policy->work_in_progress) { 138 sg_policy->work_in_progress = true; 139 irq_work_queue(&sg_policy->irq_work); 140 } 141 } 142 143 /** 144 * get_next_freq - Compute a new frequency for a given cpufreq policy. 145 * @sg_policy: schedutil policy object to compute the new frequency for. 146 * @util: Current CPU utilization. 147 * @max: CPU capacity. 148 * 149 * If the utilization is frequency-invariant, choose the new frequency to be 150 * proportional to it, that is 151 * 152 * next_freq = C * max_freq * util / max 153 * 154 * Otherwise, approximate the would-be frequency-invariant utilization by 155 * util_raw * (curr_freq / max_freq) which leads to 156 * 157 * next_freq = C * curr_freq * util_raw / max 158 * 159 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. 160 * 161 * The lowest driver-supported frequency which is equal or greater than the raw 162 * next_freq (as calculated above) is returned, subject to policy min/max and 163 * cpufreq driver limitations. 164 */ 165 static unsigned int get_next_freq(struct sugov_policy *sg_policy, 166 unsigned long util, unsigned long max) 167 { 168 struct cpufreq_policy *policy = sg_policy->policy; 169 unsigned int freq = arch_scale_freq_invariant() ? 170 policy->cpuinfo.max_freq : policy->cur; 171 172 freq = (freq + (freq >> 2)) * util / max; 173 174 if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) 175 return sg_policy->next_freq; 176 177 sg_policy->need_freq_update = false; 178 sg_policy->cached_raw_freq = freq; 179 return cpufreq_driver_resolve_freq(policy, freq); 180 } 181 182 static void sugov_get_util(struct sugov_cpu *sg_cpu) 183 { 184 struct rq *rq = cpu_rq(sg_cpu->cpu); 185 186 sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu); 187 sg_cpu->util_cfs = cpu_util_cfs(rq); 188 sg_cpu->util_dl = cpu_util_dl(rq); 189 } 190 191 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) 192 { 193 struct rq *rq = cpu_rq(sg_cpu->cpu); 194 195 if (rt_rq_is_runnable(&rq->rt)) 196 return sg_cpu->max; 197 198 /* 199 * Utilization required by DEADLINE must always be granted while, for 200 * FAIR, we use blocked utilization of IDLE CPUs as a mechanism to 201 * gracefully reduce the frequency when no tasks show up for longer 202 * periods of time. 203 * 204 * Ideally we would like to set util_dl as min/guaranteed freq and 205 * util_cfs + util_dl as requested freq. However, cpufreq is not yet 206 * ready for such an interface. So, we only do the latter for now. 207 */ 208 return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs)); 209 } 210 211 /** 212 * sugov_iowait_reset() - Reset the IO boost status of a CPU. 213 * @sg_cpu: the sugov data for the CPU to boost 214 * @time: the update time from the caller 215 * @set_iowait_boost: true if an IO boost has been requested 216 * 217 * The IO wait boost of a task is disabled after a tick since the last update 218 * of a CPU. If a new IO wait boost is requested after more then a tick, then 219 * we enable the boost starting from the minimum frequency, which improves 220 * energy efficiency by ignoring sporadic wakeups from IO. 221 */ 222 static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, 223 bool set_iowait_boost) 224 { 225 s64 delta_ns = time - sg_cpu->last_update; 226 227 /* Reset boost only if a tick has elapsed since last request */ 228 if (delta_ns <= TICK_NSEC) 229 return false; 230 231 sg_cpu->iowait_boost = set_iowait_boost 232 ? sg_cpu->sg_policy->policy->min : 0; 233 sg_cpu->iowait_boost_pending = set_iowait_boost; 234 235 return true; 236 } 237 238 /** 239 * sugov_iowait_boost() - Updates the IO boost status of a CPU. 240 * @sg_cpu: the sugov data for the CPU to boost 241 * @time: the update time from the caller 242 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait 243 * 244 * Each time a task wakes up after an IO operation, the CPU utilization can be 245 * boosted to a certain utilization which doubles at each "frequent and 246 * successive" wakeup from IO, ranging from the utilization of the minimum 247 * OPP to the utilization of the maximum OPP. 248 * To keep doubling, an IO boost has to be requested at least once per tick, 249 * otherwise we restart from the utilization of the minimum OPP. 250 */ 251 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, 252 unsigned int flags) 253 { 254 bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT; 255 256 /* Reset boost if the CPU appears to have been idle enough */ 257 if (sg_cpu->iowait_boost && 258 sugov_iowait_reset(sg_cpu, time, set_iowait_boost)) 259 return; 260 261 /* Boost only tasks waking up after IO */ 262 if (!set_iowait_boost) 263 return; 264 265 /* Ensure boost doubles only one time at each request */ 266 if (sg_cpu->iowait_boost_pending) 267 return; 268 sg_cpu->iowait_boost_pending = true; 269 270 /* Double the boost at each request */ 271 if (sg_cpu->iowait_boost) { 272 sg_cpu->iowait_boost <<= 1; 273 if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) 274 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; 275 return; 276 } 277 278 /* First wakeup after IO: start with minimum boost */ 279 sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; 280 } 281 282 /** 283 * sugov_iowait_apply() - Apply the IO boost to a CPU. 284 * @sg_cpu: the sugov data for the cpu to boost 285 * @time: the update time from the caller 286 * @util: the utilization to (eventually) boost 287 * @max: the maximum value the utilization can be boosted to 288 * 289 * A CPU running a task which woken up after an IO operation can have its 290 * utilization boosted to speed up the completion of those IO operations. 291 * The IO boost value is increased each time a task wakes up from IO, in 292 * sugov_iowait_apply(), and it's instead decreased by this function, 293 * each time an increase has not been requested (!iowait_boost_pending). 294 * 295 * A CPU which also appears to have been idle for at least one tick has also 296 * its IO boost utilization reset. 297 * 298 * This mechanism is designed to boost high frequently IO waiting tasks, while 299 * being more conservative on tasks which does sporadic IO operations. 300 */ 301 static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, 302 unsigned long *util, unsigned long *max) 303 { 304 unsigned int boost_util, boost_max; 305 306 /* No boost currently required */ 307 if (!sg_cpu->iowait_boost) 308 return; 309 310 /* Reset boost if the CPU appears to have been idle enough */ 311 if (sugov_iowait_reset(sg_cpu, time, false)) 312 return; 313 314 /* 315 * An IO waiting task has just woken up: 316 * allow to further double the boost value 317 */ 318 if (sg_cpu->iowait_boost_pending) { 319 sg_cpu->iowait_boost_pending = false; 320 } else { 321 /* 322 * Otherwise: reduce the boost value and disable it when we 323 * reach the minimum. 324 */ 325 sg_cpu->iowait_boost >>= 1; 326 if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { 327 sg_cpu->iowait_boost = 0; 328 return; 329 } 330 } 331 332 /* 333 * Apply the current boost value: a CPU is boosted only if its current 334 * utilization is smaller then the current IO boost level. 335 */ 336 boost_util = sg_cpu->iowait_boost; 337 boost_max = sg_cpu->iowait_boost_max; 338 if (*util * boost_max < *max * boost_util) { 339 *util = boost_util; 340 *max = boost_max; 341 } 342 } 343 344 #ifdef CONFIG_NO_HZ_COMMON 345 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) 346 { 347 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); 348 bool ret = idle_calls == sg_cpu->saved_idle_calls; 349 350 sg_cpu->saved_idle_calls = idle_calls; 351 return ret; 352 } 353 #else 354 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } 355 #endif /* CONFIG_NO_HZ_COMMON */ 356 357 /* 358 * Make sugov_should_update_freq() ignore the rate limit when DL 359 * has increased the utilization. 360 */ 361 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) 362 { 363 if (cpu_util_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->util_dl) 364 sg_policy->need_freq_update = true; 365 } 366 367 static void sugov_update_single(struct update_util_data *hook, u64 time, 368 unsigned int flags) 369 { 370 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 371 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 372 unsigned long util, max; 373 unsigned int next_f; 374 bool busy; 375 376 sugov_iowait_boost(sg_cpu, time, flags); 377 sg_cpu->last_update = time; 378 379 ignore_dl_rate_limit(sg_cpu, sg_policy); 380 381 if (!sugov_should_update_freq(sg_policy, time)) 382 return; 383 384 busy = sugov_cpu_is_busy(sg_cpu); 385 386 sugov_get_util(sg_cpu); 387 max = sg_cpu->max; 388 util = sugov_aggregate_util(sg_cpu); 389 sugov_iowait_apply(sg_cpu, time, &util, &max); 390 next_f = get_next_freq(sg_policy, util, max); 391 /* 392 * Do not reduce the frequency if the CPU has not been idle 393 * recently, as the reduction is likely to be premature then. 394 */ 395 if (busy && next_f < sg_policy->next_freq) { 396 next_f = sg_policy->next_freq; 397 398 /* Reset cached freq as next_freq has changed */ 399 sg_policy->cached_raw_freq = 0; 400 } 401 402 /* 403 * This code runs under rq->lock for the target CPU, so it won't run 404 * concurrently on two different CPUs for the same target and it is not 405 * necessary to acquire the lock in the fast switch case. 406 */ 407 if (sg_policy->policy->fast_switch_enabled) { 408 sugov_fast_switch(sg_policy, time, next_f); 409 } else { 410 raw_spin_lock(&sg_policy->update_lock); 411 sugov_deferred_update(sg_policy, time, next_f); 412 raw_spin_unlock(&sg_policy->update_lock); 413 } 414 } 415 416 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) 417 { 418 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 419 struct cpufreq_policy *policy = sg_policy->policy; 420 unsigned long util = 0, max = 1; 421 unsigned int j; 422 423 for_each_cpu(j, policy->cpus) { 424 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); 425 unsigned long j_util, j_max; 426 427 sugov_get_util(j_sg_cpu); 428 j_max = j_sg_cpu->max; 429 j_util = sugov_aggregate_util(j_sg_cpu); 430 sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); 431 432 if (j_util * max > j_max * util) { 433 util = j_util; 434 max = j_max; 435 } 436 } 437 438 return get_next_freq(sg_policy, util, max); 439 } 440 441 static void 442 sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) 443 { 444 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 445 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 446 unsigned int next_f; 447 448 raw_spin_lock(&sg_policy->update_lock); 449 450 sugov_iowait_boost(sg_cpu, time, flags); 451 sg_cpu->last_update = time; 452 453 ignore_dl_rate_limit(sg_cpu, sg_policy); 454 455 if (sugov_should_update_freq(sg_policy, time)) { 456 next_f = sugov_next_freq_shared(sg_cpu, time); 457 458 if (sg_policy->policy->fast_switch_enabled) 459 sugov_fast_switch(sg_policy, time, next_f); 460 else 461 sugov_deferred_update(sg_policy, time, next_f); 462 } 463 464 raw_spin_unlock(&sg_policy->update_lock); 465 } 466 467 static void sugov_work(struct kthread_work *work) 468 { 469 struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); 470 unsigned int freq; 471 unsigned long flags; 472 473 /* 474 * Hold sg_policy->update_lock shortly to handle the case where: 475 * incase sg_policy->next_freq is read here, and then updated by 476 * sugov_deferred_update() just before work_in_progress is set to false 477 * here, we may miss queueing the new update. 478 * 479 * Note: If a work was queued after the update_lock is released, 480 * sugov_work() will just be called again by kthread_work code; and the 481 * request will be proceed before the sugov thread sleeps. 482 */ 483 raw_spin_lock_irqsave(&sg_policy->update_lock, flags); 484 freq = sg_policy->next_freq; 485 sg_policy->work_in_progress = false; 486 raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags); 487 488 mutex_lock(&sg_policy->work_lock); 489 __cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L); 490 mutex_unlock(&sg_policy->work_lock); 491 } 492 493 static void sugov_irq_work(struct irq_work *irq_work) 494 { 495 struct sugov_policy *sg_policy; 496 497 sg_policy = container_of(irq_work, struct sugov_policy, irq_work); 498 499 kthread_queue_work(&sg_policy->worker, &sg_policy->work); 500 } 501 502 /************************** sysfs interface ************************/ 503 504 static struct sugov_tunables *global_tunables; 505 static DEFINE_MUTEX(global_tunables_lock); 506 507 static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) 508 { 509 return container_of(attr_set, struct sugov_tunables, attr_set); 510 } 511 512 static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) 513 { 514 struct sugov_tunables *tunables = to_sugov_tunables(attr_set); 515 516 return sprintf(buf, "%u\n", tunables->rate_limit_us); 517 } 518 519 static ssize_t 520 rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count) 521 { 522 struct sugov_tunables *tunables = to_sugov_tunables(attr_set); 523 struct sugov_policy *sg_policy; 524 unsigned int rate_limit_us; 525 526 if (kstrtouint(buf, 10, &rate_limit_us)) 527 return -EINVAL; 528 529 tunables->rate_limit_us = rate_limit_us; 530 531 list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) 532 sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; 533 534 return count; 535 } 536 537 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); 538 539 static struct attribute *sugov_attributes[] = { 540 &rate_limit_us.attr, 541 NULL 542 }; 543 544 static struct kobj_type sugov_tunables_ktype = { 545 .default_attrs = sugov_attributes, 546 .sysfs_ops = &governor_sysfs_ops, 547 }; 548 549 /********************** cpufreq governor interface *********************/ 550 551 static struct cpufreq_governor schedutil_gov; 552 553 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) 554 { 555 struct sugov_policy *sg_policy; 556 557 sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); 558 if (!sg_policy) 559 return NULL; 560 561 sg_policy->policy = policy; 562 raw_spin_lock_init(&sg_policy->update_lock); 563 return sg_policy; 564 } 565 566 static void sugov_policy_free(struct sugov_policy *sg_policy) 567 { 568 kfree(sg_policy); 569 } 570 571 static int sugov_kthread_create(struct sugov_policy *sg_policy) 572 { 573 struct task_struct *thread; 574 struct sched_attr attr = { 575 .size = sizeof(struct sched_attr), 576 .sched_policy = SCHED_DEADLINE, 577 .sched_flags = SCHED_FLAG_SUGOV, 578 .sched_nice = 0, 579 .sched_priority = 0, 580 /* 581 * Fake (unused) bandwidth; workaround to "fix" 582 * priority inheritance. 583 */ 584 .sched_runtime = 1000000, 585 .sched_deadline = 10000000, 586 .sched_period = 10000000, 587 }; 588 struct cpufreq_policy *policy = sg_policy->policy; 589 int ret; 590 591 /* kthread only required for slow path */ 592 if (policy->fast_switch_enabled) 593 return 0; 594 595 kthread_init_work(&sg_policy->work, sugov_work); 596 kthread_init_worker(&sg_policy->worker); 597 thread = kthread_create(kthread_worker_fn, &sg_policy->worker, 598 "sugov:%d", 599 cpumask_first(policy->related_cpus)); 600 if (IS_ERR(thread)) { 601 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread)); 602 return PTR_ERR(thread); 603 } 604 605 ret = sched_setattr_nocheck(thread, &attr); 606 if (ret) { 607 kthread_stop(thread); 608 pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); 609 return ret; 610 } 611 612 sg_policy->thread = thread; 613 kthread_bind_mask(thread, policy->related_cpus); 614 init_irq_work(&sg_policy->irq_work, sugov_irq_work); 615 mutex_init(&sg_policy->work_lock); 616 617 wake_up_process(thread); 618 619 return 0; 620 } 621 622 static void sugov_kthread_stop(struct sugov_policy *sg_policy) 623 { 624 /* kthread only required for slow path */ 625 if (sg_policy->policy->fast_switch_enabled) 626 return; 627 628 kthread_flush_worker(&sg_policy->worker); 629 kthread_stop(sg_policy->thread); 630 mutex_destroy(&sg_policy->work_lock); 631 } 632 633 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) 634 { 635 struct sugov_tunables *tunables; 636 637 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); 638 if (tunables) { 639 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); 640 if (!have_governor_per_policy()) 641 global_tunables = tunables; 642 } 643 return tunables; 644 } 645 646 static void sugov_tunables_free(struct sugov_tunables *tunables) 647 { 648 if (!have_governor_per_policy()) 649 global_tunables = NULL; 650 651 kfree(tunables); 652 } 653 654 static int sugov_init(struct cpufreq_policy *policy) 655 { 656 struct sugov_policy *sg_policy; 657 struct sugov_tunables *tunables; 658 int ret = 0; 659 660 /* State should be equivalent to EXIT */ 661 if (policy->governor_data) 662 return -EBUSY; 663 664 cpufreq_enable_fast_switch(policy); 665 666 sg_policy = sugov_policy_alloc(policy); 667 if (!sg_policy) { 668 ret = -ENOMEM; 669 goto disable_fast_switch; 670 } 671 672 ret = sugov_kthread_create(sg_policy); 673 if (ret) 674 goto free_sg_policy; 675 676 mutex_lock(&global_tunables_lock); 677 678 if (global_tunables) { 679 if (WARN_ON(have_governor_per_policy())) { 680 ret = -EINVAL; 681 goto stop_kthread; 682 } 683 policy->governor_data = sg_policy; 684 sg_policy->tunables = global_tunables; 685 686 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); 687 goto out; 688 } 689 690 tunables = sugov_tunables_alloc(sg_policy); 691 if (!tunables) { 692 ret = -ENOMEM; 693 goto stop_kthread; 694 } 695 696 tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy); 697 698 policy->governor_data = sg_policy; 699 sg_policy->tunables = tunables; 700 701 ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, 702 get_governor_parent_kobj(policy), "%s", 703 schedutil_gov.name); 704 if (ret) 705 goto fail; 706 707 out: 708 mutex_unlock(&global_tunables_lock); 709 return 0; 710 711 fail: 712 policy->governor_data = NULL; 713 sugov_tunables_free(tunables); 714 715 stop_kthread: 716 sugov_kthread_stop(sg_policy); 717 mutex_unlock(&global_tunables_lock); 718 719 free_sg_policy: 720 sugov_policy_free(sg_policy); 721 722 disable_fast_switch: 723 cpufreq_disable_fast_switch(policy); 724 725 pr_err("initialization failed (error %d)\n", ret); 726 return ret; 727 } 728 729 static void sugov_exit(struct cpufreq_policy *policy) 730 { 731 struct sugov_policy *sg_policy = policy->governor_data; 732 struct sugov_tunables *tunables = sg_policy->tunables; 733 unsigned int count; 734 735 mutex_lock(&global_tunables_lock); 736 737 count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); 738 policy->governor_data = NULL; 739 if (!count) 740 sugov_tunables_free(tunables); 741 742 mutex_unlock(&global_tunables_lock); 743 744 sugov_kthread_stop(sg_policy); 745 sugov_policy_free(sg_policy); 746 cpufreq_disable_fast_switch(policy); 747 } 748 749 static int sugov_start(struct cpufreq_policy *policy) 750 { 751 struct sugov_policy *sg_policy = policy->governor_data; 752 unsigned int cpu; 753 754 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; 755 sg_policy->last_freq_update_time = 0; 756 sg_policy->next_freq = 0; 757 sg_policy->work_in_progress = false; 758 sg_policy->need_freq_update = false; 759 sg_policy->cached_raw_freq = 0; 760 761 for_each_cpu(cpu, policy->cpus) { 762 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); 763 764 memset(sg_cpu, 0, sizeof(*sg_cpu)); 765 sg_cpu->cpu = cpu; 766 sg_cpu->sg_policy = sg_policy; 767 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; 768 } 769 770 for_each_cpu(cpu, policy->cpus) { 771 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); 772 773 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, 774 policy_is_shared(policy) ? 775 sugov_update_shared : 776 sugov_update_single); 777 } 778 return 0; 779 } 780 781 static void sugov_stop(struct cpufreq_policy *policy) 782 { 783 struct sugov_policy *sg_policy = policy->governor_data; 784 unsigned int cpu; 785 786 for_each_cpu(cpu, policy->cpus) 787 cpufreq_remove_update_util_hook(cpu); 788 789 synchronize_sched(); 790 791 if (!policy->fast_switch_enabled) { 792 irq_work_sync(&sg_policy->irq_work); 793 kthread_cancel_work_sync(&sg_policy->work); 794 } 795 } 796 797 static void sugov_limits(struct cpufreq_policy *policy) 798 { 799 struct sugov_policy *sg_policy = policy->governor_data; 800 801 if (!policy->fast_switch_enabled) { 802 mutex_lock(&sg_policy->work_lock); 803 cpufreq_policy_apply_limits(policy); 804 mutex_unlock(&sg_policy->work_lock); 805 } 806 807 sg_policy->need_freq_update = true; 808 } 809 810 static struct cpufreq_governor schedutil_gov = { 811 .name = "schedutil", 812 .owner = THIS_MODULE, 813 .dynamic_switching = true, 814 .init = sugov_init, 815 .exit = sugov_exit, 816 .start = sugov_start, 817 .stop = sugov_stop, 818 .limits = sugov_limits, 819 }; 820 821 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL 822 struct cpufreq_governor *cpufreq_default_governor(void) 823 { 824 return &schedutil_gov; 825 } 826 #endif 827 828 static int __init sugov_register(void) 829 { 830 return cpufreq_register_governor(&schedutil_gov); 831 } 832 fs_initcall(sugov_register); 833