1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Timer events oriented CPU idle governor 4 * 5 * TEO governor: 6 * Copyright (C) 2018 - 2021 Intel Corporation 7 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 8 * 9 * Util-awareness mechanism: 10 * Copyright (C) 2022 Arm Ltd. 11 * Author: Kajetan Puchalski <kajetan.puchalski@arm.com> 12 */ 13 14 /** 15 * DOC: teo-description 16 * 17 * The idea of this governor is based on the observation that on many systems 18 * timer events are two or more orders of magnitude more frequent than any 19 * other interrupts, so they are likely to be the most significant cause of CPU 20 * wakeups from idle states. Moreover, information about what happened in the 21 * (relatively recent) past can be used to estimate whether or not the deepest 22 * idle state with target residency within the (known) time till the closest 23 * timer event, referred to as the sleep length, is likely to be suitable for 24 * the upcoming CPU idle period and, if not, then which of the shallower idle 25 * states to choose instead of it. 26 * 27 * Of course, non-timer wakeup sources are more important in some use cases 28 * which can be covered by taking a few most recent idle time intervals of the 29 * CPU into account. However, even in that context it is not necessary to 30 * consider idle duration values greater than the sleep length, because the 31 * closest timer will ultimately wake up the CPU anyway unless it is woken up 32 * earlier. 33 * 34 * Thus this governor estimates whether or not the prospective idle duration of 35 * a CPU is likely to be significantly shorter than the sleep length and selects 36 * an idle state for it accordingly. 37 * 38 * The computations carried out by this governor are based on using bins whose 39 * boundaries are aligned with the target residency parameter values of the CPU 40 * idle states provided by the %CPUIdle driver in the ascending order. That is, 41 * the first bin spans from 0 up to, but not including, the target residency of 42 * the second idle state (idle state 1), the second bin spans from the target 43 * residency of idle state 1 up to, but not including, the target residency of 44 * idle state 2, the third bin spans from the target residency of idle state 2 45 * up to, but not including, the target residency of idle state 3 and so on. 46 * The last bin spans from the target residency of the deepest idle state 47 * supplied by the driver to infinity. 48 * 49 * Two metrics called "hits" and "intercepts" are associated with each bin. 50 * They are updated every time before selecting an idle state for the given CPU 51 * in accordance with what happened last time. 52 * 53 * The "hits" metric reflects the relative frequency of situations in which the 54 * sleep length and the idle duration measured after CPU wakeup fall into the 55 * same bin (that is, the CPU appears to wake up "on time" relative to the sleep 56 * length). In turn, the "intercepts" metric reflects the relative frequency of 57 * situations in which the measured idle duration is so much shorter than the 58 * sleep length that the bin it falls into corresponds to an idle state 59 * shallower than the one whose bin is fallen into by the sleep length (these 60 * situations are referred to as "intercepts" below). 61 * 62 * In addition to the metrics described above, the governor counts recent 63 * intercepts (that is, intercepts that have occurred during the last 64 * %NR_RECENT invocations of it for the given CPU) for each bin. 65 * 66 * In order to select an idle state for a CPU, the governor takes the following 67 * steps (modulo the possible latency constraint that must be taken into account 68 * too): 69 * 70 * 1. Find the deepest CPU idle state whose target residency does not exceed 71 * the current sleep length (the candidate idle state) and compute 3 sums as 72 * follows: 73 * 74 * - The sum of the "hits" and "intercepts" metrics for the candidate state 75 * and all of the deeper idle states (it represents the cases in which the 76 * CPU was idle long enough to avoid being intercepted if the sleep length 77 * had been equal to the current one). 78 * 79 * - The sum of the "intercepts" metrics for all of the idle states shallower 80 * than the candidate one (it represents the cases in which the CPU was not 81 * idle long enough to avoid being intercepted if the sleep length had been 82 * equal to the current one). 83 * 84 * - The sum of the numbers of recent intercepts for all of the idle states 85 * shallower than the candidate one. 86 * 87 * 2. If the second sum is greater than the first one or the third sum is 88 * greater than %NR_RECENT / 2, the CPU is likely to wake up early, so look 89 * for an alternative idle state to select. 90 * 91 * - Traverse the idle states shallower than the candidate one in the 92 * descending order. 93 * 94 * - For each of them compute the sum of the "intercepts" metrics and the sum 95 * of the numbers of recent intercepts over all of the idle states between 96 * it and the candidate one (including the former and excluding the 97 * latter). 98 * 99 * - If each of these sums that needs to be taken into account (because the 100 * check related to it has indicated that the CPU is likely to wake up 101 * early) is greater than a half of the corresponding sum computed in step 102 * 1 (which means that the target residency of the state in question had 103 * not exceeded the idle duration in over a half of the relevant cases), 104 * select the given idle state instead of the candidate one. 105 * 106 * 3. By default, select the candidate state. 107 * 108 * Util-awareness mechanism: 109 * 110 * The idea behind the util-awareness extension is that there are two distinct 111 * scenarios for the CPU which should result in two different approaches to idle 112 * state selection - utilized and not utilized. 113 * 114 * In this case, 'utilized' means that the average runqueue util of the CPU is 115 * above a certain threshold. 116 * 117 * When the CPU is utilized while going into idle, more likely than not it will 118 * be woken up to do more work soon and so a shallower idle state should be 119 * selected to minimise latency and maximise performance. When the CPU is not 120 * being utilized, the usual metrics-based approach to selecting the deepest 121 * available idle state should be preferred to take advantage of the power 122 * saving. 123 * 124 * In order to achieve this, the governor uses a utilization threshold. 125 * The threshold is computed per-CPU as a percentage of the CPU's capacity 126 * by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%) 127 * seems to be getting the best results. 128 * 129 * Before selecting the next idle state, the governor compares the current CPU 130 * util to the precomputed util threshold. If it's below, it defaults to the 131 * TEO metrics mechanism. If it's above, the closest shallower idle state will 132 * be selected instead, as long as is not a polling state. 133 */ 134 135 #include <linux/cpuidle.h> 136 #include <linux/jiffies.h> 137 #include <linux/kernel.h> 138 #include <linux/sched.h> 139 #include <linux/sched/clock.h> 140 #include <linux/sched/topology.h> 141 #include <linux/tick.h> 142 143 #include "gov.h" 144 145 /* 146 * The number of bits to shift the CPU's capacity by in order to determine 147 * the utilized threshold. 148 * 149 * 6 was chosen based on testing as the number that achieved the best balance 150 * of power and performance on average. 151 * 152 * The resulting threshold is high enough to not be triggered by background 153 * noise and low enough to react quickly when activity starts to ramp up. 154 */ 155 #define UTIL_THRESHOLD_SHIFT 6 156 157 /* 158 * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value 159 * is used for decreasing metrics on a regular basis. 160 */ 161 #define PULSE 1024 162 #define DECAY_SHIFT 3 163 164 /* 165 * Number of the most recent idle duration values to take into consideration for 166 * the detection of recent early wakeup patterns. 167 */ 168 #define NR_RECENT 9 169 170 /** 171 * struct teo_bin - Metrics used by the TEO cpuidle governor. 172 * @intercepts: The "intercepts" metric. 173 * @hits: The "hits" metric. 174 * @recent: The number of recent "intercepts". 175 */ 176 struct teo_bin { 177 unsigned int intercepts; 178 unsigned int hits; 179 unsigned int recent; 180 }; 181 182 /** 183 * struct teo_cpu - CPU data used by the TEO cpuidle governor. 184 * @time_span_ns: Time between idle state selection and post-wakeup update. 185 * @sleep_length_ns: Time till the closest timer event (at the selection time). 186 * @state_bins: Idle state data bins for this CPU. 187 * @total: Grand total of the "intercepts" and "hits" metrics for all bins. 188 * @next_recent_idx: Index of the next @recent_idx entry to update. 189 * @recent_idx: Indices of bins corresponding to recent "intercepts". 190 * @tick_hits: Number of "hits" after TICK_NSEC. 191 * @util_threshold: Threshold above which the CPU is considered utilized 192 */ 193 struct teo_cpu { 194 s64 time_span_ns; 195 s64 sleep_length_ns; 196 struct teo_bin state_bins[CPUIDLE_STATE_MAX]; 197 unsigned int total; 198 int next_recent_idx; 199 int recent_idx[NR_RECENT]; 200 unsigned int tick_hits; 201 unsigned long util_threshold; 202 }; 203 204 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); 205 206 /** 207 * teo_cpu_is_utilized - Check if the CPU's util is above the threshold 208 * @cpu: Target CPU 209 * @cpu_data: Governor CPU data for the target CPU 210 */ 211 #ifdef CONFIG_SMP 212 static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data) 213 { 214 return sched_cpu_util(cpu) > cpu_data->util_threshold; 215 } 216 #else 217 static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data) 218 { 219 return false; 220 } 221 #endif 222 223 /** 224 * teo_update - Update CPU metrics after wakeup. 225 * @drv: cpuidle driver containing state data. 226 * @dev: Target CPU. 227 */ 228 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) 229 { 230 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 231 int i, idx_timer = 0, idx_duration = 0; 232 s64 target_residency_ns; 233 u64 measured_ns; 234 235 if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { 236 /* 237 * One of the safety nets has triggered or the wakeup was close 238 * enough to the closest timer event expected at the idle state 239 * selection time to be discarded. 240 */ 241 measured_ns = U64_MAX; 242 } else { 243 u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; 244 245 /* 246 * The computations below are to determine whether or not the 247 * (saved) time till the next timer event and the measured idle 248 * duration fall into the same "bin", so use last_residency_ns 249 * for that instead of time_span_ns which includes the cpuidle 250 * overhead. 251 */ 252 measured_ns = dev->last_residency_ns; 253 /* 254 * The delay between the wakeup and the first instruction 255 * executed by the CPU is not likely to be worst-case every 256 * time, so take 1/2 of the exit latency as a very rough 257 * approximation of the average of it. 258 */ 259 if (measured_ns >= lat_ns) 260 measured_ns -= lat_ns / 2; 261 else 262 measured_ns /= 2; 263 } 264 265 cpu_data->total = 0; 266 267 /* 268 * Decay the "hits" and "intercepts" metrics for all of the bins and 269 * find the bins that the sleep length and the measured idle duration 270 * fall into. 271 */ 272 for (i = 0; i < drv->state_count; i++) { 273 struct teo_bin *bin = &cpu_data->state_bins[i]; 274 275 bin->hits -= bin->hits >> DECAY_SHIFT; 276 bin->intercepts -= bin->intercepts >> DECAY_SHIFT; 277 278 cpu_data->total += bin->hits + bin->intercepts; 279 280 target_residency_ns = drv->states[i].target_residency_ns; 281 282 if (target_residency_ns <= cpu_data->sleep_length_ns) { 283 idx_timer = i; 284 if (target_residency_ns <= measured_ns) 285 idx_duration = i; 286 } 287 } 288 289 i = cpu_data->next_recent_idx++; 290 if (cpu_data->next_recent_idx >= NR_RECENT) 291 cpu_data->next_recent_idx = 0; 292 293 if (cpu_data->recent_idx[i] >= 0) 294 cpu_data->state_bins[cpu_data->recent_idx[i]].recent--; 295 296 /* 297 * If the deepest state's target residency is below the tick length, 298 * make a record of it to help teo_select() decide whether or not 299 * to stop the tick. This effectively adds an extra hits-only bin 300 * beyond the last state-related one. 301 */ 302 if (target_residency_ns < TICK_NSEC) { 303 cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; 304 305 cpu_data->total += cpu_data->tick_hits; 306 307 if (TICK_NSEC <= cpu_data->sleep_length_ns) { 308 idx_timer = drv->state_count; 309 if (TICK_NSEC <= measured_ns) { 310 cpu_data->tick_hits += PULSE; 311 goto end; 312 } 313 } 314 } 315 316 /* 317 * If the measured idle duration falls into the same bin as the sleep 318 * length, this is a "hit", so update the "hits" metric for that bin. 319 * Otherwise, update the "intercepts" metric for the bin fallen into by 320 * the measured idle duration. 321 */ 322 if (idx_timer == idx_duration) { 323 cpu_data->state_bins[idx_timer].hits += PULSE; 324 cpu_data->recent_idx[i] = -1; 325 } else { 326 cpu_data->state_bins[idx_duration].intercepts += PULSE; 327 cpu_data->state_bins[idx_duration].recent++; 328 cpu_data->recent_idx[i] = idx_duration; 329 } 330 331 end: 332 cpu_data->total += PULSE; 333 } 334 335 static bool teo_state_ok(int i, struct cpuidle_driver *drv) 336 { 337 return !tick_nohz_tick_stopped() || 338 drv->states[i].target_residency_ns >= TICK_NSEC; 339 } 340 341 /** 342 * teo_find_shallower_state - Find shallower idle state matching given duration. 343 * @drv: cpuidle driver containing state data. 344 * @dev: Target CPU. 345 * @state_idx: Index of the capping idle state. 346 * @duration_ns: Idle duration value to match. 347 * @no_poll: Don't consider polling states. 348 */ 349 static int teo_find_shallower_state(struct cpuidle_driver *drv, 350 struct cpuidle_device *dev, int state_idx, 351 s64 duration_ns, bool no_poll) 352 { 353 int i; 354 355 for (i = state_idx - 1; i >= 0; i--) { 356 if (dev->states_usage[i].disable || 357 (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING)) 358 continue; 359 360 state_idx = i; 361 if (drv->states[i].target_residency_ns <= duration_ns) 362 break; 363 } 364 return state_idx; 365 } 366 367 /** 368 * teo_select - Selects the next idle state to enter. 369 * @drv: cpuidle driver containing state data. 370 * @dev: Target CPU. 371 * @stop_tick: Indication on whether or not to stop the scheduler tick. 372 */ 373 static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, 374 bool *stop_tick) 375 { 376 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 377 s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 378 ktime_t delta_tick = TICK_NSEC / 2; 379 unsigned int tick_intercept_sum = 0; 380 unsigned int idx_intercept_sum = 0; 381 unsigned int intercept_sum = 0; 382 unsigned int idx_recent_sum = 0; 383 unsigned int recent_sum = 0; 384 unsigned int idx_hit_sum = 0; 385 unsigned int hit_sum = 0; 386 int constraint_idx = 0; 387 int idx0 = 0, idx = -1; 388 bool alt_intercepts, alt_recent; 389 bool cpu_utilized; 390 s64 duration_ns; 391 int i; 392 393 if (dev->last_state_idx >= 0) { 394 teo_update(drv, dev); 395 dev->last_state_idx = -1; 396 } 397 398 cpu_data->time_span_ns = local_clock(); 399 /* 400 * Set the expected sleep length to infinity in case of an early 401 * return. 402 */ 403 cpu_data->sleep_length_ns = KTIME_MAX; 404 405 /* Check if there is any choice in the first place. */ 406 if (drv->state_count < 2) { 407 idx = 0; 408 goto out_tick; 409 } 410 411 if (!dev->states_usage[0].disable) 412 idx = 0; 413 414 cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data); 415 /* 416 * If the CPU is being utilized over the threshold and there are only 2 417 * states to choose from, the metrics need not be considered, so choose 418 * the shallowest non-polling state and exit. 419 */ 420 if (drv->state_count < 3 && cpu_utilized) { 421 /* 422 * If state 0 is enabled and it is not a polling one, select it 423 * right away unless the scheduler tick has been stopped, in 424 * which case care needs to be taken to leave the CPU in a deep 425 * enough state in case it is not woken up any time soon after 426 * all. If state 1 is disabled, though, state 0 must be used 427 * anyway. 428 */ 429 if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) && 430 teo_state_ok(0, drv)) || dev->states_usage[1].disable) { 431 idx = 0; 432 goto out_tick; 433 } 434 /* Assume that state 1 is not a polling one and use it. */ 435 idx = 1; 436 duration_ns = drv->states[1].target_residency_ns; 437 goto end; 438 } 439 440 /* Compute the sums of metrics for early wakeup pattern detection. */ 441 for (i = 1; i < drv->state_count; i++) { 442 struct teo_bin *prev_bin = &cpu_data->state_bins[i-1]; 443 struct cpuidle_state *s = &drv->states[i]; 444 445 /* 446 * Update the sums of idle state mertics for all of the states 447 * shallower than the current one. 448 */ 449 intercept_sum += prev_bin->intercepts; 450 hit_sum += prev_bin->hits; 451 recent_sum += prev_bin->recent; 452 453 if (dev->states_usage[i].disable) 454 continue; 455 456 if (idx < 0) 457 idx0 = i; /* first enabled state */ 458 459 idx = i; 460 461 if (s->exit_latency_ns <= latency_req) 462 constraint_idx = i; 463 464 /* Save the sums for the current state. */ 465 idx_intercept_sum = intercept_sum; 466 idx_hit_sum = hit_sum; 467 idx_recent_sum = recent_sum; 468 } 469 470 /* Avoid unnecessary overhead. */ 471 if (idx < 0) { 472 idx = 0; /* No states enabled, must use 0. */ 473 goto out_tick; 474 } 475 476 if (idx == idx0) { 477 /* 478 * Only one idle state is enabled, so use it, but do not 479 * allow the tick to be stopped it is shallow enough. 480 */ 481 duration_ns = drv->states[idx].target_residency_ns; 482 goto end; 483 } 484 485 tick_intercept_sum = intercept_sum + 486 cpu_data->state_bins[drv->state_count-1].intercepts; 487 488 /* 489 * If the sum of the intercepts metric for all of the idle states 490 * shallower than the current candidate one (idx) is greater than the 491 * sum of the intercepts and hits metrics for the candidate state and 492 * all of the deeper states, or the sum of the numbers of recent 493 * intercepts over all of the states shallower than the candidate one 494 * is greater than a half of the number of recent events taken into 495 * account, a shallower idle state is likely to be a better choice. 496 */ 497 alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; 498 alt_recent = idx_recent_sum > NR_RECENT / 2; 499 if (alt_recent || alt_intercepts) { 500 int first_suitable_idx = idx; 501 502 /* 503 * Look for the deepest idle state whose target residency had 504 * not exceeded the idle duration in over a half of the relevant 505 * cases (both with respect to intercepts overall and with 506 * respect to the recent intercepts only) in the past. 507 * 508 * Take the possible duration limitation present if the tick 509 * has been stopped already into account. 510 */ 511 intercept_sum = 0; 512 recent_sum = 0; 513 514 for (i = idx - 1; i >= 0; i--) { 515 struct teo_bin *bin = &cpu_data->state_bins[i]; 516 517 intercept_sum += bin->intercepts; 518 recent_sum += bin->recent; 519 520 if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && 521 (!alt_intercepts || 522 2 * intercept_sum > idx_intercept_sum)) { 523 /* 524 * Use the current state unless it is too 525 * shallow or disabled, in which case take the 526 * first enabled state that is deep enough. 527 */ 528 if (teo_state_ok(i, drv) && 529 !dev->states_usage[i].disable) 530 idx = i; 531 else 532 idx = first_suitable_idx; 533 534 break; 535 } 536 537 if (dev->states_usage[i].disable) 538 continue; 539 540 if (!teo_state_ok(i, drv)) { 541 /* 542 * The current state is too shallow, but if an 543 * alternative candidate state has been found, 544 * it may still turn out to be a better choice. 545 */ 546 if (first_suitable_idx != idx) 547 continue; 548 549 break; 550 } 551 552 first_suitable_idx = i; 553 } 554 } 555 556 /* 557 * If there is a latency constraint, it may be necessary to select an 558 * idle state shallower than the current candidate one. 559 */ 560 if (idx > constraint_idx) 561 idx = constraint_idx; 562 563 /* 564 * If the CPU is being utilized over the threshold, choose a shallower 565 * non-polling state to improve latency, unless the scheduler tick has 566 * been stopped already and the shallower state's target residency is 567 * not sufficiently large. 568 */ 569 if (cpu_utilized) { 570 i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true); 571 if (teo_state_ok(i, drv)) 572 idx = i; 573 } 574 575 /* 576 * Skip the timers check if state 0 is the current candidate one, 577 * because an immediate non-timer wakeup is expected in that case. 578 */ 579 if (!idx) 580 goto out_tick; 581 582 /* 583 * If state 0 is a polling one, check if the target residency of 584 * the current candidate state is low enough and skip the timers 585 * check in that case too. 586 */ 587 if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && 588 drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) 589 goto out_tick; 590 591 duration_ns = tick_nohz_get_sleep_length(&delta_tick); 592 cpu_data->sleep_length_ns = duration_ns; 593 594 /* 595 * If the closest expected timer is before the terget residency of the 596 * candidate state, a shallower one needs to be found. 597 */ 598 if (drv->states[idx].target_residency_ns > duration_ns) { 599 i = teo_find_shallower_state(drv, dev, idx, duration_ns, false); 600 if (teo_state_ok(i, drv)) 601 idx = i; 602 } 603 604 /* 605 * If the selected state's target residency is below the tick length 606 * and intercepts occurring before the tick length are the majority of 607 * total wakeup events, do not stop the tick. 608 */ 609 if (drv->states[idx].target_residency_ns < TICK_NSEC && 610 tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) 611 duration_ns = TICK_NSEC / 2; 612 613 end: 614 /* 615 * Allow the tick to be stopped unless the selected state is a polling 616 * one or the expected idle duration is shorter than the tick period 617 * length. 618 */ 619 if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && 620 duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped()) 621 return idx; 622 623 /* 624 * The tick is not going to be stopped, so if the target residency of 625 * the state to be returned is not within the time till the closest 626 * timer including the tick, try to correct that. 627 */ 628 if (idx > idx0 && 629 drv->states[idx].target_residency_ns > delta_tick) 630 idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false); 631 632 out_tick: 633 *stop_tick = false; 634 return idx; 635 } 636 637 /** 638 * teo_reflect - Note that governor data for the CPU need to be updated. 639 * @dev: Target CPU. 640 * @state: Entered state. 641 */ 642 static void teo_reflect(struct cpuidle_device *dev, int state) 643 { 644 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 645 646 dev->last_state_idx = state; 647 /* 648 * If the wakeup was not "natural", but triggered by one of the safety 649 * nets, assume that the CPU might have been idle for the entire sleep 650 * length time. 651 */ 652 if (dev->poll_time_limit || 653 (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) { 654 dev->poll_time_limit = false; 655 cpu_data->time_span_ns = cpu_data->sleep_length_ns; 656 } else { 657 cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns; 658 } 659 } 660 661 /** 662 * teo_enable_device - Initialize the governor's data for the target CPU. 663 * @drv: cpuidle driver (not used). 664 * @dev: Target CPU. 665 */ 666 static int teo_enable_device(struct cpuidle_driver *drv, 667 struct cpuidle_device *dev) 668 { 669 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 670 unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu); 671 int i; 672 673 memset(cpu_data, 0, sizeof(*cpu_data)); 674 cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT; 675 676 for (i = 0; i < NR_RECENT; i++) 677 cpu_data->recent_idx[i] = -1; 678 679 return 0; 680 } 681 682 static struct cpuidle_governor teo_governor = { 683 .name = "teo", 684 .rating = 19, 685 .enable = teo_enable_device, 686 .select = teo_select, 687 .reflect = teo_reflect, 688 }; 689 690 static int __init teo_governor_init(void) 691 { 692 return cpuidle_register_governor(&teo_governor); 693 } 694 695 postcore_initcall(teo_governor_init); 696