1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A power allocator to manage temperature 4 * 5 * Copyright (C) 2014 ARM Ltd. 6 * 7 */ 8 9 #define pr_fmt(fmt) "Power allocator: " fmt 10 11 #include <linux/slab.h> 12 #include <linux/thermal.h> 13 14 #define CREATE_TRACE_POINTS 15 #include "thermal_trace_ipa.h" 16 17 #include "thermal_core.h" 18 19 #define FRAC_BITS 10 20 #define int_to_frac(x) ((x) << FRAC_BITS) 21 #define frac_to_int(x) ((x) >> FRAC_BITS) 22 23 /** 24 * mul_frac() - multiply two fixed-point numbers 25 * @x: first multiplicand 26 * @y: second multiplicand 27 * 28 * Return: the result of multiplying two fixed-point numbers. The 29 * result is also a fixed-point number. 30 */ 31 static inline s64 mul_frac(s64 x, s64 y) 32 { 33 return (x * y) >> FRAC_BITS; 34 } 35 36 /** 37 * div_frac() - divide two fixed-point numbers 38 * @x: the dividend 39 * @y: the divisor 40 * 41 * Return: the result of dividing two fixed-point numbers. The 42 * result is also a fixed-point number. 43 */ 44 static inline s64 div_frac(s64 x, s64 y) 45 { 46 return div_s64(x << FRAC_BITS, y); 47 } 48 49 /** 50 * struct power_allocator_params - parameters for the power allocator governor 51 * @allocated_tzp: whether we have allocated tzp for this thermal zone and 52 * it needs to be freed on unbind 53 * @err_integral: accumulated error in the PID controller. 54 * @prev_err: error in the previous iteration of the PID controller. 55 * Used to calculate the derivative term. 56 * @sustainable_power: Sustainable power (heat) that this thermal zone can 57 * dissipate 58 * @trip_switch_on: first passive trip point of the thermal zone. The 59 * governor switches on when this trip point is crossed. 60 * If the thermal zone only has one passive trip point, 61 * @trip_switch_on should be NULL. 62 * @trip_max_desired_temperature: last passive trip point of the thermal 63 * zone. The temperature we are 64 * controlling for. 65 */ 66 struct power_allocator_params { 67 bool allocated_tzp; 68 s64 err_integral; 69 s32 prev_err; 70 u32 sustainable_power; 71 const struct thermal_trip *trip_switch_on; 72 const struct thermal_trip *trip_max_desired_temperature; 73 }; 74 75 /** 76 * estimate_sustainable_power() - Estimate the sustainable power of a thermal zone 77 * @tz: thermal zone we are operating in 78 * 79 * For thermal zones that don't provide a sustainable_power in their 80 * thermal_zone_params, estimate one. Calculate it using the minimum 81 * power of all the cooling devices as that gives a valid value that 82 * can give some degree of functionality. For optimal performance of 83 * this governor, provide a sustainable_power in the thermal zone's 84 * thermal_zone_params. 85 */ 86 static u32 estimate_sustainable_power(struct thermal_zone_device *tz) 87 { 88 u32 sustainable_power = 0; 89 struct thermal_instance *instance; 90 struct power_allocator_params *params = tz->governor_data; 91 92 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 93 struct thermal_cooling_device *cdev = instance->cdev; 94 u32 min_power; 95 96 if (instance->trip != params->trip_max_desired_temperature) 97 continue; 98 99 if (!cdev_is_power_actor(cdev)) 100 continue; 101 102 if (cdev->ops->state2power(cdev, instance->upper, &min_power)) 103 continue; 104 105 sustainable_power += min_power; 106 } 107 108 return sustainable_power; 109 } 110 111 /** 112 * estimate_pid_constants() - Estimate the constants for the PID controller 113 * @tz: thermal zone for which to estimate the constants 114 * @sustainable_power: sustainable power for the thermal zone 115 * @trip_switch_on: trip point for the switch on temperature 116 * @control_temp: target temperature for the power allocator governor 117 * 118 * This function is used to update the estimation of the PID 119 * controller constants in struct thermal_zone_parameters. 120 */ 121 static void estimate_pid_constants(struct thermal_zone_device *tz, 122 u32 sustainable_power, 123 const struct thermal_trip *trip_switch_on, 124 int control_temp) 125 { 126 u32 temperature_threshold = control_temp; 127 s32 k_i; 128 129 if (trip_switch_on) 130 temperature_threshold -= trip_switch_on->temperature; 131 132 /* 133 * estimate_pid_constants() tries to find appropriate default 134 * values for thermal zones that don't provide them. If a 135 * system integrator has configured a thermal zone with two 136 * passive trip points at the same temperature, that person 137 * hasn't put any effort to set up the thermal zone properly 138 * so just give up. 139 */ 140 if (!temperature_threshold) 141 return; 142 143 tz->tzp->k_po = int_to_frac(sustainable_power) / 144 temperature_threshold; 145 146 tz->tzp->k_pu = int_to_frac(2 * sustainable_power) / 147 temperature_threshold; 148 149 k_i = tz->tzp->k_pu / 10; 150 tz->tzp->k_i = k_i > 0 ? k_i : 1; 151 152 /* 153 * The default for k_d and integral_cutoff is 0, so we can 154 * leave them as they are. 155 */ 156 } 157 158 /** 159 * get_sustainable_power() - Get the right sustainable power 160 * @tz: thermal zone for which to estimate the constants 161 * @params: parameters for the power allocator governor 162 * @control_temp: target temperature for the power allocator governor 163 * 164 * This function is used for getting the proper sustainable power value based 165 * on variables which might be updated by the user sysfs interface. If that 166 * happen the new value is going to be estimated and updated. It is also used 167 * after thermal zone binding, where the initial values where set to 0. 168 */ 169 static u32 get_sustainable_power(struct thermal_zone_device *tz, 170 struct power_allocator_params *params, 171 int control_temp) 172 { 173 u32 sustainable_power; 174 175 if (!tz->tzp->sustainable_power) 176 sustainable_power = estimate_sustainable_power(tz); 177 else 178 sustainable_power = tz->tzp->sustainable_power; 179 180 /* Check if it's init value 0 or there was update via sysfs */ 181 if (sustainable_power != params->sustainable_power) { 182 estimate_pid_constants(tz, sustainable_power, 183 params->trip_switch_on, control_temp); 184 185 /* Do the estimation only once and make available in sysfs */ 186 tz->tzp->sustainable_power = sustainable_power; 187 params->sustainable_power = sustainable_power; 188 } 189 190 return sustainable_power; 191 } 192 193 /** 194 * pid_controller() - PID controller 195 * @tz: thermal zone we are operating in 196 * @control_temp: the target temperature in millicelsius 197 * @max_allocatable_power: maximum allocatable power for this thermal zone 198 * 199 * This PID controller increases the available power budget so that the 200 * temperature of the thermal zone gets as close as possible to 201 * @control_temp and limits the power if it exceeds it. k_po is the 202 * proportional term when we are overshooting, k_pu is the 203 * proportional term when we are undershooting. integral_cutoff is a 204 * threshold below which we stop accumulating the error. The 205 * accumulated error is only valid if the requested power will make 206 * the system warmer. If the system is mostly idle, there's no point 207 * in accumulating positive error. 208 * 209 * Return: The power budget for the next period. 210 */ 211 static u32 pid_controller(struct thermal_zone_device *tz, 212 int control_temp, 213 u32 max_allocatable_power) 214 { 215 s64 p, i, d, power_range; 216 s32 err, max_power_frac; 217 u32 sustainable_power; 218 struct power_allocator_params *params = tz->governor_data; 219 220 max_power_frac = int_to_frac(max_allocatable_power); 221 222 sustainable_power = get_sustainable_power(tz, params, control_temp); 223 224 err = control_temp - tz->temperature; 225 err = int_to_frac(err); 226 227 /* Calculate the proportional term */ 228 p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err); 229 230 /* 231 * Calculate the integral term 232 * 233 * if the error is less than cut off allow integration (but 234 * the integral is limited to max power) 235 */ 236 i = mul_frac(tz->tzp->k_i, params->err_integral); 237 238 if (err < int_to_frac(tz->tzp->integral_cutoff)) { 239 s64 i_next = i + mul_frac(tz->tzp->k_i, err); 240 241 if (abs(i_next) < max_power_frac) { 242 i = i_next; 243 params->err_integral += err; 244 } 245 } 246 247 /* 248 * Calculate the derivative term 249 * 250 * We do err - prev_err, so with a positive k_d, a decreasing 251 * error (i.e. driving closer to the line) results in less 252 * power being applied, slowing down the controller) 253 */ 254 d = mul_frac(tz->tzp->k_d, err - params->prev_err); 255 d = div_frac(d, jiffies_to_msecs(tz->passive_delay_jiffies)); 256 params->prev_err = err; 257 258 power_range = p + i + d; 259 260 /* feed-forward the known sustainable dissipatable power */ 261 power_range = sustainable_power + frac_to_int(power_range); 262 263 power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); 264 265 trace_thermal_power_allocator_pid(tz, frac_to_int(err), 266 frac_to_int(params->err_integral), 267 frac_to_int(p), frac_to_int(i), 268 frac_to_int(d), power_range); 269 270 return power_range; 271 } 272 273 /** 274 * power_actor_set_power() - limit the maximum power a cooling device consumes 275 * @cdev: pointer to &thermal_cooling_device 276 * @instance: thermal instance to update 277 * @power: the power in milliwatts 278 * 279 * Set the cooling device to consume at most @power milliwatts. The limit is 280 * expected to be a cap at the maximum power consumption. 281 * 282 * Return: 0 on success, -EINVAL if the cooling device does not 283 * implement the power actor API or -E* for other failures. 284 */ 285 static int 286 power_actor_set_power(struct thermal_cooling_device *cdev, 287 struct thermal_instance *instance, u32 power) 288 { 289 unsigned long state; 290 int ret; 291 292 ret = cdev->ops->power2state(cdev, power, &state); 293 if (ret) 294 return ret; 295 296 instance->target = clamp_val(state, instance->lower, instance->upper); 297 mutex_lock(&cdev->lock); 298 __thermal_cdev_update(cdev); 299 mutex_unlock(&cdev->lock); 300 301 return 0; 302 } 303 304 /** 305 * divvy_up_power() - divvy the allocated power between the actors 306 * @req_power: each actor's requested power 307 * @max_power: each actor's maximum available power 308 * @num_actors: size of the @req_power, @max_power and @granted_power's array 309 * @total_req_power: sum of @req_power 310 * @power_range: total allocated power 311 * @granted_power: output array: each actor's granted power 312 * @extra_actor_power: an appropriately sized array to be used in the 313 * function as temporary storage of the extra power given 314 * to the actors 315 * 316 * This function divides the total allocated power (@power_range) 317 * fairly between the actors. It first tries to give each actor a 318 * share of the @power_range according to how much power it requested 319 * compared to the rest of the actors. For example, if only one actor 320 * requests power, then it receives all the @power_range. If 321 * three actors each requests 1mW, each receives a third of the 322 * @power_range. 323 * 324 * If any actor received more than their maximum power, then that 325 * surplus is re-divvied among the actors based on how far they are 326 * from their respective maximums. 327 * 328 * Granted power for each actor is written to @granted_power, which 329 * should've been allocated by the calling function. 330 */ 331 static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, 332 u32 total_req_power, u32 power_range, 333 u32 *granted_power, u32 *extra_actor_power) 334 { 335 u32 extra_power, capped_extra_power; 336 int i; 337 338 /* 339 * Prevent division by 0 if none of the actors request power. 340 */ 341 if (!total_req_power) 342 total_req_power = 1; 343 344 capped_extra_power = 0; 345 extra_power = 0; 346 for (i = 0; i < num_actors; i++) { 347 u64 req_range = (u64)req_power[i] * power_range; 348 349 granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range, 350 total_req_power); 351 352 if (granted_power[i] > max_power[i]) { 353 extra_power += granted_power[i] - max_power[i]; 354 granted_power[i] = max_power[i]; 355 } 356 357 extra_actor_power[i] = max_power[i] - granted_power[i]; 358 capped_extra_power += extra_actor_power[i]; 359 } 360 361 if (!extra_power) 362 return; 363 364 /* 365 * Re-divvy the reclaimed extra among actors based on 366 * how far they are from the max 367 */ 368 extra_power = min(extra_power, capped_extra_power); 369 if (capped_extra_power > 0) 370 for (i = 0; i < num_actors; i++) { 371 u64 extra_range = (u64)extra_actor_power[i] * extra_power; 372 granted_power[i] += DIV_ROUND_CLOSEST_ULL(extra_range, 373 capped_extra_power); 374 } 375 } 376 377 static int allocate_power(struct thermal_zone_device *tz, 378 int control_temp) 379 { 380 struct thermal_instance *instance; 381 struct power_allocator_params *params = tz->governor_data; 382 const struct thermal_trip *trip_max_desired_temperature = 383 params->trip_max_desired_temperature; 384 u32 *req_power, *max_power, *granted_power, *extra_actor_power; 385 u32 *weighted_req_power; 386 u32 total_req_power, max_allocatable_power, total_weighted_req_power; 387 u32 total_granted_power, power_range; 388 int i, num_actors, total_weight, ret = 0; 389 390 num_actors = 0; 391 total_weight = 0; 392 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 393 if ((instance->trip == trip_max_desired_temperature) && 394 cdev_is_power_actor(instance->cdev)) { 395 num_actors++; 396 total_weight += instance->weight; 397 } 398 } 399 400 if (!num_actors) 401 return -ENODEV; 402 403 /* 404 * We need to allocate five arrays of the same size: 405 * req_power, max_power, granted_power, extra_actor_power and 406 * weighted_req_power. They are going to be needed until this 407 * function returns. Allocate them all in one go to simplify 408 * the allocation and deallocation logic. 409 */ 410 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power)); 411 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power)); 412 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); 413 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power)); 414 req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL); 415 if (!req_power) 416 return -ENOMEM; 417 418 max_power = &req_power[num_actors]; 419 granted_power = &req_power[2 * num_actors]; 420 extra_actor_power = &req_power[3 * num_actors]; 421 weighted_req_power = &req_power[4 * num_actors]; 422 423 i = 0; 424 total_weighted_req_power = 0; 425 total_req_power = 0; 426 max_allocatable_power = 0; 427 428 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 429 int weight; 430 struct thermal_cooling_device *cdev = instance->cdev; 431 432 if (instance->trip != trip_max_desired_temperature) 433 continue; 434 435 if (!cdev_is_power_actor(cdev)) 436 continue; 437 438 if (cdev->ops->get_requested_power(cdev, &req_power[i])) 439 continue; 440 441 if (!total_weight) 442 weight = 1 << FRAC_BITS; 443 else 444 weight = instance->weight; 445 446 weighted_req_power[i] = frac_to_int(weight * req_power[i]); 447 448 if (cdev->ops->state2power(cdev, instance->lower, 449 &max_power[i])) 450 continue; 451 452 total_req_power += req_power[i]; 453 max_allocatable_power += max_power[i]; 454 total_weighted_req_power += weighted_req_power[i]; 455 456 i++; 457 } 458 459 power_range = pid_controller(tz, control_temp, max_allocatable_power); 460 461 divvy_up_power(weighted_req_power, max_power, num_actors, 462 total_weighted_req_power, power_range, granted_power, 463 extra_actor_power); 464 465 total_granted_power = 0; 466 i = 0; 467 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 468 if (instance->trip != trip_max_desired_temperature) 469 continue; 470 471 if (!cdev_is_power_actor(instance->cdev)) 472 continue; 473 474 power_actor_set_power(instance->cdev, instance, 475 granted_power[i]); 476 total_granted_power += granted_power[i]; 477 478 i++; 479 } 480 481 trace_thermal_power_allocator(tz, req_power, total_req_power, 482 granted_power, total_granted_power, 483 num_actors, power_range, 484 max_allocatable_power, tz->temperature, 485 control_temp - tz->temperature); 486 487 kfree(req_power); 488 489 return ret; 490 } 491 492 /** 493 * get_governor_trips() - get the two trip points that are key for this governor 494 * @tz: thermal zone to operate on 495 * @params: pointer to private data for this governor 496 * 497 * The power allocator governor works optimally with two trips points: 498 * a "switch on" trip point and a "maximum desired temperature". These 499 * are defined as the first and last passive trip points. 500 * 501 * If there is only one trip point, then that's considered to be the 502 * "maximum desired temperature" trip point and the governor is always 503 * on. If there are no passive or active trip points, then the 504 * governor won't do anything. In fact, its throttle function 505 * won't be called at all. 506 */ 507 static void get_governor_trips(struct thermal_zone_device *tz, 508 struct power_allocator_params *params) 509 { 510 const struct thermal_trip *first_passive = NULL; 511 const struct thermal_trip *last_passive = NULL; 512 const struct thermal_trip *last_active = NULL; 513 const struct thermal_trip *trip; 514 515 for_each_trip(tz, trip) { 516 switch (trip->type) { 517 case THERMAL_TRIP_PASSIVE: 518 if (!first_passive) { 519 first_passive = trip; 520 break; 521 } 522 last_passive = trip; 523 break; 524 case THERMAL_TRIP_ACTIVE: 525 last_active = trip; 526 break; 527 default: 528 break; 529 } 530 } 531 532 if (last_passive) { 533 params->trip_switch_on = first_passive; 534 params->trip_max_desired_temperature = last_passive; 535 } else if (first_passive) { 536 params->trip_switch_on = NULL; 537 params->trip_max_desired_temperature = first_passive; 538 } else { 539 params->trip_switch_on = NULL; 540 params->trip_max_desired_temperature = last_active; 541 } 542 } 543 544 static void reset_pid_controller(struct power_allocator_params *params) 545 { 546 params->err_integral = 0; 547 params->prev_err = 0; 548 } 549 550 static void allow_maximum_power(struct thermal_zone_device *tz, bool update) 551 { 552 struct thermal_instance *instance; 553 struct power_allocator_params *params = tz->governor_data; 554 u32 req_power; 555 556 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 557 struct thermal_cooling_device *cdev = instance->cdev; 558 559 if (instance->trip != params->trip_max_desired_temperature || 560 (!cdev_is_power_actor(instance->cdev))) 561 continue; 562 563 instance->target = 0; 564 mutex_lock(&instance->cdev->lock); 565 /* 566 * Call for updating the cooling devices local stats and avoid 567 * periods of dozen of seconds when those have not been 568 * maintained. 569 */ 570 cdev->ops->get_requested_power(cdev, &req_power); 571 572 if (update) 573 __thermal_cdev_update(instance->cdev); 574 575 mutex_unlock(&instance->cdev->lock); 576 } 577 } 578 579 /** 580 * check_power_actors() - Check all cooling devices and warn when they are 581 * not power actors 582 * @tz: thermal zone to operate on 583 * 584 * Check all cooling devices in the @tz and warn every time they are missing 585 * power actor API. The warning should help to investigate the issue, which 586 * could be e.g. lack of Energy Model for a given device. 587 * 588 * Return: 0 on success, -EINVAL if any cooling device does not implement 589 * the power actor API. 590 */ 591 static int check_power_actors(struct thermal_zone_device *tz) 592 { 593 struct thermal_instance *instance; 594 int ret = 0; 595 596 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 597 if (!cdev_is_power_actor(instance->cdev)) { 598 dev_warn(&tz->device, "power_allocator: %s is not a power actor\n", 599 instance->cdev->type); 600 ret = -EINVAL; 601 } 602 } 603 604 return ret; 605 } 606 607 /** 608 * power_allocator_bind() - bind the power_allocator governor to a thermal zone 609 * @tz: thermal zone to bind it to 610 * 611 * Initialize the PID controller parameters and bind it to the thermal 612 * zone. 613 * 614 * Return: 0 on success, or -ENOMEM if we ran out of memory, or -EINVAL 615 * when there are unsupported cooling devices in the @tz. 616 */ 617 static int power_allocator_bind(struct thermal_zone_device *tz) 618 { 619 int ret; 620 struct power_allocator_params *params; 621 622 ret = check_power_actors(tz); 623 if (ret) 624 return ret; 625 626 params = kzalloc(sizeof(*params), GFP_KERNEL); 627 if (!params) 628 return -ENOMEM; 629 630 if (!tz->tzp) { 631 tz->tzp = kzalloc(sizeof(*tz->tzp), GFP_KERNEL); 632 if (!tz->tzp) { 633 ret = -ENOMEM; 634 goto free_params; 635 } 636 637 params->allocated_tzp = true; 638 } 639 640 if (!tz->tzp->sustainable_power) 641 dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); 642 643 get_governor_trips(tz, params); 644 645 if (params->trip_max_desired_temperature) { 646 int temp = params->trip_max_desired_temperature->temperature; 647 648 estimate_pid_constants(tz, tz->tzp->sustainable_power, 649 params->trip_switch_on, temp); 650 } 651 652 reset_pid_controller(params); 653 654 tz->governor_data = params; 655 656 return 0; 657 658 free_params: 659 kfree(params); 660 661 return ret; 662 } 663 664 static void power_allocator_unbind(struct thermal_zone_device *tz) 665 { 666 struct power_allocator_params *params = tz->governor_data; 667 668 dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); 669 670 if (params->allocated_tzp) { 671 kfree(tz->tzp); 672 tz->tzp = NULL; 673 } 674 675 kfree(tz->governor_data); 676 tz->governor_data = NULL; 677 } 678 679 static int power_allocator_throttle(struct thermal_zone_device *tz, 680 const struct thermal_trip *trip) 681 { 682 struct power_allocator_params *params = tz->governor_data; 683 bool update; 684 685 lockdep_assert_held(&tz->lock); 686 687 /* 688 * We get called for every trip point but we only need to do 689 * our calculations once 690 */ 691 if (trip != params->trip_max_desired_temperature) 692 return 0; 693 694 trip = params->trip_switch_on; 695 if (trip && tz->temperature < trip->temperature) { 696 update = tz->last_temperature >= trip->temperature; 697 tz->passive = 0; 698 reset_pid_controller(params); 699 allow_maximum_power(tz, update); 700 return 0; 701 } 702 703 tz->passive = 1; 704 705 return allocate_power(tz, params->trip_max_desired_temperature->temperature); 706 } 707 708 static struct thermal_governor thermal_gov_power_allocator = { 709 .name = "power_allocator", 710 .bind_to_tz = power_allocator_bind, 711 .unbind_from_tz = power_allocator_unbind, 712 .throttle = power_allocator_throttle, 713 }; 714 THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator); 715