1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A power allocator to manage temperature 4 * 5 * Copyright (C) 2014 ARM Ltd. 6 * 7 */ 8 9 #define pr_fmt(fmt) "Power allocator: " fmt 10 11 #include <linux/slab.h> 12 #include <linux/thermal.h> 13 14 #define CREATE_TRACE_POINTS 15 #include <trace/events/thermal_power_allocator.h> 16 17 #include "thermal_core.h" 18 19 #define INVALID_TRIP -1 20 21 #define FRAC_BITS 10 22 #define int_to_frac(x) ((x) << FRAC_BITS) 23 #define frac_to_int(x) ((x) >> FRAC_BITS) 24 25 /** 26 * mul_frac() - multiply two fixed-point numbers 27 * @x: first multiplicand 28 * @y: second multiplicand 29 * 30 * Return: the result of multiplying two fixed-point numbers. The 31 * result is also a fixed-point number. 32 */ 33 static inline s64 mul_frac(s64 x, s64 y) 34 { 35 return (x * y) >> FRAC_BITS; 36 } 37 38 /** 39 * div_frac() - divide two fixed-point numbers 40 * @x: the dividend 41 * @y: the divisor 42 * 43 * Return: the result of dividing two fixed-point numbers. The 44 * result is also a fixed-point number. 45 */ 46 static inline s64 div_frac(s64 x, s64 y) 47 { 48 return div_s64(x << FRAC_BITS, y); 49 } 50 51 /** 52 * struct power_allocator_params - parameters for the power allocator governor 53 * @allocated_tzp: whether we have allocated tzp for this thermal zone and 54 * it needs to be freed on unbind 55 * @err_integral: accumulated error in the PID controller. 56 * @prev_err: error in the previous iteration of the PID controller. 57 * Used to calculate the derivative term. 58 * @trip_switch_on: first passive trip point of the thermal zone. The 59 * governor switches on when this trip point is crossed. 60 * If the thermal zone only has one passive trip point, 61 * @trip_switch_on should be INVALID_TRIP. 62 * @trip_max_desired_temperature: last passive trip point of the thermal 63 * zone. The temperature we are 64 * controlling for. 65 * @sustainable_power: Sustainable power (heat) that this thermal zone can 66 * dissipate 67 */ 68 struct power_allocator_params { 69 bool allocated_tzp; 70 s64 err_integral; 71 s32 prev_err; 72 int trip_switch_on; 73 int trip_max_desired_temperature; 74 u32 sustainable_power; 75 }; 76 77 /** 78 * estimate_sustainable_power() - Estimate the sustainable power of a thermal zone 79 * @tz: thermal zone we are operating in 80 * 81 * For thermal zones that don't provide a sustainable_power in their 82 * thermal_zone_params, estimate one. Calculate it using the minimum 83 * power of all the cooling devices as that gives a valid value that 84 * can give some degree of functionality. For optimal performance of 85 * this governor, provide a sustainable_power in the thermal zone's 86 * thermal_zone_params. 87 */ 88 static u32 estimate_sustainable_power(struct thermal_zone_device *tz) 89 { 90 u32 sustainable_power = 0; 91 struct thermal_instance *instance; 92 struct power_allocator_params *params = tz->governor_data; 93 94 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 95 struct thermal_cooling_device *cdev = instance->cdev; 96 u32 min_power; 97 98 if (instance->trip != params->trip_max_desired_temperature) 99 continue; 100 101 if (!cdev_is_power_actor(cdev)) 102 continue; 103 104 if (cdev->ops->state2power(cdev, instance->upper, &min_power)) 105 continue; 106 107 sustainable_power += min_power; 108 } 109 110 return sustainable_power; 111 } 112 113 /** 114 * estimate_pid_constants() - Estimate the constants for the PID controller 115 * @tz: thermal zone for which to estimate the constants 116 * @sustainable_power: sustainable power for the thermal zone 117 * @trip_switch_on: trip point number for the switch on temperature 118 * @control_temp: target temperature for the power allocator governor 119 * 120 * This function is used to update the estimation of the PID 121 * controller constants in struct thermal_zone_parameters. 122 */ 123 static void estimate_pid_constants(struct thermal_zone_device *tz, 124 u32 sustainable_power, int trip_switch_on, 125 int control_temp) 126 { 127 struct thermal_trip trip; 128 u32 temperature_threshold = control_temp; 129 int ret; 130 s32 k_i; 131 132 ret = __thermal_zone_get_trip(tz, trip_switch_on, &trip); 133 if (!ret) 134 temperature_threshold -= trip.temperature; 135 136 /* 137 * estimate_pid_constants() tries to find appropriate default 138 * values for thermal zones that don't provide them. If a 139 * system integrator has configured a thermal zone with two 140 * passive trip points at the same temperature, that person 141 * hasn't put any effort to set up the thermal zone properly 142 * so just give up. 143 */ 144 if (!temperature_threshold) 145 return; 146 147 tz->tzp->k_po = int_to_frac(sustainable_power) / 148 temperature_threshold; 149 150 tz->tzp->k_pu = int_to_frac(2 * sustainable_power) / 151 temperature_threshold; 152 153 k_i = tz->tzp->k_pu / 10; 154 tz->tzp->k_i = k_i > 0 ? k_i : 1; 155 156 /* 157 * The default for k_d and integral_cutoff is 0, so we can 158 * leave them as they are. 159 */ 160 } 161 162 /** 163 * get_sustainable_power() - Get the right sustainable power 164 * @tz: thermal zone for which to estimate the constants 165 * @params: parameters for the power allocator governor 166 * @control_temp: target temperature for the power allocator governor 167 * 168 * This function is used for getting the proper sustainable power value based 169 * on variables which might be updated by the user sysfs interface. If that 170 * happen the new value is going to be estimated and updated. It is also used 171 * after thermal zone binding, where the initial values where set to 0. 172 */ 173 static u32 get_sustainable_power(struct thermal_zone_device *tz, 174 struct power_allocator_params *params, 175 int control_temp) 176 { 177 u32 sustainable_power; 178 179 if (!tz->tzp->sustainable_power) 180 sustainable_power = estimate_sustainable_power(tz); 181 else 182 sustainable_power = tz->tzp->sustainable_power; 183 184 /* Check if it's init value 0 or there was update via sysfs */ 185 if (sustainable_power != params->sustainable_power) { 186 estimate_pid_constants(tz, sustainable_power, 187 params->trip_switch_on, control_temp); 188 189 /* Do the estimation only once and make available in sysfs */ 190 tz->tzp->sustainable_power = sustainable_power; 191 params->sustainable_power = sustainable_power; 192 } 193 194 return sustainable_power; 195 } 196 197 /** 198 * pid_controller() - PID controller 199 * @tz: thermal zone we are operating in 200 * @control_temp: the target temperature in millicelsius 201 * @max_allocatable_power: maximum allocatable power for this thermal zone 202 * 203 * This PID controller increases the available power budget so that the 204 * temperature of the thermal zone gets as close as possible to 205 * @control_temp and limits the power if it exceeds it. k_po is the 206 * proportional term when we are overshooting, k_pu is the 207 * proportional term when we are undershooting. integral_cutoff is a 208 * threshold below which we stop accumulating the error. The 209 * accumulated error is only valid if the requested power will make 210 * the system warmer. If the system is mostly idle, there's no point 211 * in accumulating positive error. 212 * 213 * Return: The power budget for the next period. 214 */ 215 static u32 pid_controller(struct thermal_zone_device *tz, 216 int control_temp, 217 u32 max_allocatable_power) 218 { 219 s64 p, i, d, power_range; 220 s32 err, max_power_frac; 221 u32 sustainable_power; 222 struct power_allocator_params *params = tz->governor_data; 223 224 max_power_frac = int_to_frac(max_allocatable_power); 225 226 sustainable_power = get_sustainable_power(tz, params, control_temp); 227 228 err = control_temp - tz->temperature; 229 err = int_to_frac(err); 230 231 /* Calculate the proportional term */ 232 p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err); 233 234 /* 235 * Calculate the integral term 236 * 237 * if the error is less than cut off allow integration (but 238 * the integral is limited to max power) 239 */ 240 i = mul_frac(tz->tzp->k_i, params->err_integral); 241 242 if (err < int_to_frac(tz->tzp->integral_cutoff)) { 243 s64 i_next = i + mul_frac(tz->tzp->k_i, err); 244 245 if (abs(i_next) < max_power_frac) { 246 i = i_next; 247 params->err_integral += err; 248 } 249 } 250 251 /* 252 * Calculate the derivative term 253 * 254 * We do err - prev_err, so with a positive k_d, a decreasing 255 * error (i.e. driving closer to the line) results in less 256 * power being applied, slowing down the controller) 257 */ 258 d = mul_frac(tz->tzp->k_d, err - params->prev_err); 259 d = div_frac(d, jiffies_to_msecs(tz->passive_delay_jiffies)); 260 params->prev_err = err; 261 262 power_range = p + i + d; 263 264 /* feed-forward the known sustainable dissipatable power */ 265 power_range = sustainable_power + frac_to_int(power_range); 266 267 power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); 268 269 trace_thermal_power_allocator_pid(tz, frac_to_int(err), 270 frac_to_int(params->err_integral), 271 frac_to_int(p), frac_to_int(i), 272 frac_to_int(d), power_range); 273 274 return power_range; 275 } 276 277 /** 278 * power_actor_set_power() - limit the maximum power a cooling device consumes 279 * @cdev: pointer to &thermal_cooling_device 280 * @instance: thermal instance to update 281 * @power: the power in milliwatts 282 * 283 * Set the cooling device to consume at most @power milliwatts. The limit is 284 * expected to be a cap at the maximum power consumption. 285 * 286 * Return: 0 on success, -EINVAL if the cooling device does not 287 * implement the power actor API or -E* for other failures. 288 */ 289 static int 290 power_actor_set_power(struct thermal_cooling_device *cdev, 291 struct thermal_instance *instance, u32 power) 292 { 293 unsigned long state; 294 int ret; 295 296 ret = cdev->ops->power2state(cdev, power, &state); 297 if (ret) 298 return ret; 299 300 instance->target = clamp_val(state, instance->lower, instance->upper); 301 mutex_lock(&cdev->lock); 302 __thermal_cdev_update(cdev); 303 mutex_unlock(&cdev->lock); 304 305 return 0; 306 } 307 308 /** 309 * divvy_up_power() - divvy the allocated power between the actors 310 * @req_power: each actor's requested power 311 * @max_power: each actor's maximum available power 312 * @num_actors: size of the @req_power, @max_power and @granted_power's array 313 * @total_req_power: sum of @req_power 314 * @power_range: total allocated power 315 * @granted_power: output array: each actor's granted power 316 * @extra_actor_power: an appropriately sized array to be used in the 317 * function as temporary storage of the extra power given 318 * to the actors 319 * 320 * This function divides the total allocated power (@power_range) 321 * fairly between the actors. It first tries to give each actor a 322 * share of the @power_range according to how much power it requested 323 * compared to the rest of the actors. For example, if only one actor 324 * requests power, then it receives all the @power_range. If 325 * three actors each requests 1mW, each receives a third of the 326 * @power_range. 327 * 328 * If any actor received more than their maximum power, then that 329 * surplus is re-divvied among the actors based on how far they are 330 * from their respective maximums. 331 * 332 * Granted power for each actor is written to @granted_power, which 333 * should've been allocated by the calling function. 334 */ 335 static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, 336 u32 total_req_power, u32 power_range, 337 u32 *granted_power, u32 *extra_actor_power) 338 { 339 u32 extra_power, capped_extra_power; 340 int i; 341 342 /* 343 * Prevent division by 0 if none of the actors request power. 344 */ 345 if (!total_req_power) 346 total_req_power = 1; 347 348 capped_extra_power = 0; 349 extra_power = 0; 350 for (i = 0; i < num_actors; i++) { 351 u64 req_range = (u64)req_power[i] * power_range; 352 353 granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range, 354 total_req_power); 355 356 if (granted_power[i] > max_power[i]) { 357 extra_power += granted_power[i] - max_power[i]; 358 granted_power[i] = max_power[i]; 359 } 360 361 extra_actor_power[i] = max_power[i] - granted_power[i]; 362 capped_extra_power += extra_actor_power[i]; 363 } 364 365 if (!extra_power) 366 return; 367 368 /* 369 * Re-divvy the reclaimed extra among actors based on 370 * how far they are from the max 371 */ 372 extra_power = min(extra_power, capped_extra_power); 373 if (capped_extra_power > 0) 374 for (i = 0; i < num_actors; i++) { 375 u64 extra_range = (u64)extra_actor_power[i] * extra_power; 376 granted_power[i] += DIV_ROUND_CLOSEST_ULL(extra_range, 377 capped_extra_power); 378 } 379 } 380 381 static int allocate_power(struct thermal_zone_device *tz, 382 int control_temp) 383 { 384 struct thermal_instance *instance; 385 struct power_allocator_params *params = tz->governor_data; 386 u32 *req_power, *max_power, *granted_power, *extra_actor_power; 387 u32 *weighted_req_power; 388 u32 total_req_power, max_allocatable_power, total_weighted_req_power; 389 u32 total_granted_power, power_range; 390 int i, num_actors, total_weight, ret = 0; 391 int trip_max_desired_temperature = params->trip_max_desired_temperature; 392 393 num_actors = 0; 394 total_weight = 0; 395 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 396 if ((instance->trip == trip_max_desired_temperature) && 397 cdev_is_power_actor(instance->cdev)) { 398 num_actors++; 399 total_weight += instance->weight; 400 } 401 } 402 403 if (!num_actors) 404 return -ENODEV; 405 406 /* 407 * We need to allocate five arrays of the same size: 408 * req_power, max_power, granted_power, extra_actor_power and 409 * weighted_req_power. They are going to be needed until this 410 * function returns. Allocate them all in one go to simplify 411 * the allocation and deallocation logic. 412 */ 413 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power)); 414 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power)); 415 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); 416 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power)); 417 req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL); 418 if (!req_power) 419 return -ENOMEM; 420 421 max_power = &req_power[num_actors]; 422 granted_power = &req_power[2 * num_actors]; 423 extra_actor_power = &req_power[3 * num_actors]; 424 weighted_req_power = &req_power[4 * num_actors]; 425 426 i = 0; 427 total_weighted_req_power = 0; 428 total_req_power = 0; 429 max_allocatable_power = 0; 430 431 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 432 int weight; 433 struct thermal_cooling_device *cdev = instance->cdev; 434 435 if (instance->trip != trip_max_desired_temperature) 436 continue; 437 438 if (!cdev_is_power_actor(cdev)) 439 continue; 440 441 if (cdev->ops->get_requested_power(cdev, &req_power[i])) 442 continue; 443 444 if (!total_weight) 445 weight = 1 << FRAC_BITS; 446 else 447 weight = instance->weight; 448 449 weighted_req_power[i] = frac_to_int(weight * req_power[i]); 450 451 if (cdev->ops->state2power(cdev, instance->lower, 452 &max_power[i])) 453 continue; 454 455 total_req_power += req_power[i]; 456 max_allocatable_power += max_power[i]; 457 total_weighted_req_power += weighted_req_power[i]; 458 459 i++; 460 } 461 462 power_range = pid_controller(tz, control_temp, max_allocatable_power); 463 464 divvy_up_power(weighted_req_power, max_power, num_actors, 465 total_weighted_req_power, power_range, granted_power, 466 extra_actor_power); 467 468 total_granted_power = 0; 469 i = 0; 470 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 471 if (instance->trip != trip_max_desired_temperature) 472 continue; 473 474 if (!cdev_is_power_actor(instance->cdev)) 475 continue; 476 477 power_actor_set_power(instance->cdev, instance, 478 granted_power[i]); 479 total_granted_power += granted_power[i]; 480 481 i++; 482 } 483 484 trace_thermal_power_allocator(tz, req_power, total_req_power, 485 granted_power, total_granted_power, 486 num_actors, power_range, 487 max_allocatable_power, tz->temperature, 488 control_temp - tz->temperature); 489 490 kfree(req_power); 491 492 return ret; 493 } 494 495 /** 496 * get_governor_trips() - get the number of the two trip points that are key for this governor 497 * @tz: thermal zone to operate on 498 * @params: pointer to private data for this governor 499 * 500 * The power allocator governor works optimally with two trips points: 501 * a "switch on" trip point and a "maximum desired temperature". These 502 * are defined as the first and last passive trip points. 503 * 504 * If there is only one trip point, then that's considered to be the 505 * "maximum desired temperature" trip point and the governor is always 506 * on. If there are no passive or active trip points, then the 507 * governor won't do anything. In fact, its throttle function 508 * won't be called at all. 509 */ 510 static void get_governor_trips(struct thermal_zone_device *tz, 511 struct power_allocator_params *params) 512 { 513 int i, last_active, last_passive; 514 bool found_first_passive; 515 516 found_first_passive = false; 517 last_active = INVALID_TRIP; 518 last_passive = INVALID_TRIP; 519 520 for (i = 0; i < tz->num_trips; i++) { 521 struct thermal_trip trip; 522 int ret; 523 524 ret = __thermal_zone_get_trip(tz, i, &trip); 525 if (ret) { 526 dev_warn(&tz->device, 527 "Failed to get trip point %d type: %d\n", i, 528 ret); 529 continue; 530 } 531 532 if (trip.type == THERMAL_TRIP_PASSIVE) { 533 if (!found_first_passive) { 534 params->trip_switch_on = i; 535 found_first_passive = true; 536 } else { 537 last_passive = i; 538 } 539 } else if (trip.type == THERMAL_TRIP_ACTIVE) { 540 last_active = i; 541 } else { 542 break; 543 } 544 } 545 546 if (last_passive != INVALID_TRIP) { 547 params->trip_max_desired_temperature = last_passive; 548 } else if (found_first_passive) { 549 params->trip_max_desired_temperature = params->trip_switch_on; 550 params->trip_switch_on = INVALID_TRIP; 551 } else { 552 params->trip_switch_on = INVALID_TRIP; 553 params->trip_max_desired_temperature = last_active; 554 } 555 } 556 557 static void reset_pid_controller(struct power_allocator_params *params) 558 { 559 params->err_integral = 0; 560 params->prev_err = 0; 561 } 562 563 static void allow_maximum_power(struct thermal_zone_device *tz, bool update) 564 { 565 struct thermal_instance *instance; 566 struct power_allocator_params *params = tz->governor_data; 567 u32 req_power; 568 569 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 570 struct thermal_cooling_device *cdev = instance->cdev; 571 572 if ((instance->trip != params->trip_max_desired_temperature) || 573 (!cdev_is_power_actor(instance->cdev))) 574 continue; 575 576 instance->target = 0; 577 mutex_lock(&instance->cdev->lock); 578 /* 579 * Call for updating the cooling devices local stats and avoid 580 * periods of dozen of seconds when those have not been 581 * maintained. 582 */ 583 cdev->ops->get_requested_power(cdev, &req_power); 584 585 if (update) 586 __thermal_cdev_update(instance->cdev); 587 588 mutex_unlock(&instance->cdev->lock); 589 } 590 } 591 592 /** 593 * check_power_actors() - Check all cooling devices and warn when they are 594 * not power actors 595 * @tz: thermal zone to operate on 596 * 597 * Check all cooling devices in the @tz and warn every time they are missing 598 * power actor API. The warning should help to investigate the issue, which 599 * could be e.g. lack of Energy Model for a given device. 600 * 601 * Return: 0 on success, -EINVAL if any cooling device does not implement 602 * the power actor API. 603 */ 604 static int check_power_actors(struct thermal_zone_device *tz) 605 { 606 struct thermal_instance *instance; 607 int ret = 0; 608 609 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 610 if (!cdev_is_power_actor(instance->cdev)) { 611 dev_warn(&tz->device, "power_allocator: %s is not a power actor\n", 612 instance->cdev->type); 613 ret = -EINVAL; 614 } 615 } 616 617 return ret; 618 } 619 620 /** 621 * power_allocator_bind() - bind the power_allocator governor to a thermal zone 622 * @tz: thermal zone to bind it to 623 * 624 * Initialize the PID controller parameters and bind it to the thermal 625 * zone. 626 * 627 * Return: 0 on success, or -ENOMEM if we ran out of memory, or -EINVAL 628 * when there are unsupported cooling devices in the @tz. 629 */ 630 static int power_allocator_bind(struct thermal_zone_device *tz) 631 { 632 int ret; 633 struct power_allocator_params *params; 634 struct thermal_trip trip; 635 636 ret = check_power_actors(tz); 637 if (ret) 638 return ret; 639 640 params = kzalloc(sizeof(*params), GFP_KERNEL); 641 if (!params) 642 return -ENOMEM; 643 644 if (!tz->tzp) { 645 tz->tzp = kzalloc(sizeof(*tz->tzp), GFP_KERNEL); 646 if (!tz->tzp) { 647 ret = -ENOMEM; 648 goto free_params; 649 } 650 651 params->allocated_tzp = true; 652 } 653 654 if (!tz->tzp->sustainable_power) 655 dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); 656 657 get_governor_trips(tz, params); 658 659 if (tz->num_trips > 0) { 660 ret = __thermal_zone_get_trip(tz, params->trip_max_desired_temperature, 661 &trip); 662 if (!ret) 663 estimate_pid_constants(tz, tz->tzp->sustainable_power, 664 params->trip_switch_on, 665 trip.temperature); 666 } 667 668 reset_pid_controller(params); 669 670 tz->governor_data = params; 671 672 return 0; 673 674 free_params: 675 kfree(params); 676 677 return ret; 678 } 679 680 static void power_allocator_unbind(struct thermal_zone_device *tz) 681 { 682 struct power_allocator_params *params = tz->governor_data; 683 684 dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); 685 686 if (params->allocated_tzp) { 687 kfree(tz->tzp); 688 tz->tzp = NULL; 689 } 690 691 kfree(tz->governor_data); 692 tz->governor_data = NULL; 693 } 694 695 static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id) 696 { 697 struct power_allocator_params *params = tz->governor_data; 698 struct thermal_trip trip; 699 int ret; 700 bool update; 701 702 lockdep_assert_held(&tz->lock); 703 704 /* 705 * We get called for every trip point but we only need to do 706 * our calculations once 707 */ 708 if (trip_id != params->trip_max_desired_temperature) 709 return 0; 710 711 ret = __thermal_zone_get_trip(tz, params->trip_switch_on, &trip); 712 if (!ret && (tz->temperature < trip.temperature)) { 713 update = (tz->last_temperature >= trip.temperature); 714 tz->passive = 0; 715 reset_pid_controller(params); 716 allow_maximum_power(tz, update); 717 return 0; 718 } 719 720 tz->passive = 1; 721 722 ret = __thermal_zone_get_trip(tz, params->trip_max_desired_temperature, &trip); 723 if (ret) { 724 dev_warn(&tz->device, "Failed to get the maximum desired temperature: %d\n", 725 ret); 726 return ret; 727 } 728 729 return allocate_power(tz, trip.temperature); 730 } 731 732 static struct thermal_governor thermal_gov_power_allocator = { 733 .name = "power_allocator", 734 .bind_to_tz = power_allocator_bind, 735 .unbind_from_tz = power_allocator_unbind, 736 .throttle = power_allocator_throttle, 737 }; 738 THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator); 739