/*
 * devfreq_cooling: Thermal cooling device implementation for devices using
 *                  devfreq
 *
 * Copyright (C) 2014-2015 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
 * kind, whether express or implied; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * TODO:
 *    - If OPPs are added or removed after devfreq cooling has
 *      registered, the devfreq cooling won't react to it.
 */

#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/math64.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/thermal.h>

#include <trace/events/thermal.h>

#define SCALE_ERROR_MITIGATION 100

static DEFINE_IDA(devfreq_ida);

/**
 * struct devfreq_cooling_device - Devfreq cooling device
 * @id:		unique integer value corresponding to each
 *		devfreq_cooling_device registered.
 * @cdev:	Pointer to associated thermal cooling device.
 * @devfreq:	Pointer to associated devfreq device.
 * @cooling_state:	Current cooling state.
 * @power_table:	Pointer to table with maximum power draw for each
 *			cooling state. State is the index into the table, and
 *			the power is in mW.
 * @freq_table:	Pointer to a table with the frequencies sorted in descending
 *		order. You can index the table by cooling device state
 * @freq_table_size:	Size of the @freq_table and @power_table
 * @power_ops:	Pointer to devfreq_cooling_power, used to generate the
 *		@power_table.
 * @res_util:	Resource utilization scaling factor for the power.
 *		It is multiplied by 100 to minimize the error.
It is used 52 * for estimation of the power budget instead of using 53 * 'utilization' (which is 'busy_time / 'total_time'). 54 * The 'res_util' range is from 100 to (power_table[state] * 100) 55 * for the corresponding 'state'. 56 */ 57 struct devfreq_cooling_device { 58 int id; 59 struct thermal_cooling_device *cdev; 60 struct devfreq *devfreq; 61 unsigned long cooling_state; 62 u32 *power_table; 63 u32 *freq_table; 64 size_t freq_table_size; 65 struct devfreq_cooling_power *power_ops; 66 u32 res_util; 67 int capped_state; 68 }; 69 70 /** 71 * partition_enable_opps() - disable all opps above a given state 72 * @dfc: Pointer to devfreq we are operating on 73 * @cdev_state: cooling device state we're setting 74 * 75 * Go through the OPPs of the device, enabling all OPPs until 76 * @cdev_state and disabling those frequencies above it. 77 */ 78 static int partition_enable_opps(struct devfreq_cooling_device *dfc, 79 unsigned long cdev_state) 80 { 81 int i; 82 struct device *dev = dfc->devfreq->dev.parent; 83 84 for (i = 0; i < dfc->freq_table_size; i++) { 85 struct dev_pm_opp *opp; 86 int ret = 0; 87 unsigned int freq = dfc->freq_table[i]; 88 bool want_enable = i >= cdev_state ? 
true : false; 89 90 opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); 91 92 if (PTR_ERR(opp) == -ERANGE) 93 continue; 94 else if (IS_ERR(opp)) 95 return PTR_ERR(opp); 96 97 dev_pm_opp_put(opp); 98 99 if (want_enable) 100 ret = dev_pm_opp_enable(dev, freq); 101 else 102 ret = dev_pm_opp_disable(dev, freq); 103 104 if (ret) 105 return ret; 106 } 107 108 return 0; 109 } 110 111 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 112 unsigned long *state) 113 { 114 struct devfreq_cooling_device *dfc = cdev->devdata; 115 116 *state = dfc->freq_table_size - 1; 117 118 return 0; 119 } 120 121 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 122 unsigned long *state) 123 { 124 struct devfreq_cooling_device *dfc = cdev->devdata; 125 126 *state = dfc->cooling_state; 127 128 return 0; 129 } 130 131 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 132 unsigned long state) 133 { 134 struct devfreq_cooling_device *dfc = cdev->devdata; 135 struct devfreq *df = dfc->devfreq; 136 struct device *dev = df->dev.parent; 137 int ret; 138 139 if (state == dfc->cooling_state) 140 return 0; 141 142 dev_dbg(dev, "Setting cooling state %lu\n", state); 143 144 if (state >= dfc->freq_table_size) 145 return -EINVAL; 146 147 ret = partition_enable_opps(dfc, state); 148 if (ret) 149 return ret; 150 151 dfc->cooling_state = state; 152 153 return 0; 154 } 155 156 /** 157 * freq_get_state() - get the cooling state corresponding to a frequency 158 * @dfc: Pointer to devfreq cooling device 159 * @freq: frequency in Hz 160 * 161 * Return: the cooling state associated with the @freq, or 162 * THERMAL_CSTATE_INVALID if it wasn't found. 
163 */ 164 static unsigned long 165 freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq) 166 { 167 int i; 168 169 for (i = 0; i < dfc->freq_table_size; i++) { 170 if (dfc->freq_table[i] == freq) 171 return i; 172 } 173 174 return THERMAL_CSTATE_INVALID; 175 } 176 177 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 178 { 179 struct device *dev = df->dev.parent; 180 unsigned long voltage; 181 struct dev_pm_opp *opp; 182 183 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 184 if (PTR_ERR(opp) == -ERANGE) 185 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 186 187 if (IS_ERR(opp)) { 188 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 189 freq, PTR_ERR(opp)); 190 return 0; 191 } 192 193 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 194 dev_pm_opp_put(opp); 195 196 if (voltage == 0) { 197 dev_err_ratelimited(dev, 198 "Failed to get voltage for frequency %lu\n", 199 freq); 200 } 201 202 return voltage; 203 } 204 205 /** 206 * get_static_power() - calculate the static power 207 * @dfc: Pointer to devfreq cooling device 208 * @freq: Frequency in Hz 209 * 210 * Calculate the static power in milliwatts using the supplied 211 * get_static_power(). The current voltage is calculated using the 212 * OPP library. If no get_static_power() was supplied, assume the 213 * static power is negligible. 
214 */ 215 static unsigned long 216 get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) 217 { 218 struct devfreq *df = dfc->devfreq; 219 unsigned long voltage; 220 221 if (!dfc->power_ops->get_static_power) 222 return 0; 223 224 voltage = get_voltage(df, freq); 225 226 if (voltage == 0) 227 return 0; 228 229 return dfc->power_ops->get_static_power(df, voltage); 230 } 231 232 /** 233 * get_dynamic_power - calculate the dynamic power 234 * @dfc: Pointer to devfreq cooling device 235 * @freq: Frequency in Hz 236 * @voltage: Voltage in millivolts 237 * 238 * Calculate the dynamic power in milliwatts consumed by the device at 239 * frequency @freq and voltage @voltage. If the get_dynamic_power() 240 * was supplied as part of the devfreq_cooling_power struct, then that 241 * function is used. Otherwise, a simple power model (Pdyn = Coeff * 242 * Voltage^2 * Frequency) is used. 243 */ 244 static unsigned long 245 get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq, 246 unsigned long voltage) 247 { 248 u64 power; 249 u32 freq_mhz; 250 struct devfreq_cooling_power *dfc_power = dfc->power_ops; 251 252 if (dfc_power->get_dynamic_power) 253 return dfc_power->get_dynamic_power(dfc->devfreq, freq, 254 voltage); 255 256 freq_mhz = freq / 1000000; 257 power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage; 258 do_div(power, 1000000000); 259 260 return power; 261 } 262 263 264 static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc, 265 unsigned long freq, 266 unsigned long voltage) 267 { 268 return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq, 269 voltage); 270 } 271 272 273 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 274 struct thermal_zone_device *tz, 275 u32 *power) 276 { 277 struct devfreq_cooling_device *dfc = cdev->devdata; 278 struct devfreq *df = dfc->devfreq; 279 struct devfreq_dev_status *status = &df->last_status; 280 unsigned long state; 
281 unsigned long freq = status->current_frequency; 282 unsigned long voltage; 283 u32 dyn_power = 0; 284 u32 static_power = 0; 285 int res; 286 287 state = freq_get_state(dfc, freq); 288 if (state == THERMAL_CSTATE_INVALID) { 289 res = -EAGAIN; 290 goto fail; 291 } 292 293 if (dfc->power_ops->get_real_power) { 294 voltage = get_voltage(df, freq); 295 if (voltage == 0) { 296 res = -EINVAL; 297 goto fail; 298 } 299 300 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 301 if (!res) { 302 state = dfc->capped_state; 303 dfc->res_util = dfc->power_table[state]; 304 dfc->res_util *= SCALE_ERROR_MITIGATION; 305 306 if (*power > 1) 307 dfc->res_util /= *power; 308 } else { 309 goto fail; 310 } 311 } else { 312 dyn_power = dfc->power_table[state]; 313 314 /* Scale dynamic power for utilization */ 315 dyn_power *= status->busy_time; 316 dyn_power /= status->total_time; 317 /* Get static power */ 318 static_power = get_static_power(dfc, freq); 319 320 *power = dyn_power + static_power; 321 } 322 323 trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, 324 static_power, *power); 325 326 return 0; 327 fail: 328 /* It is safe to set max in this case */ 329 dfc->res_util = SCALE_ERROR_MITIGATION; 330 return res; 331 } 332 333 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 334 struct thermal_zone_device *tz, 335 unsigned long state, 336 u32 *power) 337 { 338 struct devfreq_cooling_device *dfc = cdev->devdata; 339 unsigned long freq; 340 u32 static_power; 341 342 if (state >= dfc->freq_table_size) 343 return -EINVAL; 344 345 freq = dfc->freq_table[state]; 346 static_power = get_static_power(dfc, freq); 347 348 *power = dfc->power_table[state] + static_power; 349 return 0; 350 } 351 352 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 353 struct thermal_zone_device *tz, 354 u32 power, unsigned long *state) 355 { 356 struct devfreq_cooling_device *dfc = cdev->devdata; 357 struct devfreq *df = 
dfc->devfreq; 358 struct devfreq_dev_status *status = &df->last_status; 359 unsigned long freq = status->current_frequency; 360 unsigned long busy_time; 361 s32 dyn_power; 362 u32 static_power; 363 s32 est_power; 364 int i; 365 366 if (dfc->power_ops->get_real_power) { 367 /* Scale for resource utilization */ 368 est_power = power * dfc->res_util; 369 est_power /= SCALE_ERROR_MITIGATION; 370 } else { 371 static_power = get_static_power(dfc, freq); 372 373 dyn_power = power - static_power; 374 dyn_power = dyn_power > 0 ? dyn_power : 0; 375 376 /* Scale dynamic power for utilization */ 377 busy_time = status->busy_time ?: 1; 378 est_power = (dyn_power * status->total_time) / busy_time; 379 } 380 381 /* 382 * Find the first cooling state that is within the power 383 * budget for dynamic power. 384 */ 385 for (i = 0; i < dfc->freq_table_size - 1; i++) 386 if (est_power >= dfc->power_table[i]) 387 break; 388 389 *state = i; 390 dfc->capped_state = i; 391 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 392 return 0; 393 } 394 395 static struct thermal_cooling_device_ops devfreq_cooling_ops = { 396 .get_max_state = devfreq_cooling_get_max_state, 397 .get_cur_state = devfreq_cooling_get_cur_state, 398 .set_cur_state = devfreq_cooling_set_cur_state, 399 }; 400 401 /** 402 * devfreq_cooling_gen_tables() - Generate power and freq tables. 403 * @dfc: Pointer to devfreq cooling device. 404 * 405 * Generate power and frequency tables: the power table hold the 406 * device's maximum power usage at each cooling state (OPP). The 407 * static and dynamic power using the appropriate voltage and 408 * frequency for the state, is acquired from the struct 409 * devfreq_cooling_power, and summed to make the maximum power draw. 410 * 411 * The frequency table holds the frequencies in descending order. 412 * That way its indexed by cooling device state. 413 * 414 * The tables are malloced, and pointers put in dfc. 
They must be 415 * freed when unregistering the devfreq cooling device. 416 * 417 * Return: 0 on success, negative error code on failure. 418 */ 419 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) 420 { 421 struct devfreq *df = dfc->devfreq; 422 struct device *dev = df->dev.parent; 423 int ret, num_opps; 424 unsigned long freq; 425 u32 *power_table = NULL; 426 u32 *freq_table; 427 int i; 428 429 num_opps = dev_pm_opp_get_opp_count(dev); 430 431 if (dfc->power_ops) { 432 power_table = kcalloc(num_opps, sizeof(*power_table), 433 GFP_KERNEL); 434 if (!power_table) 435 return -ENOMEM; 436 } 437 438 freq_table = kcalloc(num_opps, sizeof(*freq_table), 439 GFP_KERNEL); 440 if (!freq_table) { 441 ret = -ENOMEM; 442 goto free_power_table; 443 } 444 445 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 446 unsigned long power, voltage; 447 struct dev_pm_opp *opp; 448 449 opp = dev_pm_opp_find_freq_floor(dev, &freq); 450 if (IS_ERR(opp)) { 451 ret = PTR_ERR(opp); 452 goto free_tables; 453 } 454 455 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 456 dev_pm_opp_put(opp); 457 458 if (dfc->power_ops) { 459 if (dfc->power_ops->get_real_power) 460 power = get_total_power(dfc, freq, voltage); 461 else 462 power = get_dynamic_power(dfc, freq, voltage); 463 464 dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n", 465 freq / 1000000, voltage, power, power); 466 467 power_table[i] = power; 468 } 469 470 freq_table[i] = freq; 471 } 472 473 if (dfc->power_ops) 474 dfc->power_table = power_table; 475 476 dfc->freq_table = freq_table; 477 dfc->freq_table_size = num_opps; 478 479 return 0; 480 481 free_tables: 482 kfree(freq_table); 483 free_power_table: 484 kfree(power_table); 485 486 return ret; 487 } 488 489 /** 490 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 491 * with OF and power information. 492 * @np: Pointer to OF device_node. 493 * @df: Pointer to devfreq device. 
494 * @dfc_power: Pointer to devfreq_cooling_power. 495 * 496 * Register a devfreq cooling device. The available OPPs must be 497 * registered on the device. 498 * 499 * If @dfc_power is provided, the cooling device is registered with the 500 * power extensions. For the power extensions to work correctly, 501 * devfreq should use the simple_ondemand governor, other governors 502 * are not currently supported. 503 */ 504 struct thermal_cooling_device * 505 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 506 struct devfreq_cooling_power *dfc_power) 507 { 508 struct thermal_cooling_device *cdev; 509 struct devfreq_cooling_device *dfc; 510 char dev_name[THERMAL_NAME_LENGTH]; 511 int err; 512 513 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 514 if (!dfc) 515 return ERR_PTR(-ENOMEM); 516 517 dfc->devfreq = df; 518 519 if (dfc_power) { 520 dfc->power_ops = dfc_power; 521 522 devfreq_cooling_ops.get_requested_power = 523 devfreq_cooling_get_requested_power; 524 devfreq_cooling_ops.state2power = devfreq_cooling_state2power; 525 devfreq_cooling_ops.power2state = devfreq_cooling_power2state; 526 } 527 528 err = devfreq_cooling_gen_tables(dfc); 529 if (err) 530 goto free_dfc; 531 532 err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); 533 if (err < 0) 534 goto free_tables; 535 dfc->id = err; 536 537 snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); 538 539 cdev = thermal_of_cooling_device_register(np, dev_name, dfc, 540 &devfreq_cooling_ops); 541 if (IS_ERR(cdev)) { 542 err = PTR_ERR(cdev); 543 dev_err(df->dev.parent, 544 "Failed to register devfreq cooling device (%d)\n", 545 err); 546 goto release_ida; 547 } 548 549 dfc->cdev = cdev; 550 551 return cdev; 552 553 release_ida: 554 ida_simple_remove(&devfreq_ida, dfc->id); 555 free_tables: 556 kfree(dfc->power_table); 557 kfree(dfc->freq_table); 558 free_dfc: 559 kfree(dfc); 560 561 return ERR_PTR(err); 562 } 563 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 564 565 /** 566 
* of_devfreq_cooling_register() - Register devfreq cooling device, 567 * with OF information. 568 * @np: Pointer to OF device_node. 569 * @df: Pointer to devfreq device. 570 */ 571 struct thermal_cooling_device * 572 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 573 { 574 return of_devfreq_cooling_register_power(np, df, NULL); 575 } 576 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 577 578 /** 579 * devfreq_cooling_register() - Register devfreq cooling device. 580 * @df: Pointer to devfreq device. 581 */ 582 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 583 { 584 return of_devfreq_cooling_register(NULL, df); 585 } 586 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 587 588 /** 589 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 590 * @dfc: Pointer to devfreq cooling device to unregister. 591 */ 592 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 593 { 594 struct devfreq_cooling_device *dfc; 595 596 if (!cdev) 597 return; 598 599 dfc = cdev->devdata; 600 601 thermal_cooling_device_unregister(dfc->cdev); 602 ida_simple_remove(&devfreq_ida, dfc->id); 603 kfree(dfc->power_table); 604 kfree(dfc->freq_table); 605 606 kfree(dfc); 607 } 608 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 609