1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * devfreq_cooling: Thermal cooling device implementation for devices using 4 * devfreq 5 * 6 * Copyright (C) 2014-2015 ARM Limited 7 * 8 * TODO: 9 * - If OPPs are added or removed after devfreq cooling has 10 * registered, the devfreq cooling won't react to it. 11 */ 12 13 #include <linux/devfreq.h> 14 #include <linux/devfreq_cooling.h> 15 #include <linux/energy_model.h> 16 #include <linux/export.h> 17 #include <linux/idr.h> 18 #include <linux/slab.h> 19 #include <linux/pm_opp.h> 20 #include <linux/pm_qos.h> 21 #include <linux/thermal.h> 22 23 #include <trace/events/thermal.h> 24 25 #define HZ_PER_KHZ 1000 26 #define SCALE_ERROR_MITIGATION 100 27 28 static DEFINE_IDA(devfreq_ida); 29 30 /** 31 * struct devfreq_cooling_device - Devfreq cooling device 32 * @id: unique integer value corresponding to each 33 * devfreq_cooling_device registered. 34 * @cdev: Pointer to associated thermal cooling device. 35 * @devfreq: Pointer to associated devfreq device. 36 * @cooling_state: Current cooling state. 37 * @freq_table: Pointer to a table with the frequencies sorted in descending 38 * order. You can index the table by cooling device state 39 * @max_state: It is the last index, that is, one less than the number of the 40 * OPPs 41 * @power_ops: Pointer to devfreq_cooling_power, a more precised model. 42 * @res_util: Resource utilization scaling factor for the power. 43 * It is multiplied by 100 to minimize the error. It is used 44 * for estimation of the power budget instead of using 45 * 'utilization' (which is 'busy_time' / 'total_time'). 46 * The 'res_util' range is from 100 to power * 100 for the 47 * corresponding 'state'. 48 * @capped_state: index to cooling state with in dynamic power budget 49 * @req_max_freq: PM QoS request for limiting the maximum frequency 50 * of the devfreq device. 51 * @em_pd: Energy Model for the associated Devfreq device 52 */ 53 struct devfreq_cooling_device { 54 int id; 55 struct thermal_cooling_device *cdev; 56 struct devfreq *devfreq; 57 unsigned long cooling_state; 58 u32 *freq_table; 59 size_t max_state; 60 struct devfreq_cooling_power *power_ops; 61 u32 res_util; 62 int capped_state; 63 struct dev_pm_qos_request req_max_freq; 64 struct em_perf_domain *em_pd; 65 }; 66 67 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 68 unsigned long *state) 69 { 70 struct devfreq_cooling_device *dfc = cdev->devdata; 71 72 *state = dfc->max_state; 73 74 return 0; 75 } 76 77 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 78 unsigned long *state) 79 { 80 struct devfreq_cooling_device *dfc = cdev->devdata; 81 82 *state = dfc->cooling_state; 83 84 return 0; 85 } 86 87 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 88 unsigned long state) 89 { 90 struct devfreq_cooling_device *dfc = cdev->devdata; 91 struct devfreq *df = dfc->devfreq; 92 struct device *dev = df->dev.parent; 93 unsigned long freq; 94 int perf_idx; 95 96 if (state == dfc->cooling_state) 97 return 0; 98 99 dev_dbg(dev, "Setting cooling state %lu\n", state); 100 101 if (state > dfc->max_state) 102 return -EINVAL; 103 104 if (dfc->em_pd) { 105 perf_idx = dfc->max_state - state; 106 freq = dfc->em_pd->table[perf_idx].frequency * 1000; 107 } else { 108 freq = dfc->freq_table[state]; 109 } 110 111 dev_pm_qos_update_request(&dfc->req_max_freq, 112 DIV_ROUND_UP(freq, HZ_PER_KHZ)); 113 114 dfc->cooling_state = state; 115 116 return 0; 117 } 118 119 /** 120 * get_perf_idx() - get the performance index corresponding to a frequency 121 * @em_pd: Pointer to device's Energy Model 122 * @freq: frequency in kHz 123 * 124 * Return: the performance index associated with the @freq, or 125 * -EINVAL if it wasn't found. 126 */ 127 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) 128 { 129 int i; 130 131 for (i = 0; i < em_pd->nr_perf_states; i++) { 132 if (em_pd->table[i].frequency == freq) 133 return i; 134 } 135 136 return -EINVAL; 137 } 138 139 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 140 { 141 struct device *dev = df->dev.parent; 142 unsigned long voltage; 143 struct dev_pm_opp *opp; 144 145 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 146 if (PTR_ERR(opp) == -ERANGE) 147 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 148 149 if (IS_ERR(opp)) { 150 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 151 freq, PTR_ERR(opp)); 152 return 0; 153 } 154 155 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 156 dev_pm_opp_put(opp); 157 158 if (voltage == 0) { 159 dev_err_ratelimited(dev, 160 "Failed to get voltage for frequency %lu\n", 161 freq); 162 } 163 164 return voltage; 165 } 166 167 static void _normalize_load(struct devfreq_dev_status *status) 168 { 169 if (status->total_time > 0xfffff) { 170 status->total_time >>= 10; 171 status->busy_time >>= 10; 172 } 173 174 status->busy_time <<= 10; 175 status->busy_time /= status->total_time ? : 1; 176 177 status->busy_time = status->busy_time ? : 1; 178 status->total_time = 1024; 179 } 180 181 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 182 u32 *power) 183 { 184 struct devfreq_cooling_device *dfc = cdev->devdata; 185 struct devfreq *df = dfc->devfreq; 186 struct devfreq_dev_status status; 187 unsigned long state; 188 unsigned long freq; 189 unsigned long voltage; 190 int res, perf_idx; 191 192 mutex_lock(&df->lock); 193 status = df->last_status; 194 mutex_unlock(&df->lock); 195 196 freq = status.current_frequency; 197 198 if (dfc->power_ops && dfc->power_ops->get_real_power) { 199 voltage = get_voltage(df, freq); 200 if (voltage == 0) { 201 res = -EINVAL; 202 goto fail; 203 } 204 205 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 206 if (!res) { 207 state = dfc->capped_state; 208 dfc->res_util = dfc->em_pd->table[state].power; 209 dfc->res_util *= SCALE_ERROR_MITIGATION; 210 211 if (*power > 1) 212 dfc->res_util /= *power; 213 } else { 214 goto fail; 215 } 216 } else { 217 /* Energy Model frequencies are in kHz */ 218 perf_idx = get_perf_idx(dfc->em_pd, freq / 1000); 219 if (perf_idx < 0) { 220 res = -EAGAIN; 221 goto fail; 222 } 223 224 _normalize_load(&status); 225 226 /* Scale power for utilization */ 227 *power = dfc->em_pd->table[perf_idx].power; 228 *power *= status.busy_time; 229 *power >>= 10; 230 } 231 232 trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power); 233 234 return 0; 235 fail: 236 /* It is safe to set max in this case */ 237 dfc->res_util = SCALE_ERROR_MITIGATION; 238 return res; 239 } 240 241 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 242 unsigned long state, u32 *power) 243 { 244 struct devfreq_cooling_device *dfc = cdev->devdata; 245 int perf_idx; 246 247 if (state > dfc->max_state) 248 return -EINVAL; 249 250 perf_idx = dfc->max_state - state; 251 *power = dfc->em_pd->table[perf_idx].power; 252 253 return 0; 254 } 255 256 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 257 u32 power, unsigned long *state) 258 { 259 struct devfreq_cooling_device *dfc = cdev->devdata; 260 struct devfreq *df = dfc->devfreq; 261 struct devfreq_dev_status status; 262 unsigned long freq; 263 s32 est_power; 264 int i; 265 266 mutex_lock(&df->lock); 267 status = df->last_status; 268 mutex_unlock(&df->lock); 269 270 freq = status.current_frequency; 271 272 if (dfc->power_ops && dfc->power_ops->get_real_power) { 273 /* Scale for resource utilization */ 274 est_power = power * dfc->res_util; 275 est_power /= SCALE_ERROR_MITIGATION; 276 } else { 277 /* Scale dynamic power for utilization */ 278 _normalize_load(&status); 279 est_power = power << 10; 280 est_power /= status.busy_time; 281 } 282 283 /* 284 * Find the first cooling state that is within the power 285 * budget. The EM power table is sorted ascending. 286 */ 287 for (i = dfc->max_state; i > 0; i--) 288 if (est_power >= dfc->em_pd->table[i].power) 289 break; 290 291 *state = dfc->max_state - i; 292 dfc->capped_state = *state; 293 294 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 295 return 0; 296 } 297 298 static struct thermal_cooling_device_ops devfreq_cooling_ops = { 299 .get_max_state = devfreq_cooling_get_max_state, 300 .get_cur_state = devfreq_cooling_get_cur_state, 301 .set_cur_state = devfreq_cooling_set_cur_state, 302 }; 303 304 /** 305 * devfreq_cooling_gen_tables() - Generate frequency table. 306 * @dfc: Pointer to devfreq cooling device. 307 * @num_opps: Number of OPPs 308 * 309 * Generate frequency table which holds the frequencies in descending 310 * order. That way its indexed by cooling device state. This is for 311 * compatibility with drivers which do not register Energy Model. 312 * 313 * Return: 0 on success, negative error code on failure. 314 */ 315 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc, 316 int num_opps) 317 { 318 struct devfreq *df = dfc->devfreq; 319 struct device *dev = df->dev.parent; 320 unsigned long freq; 321 int i; 322 323 dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table), 324 GFP_KERNEL); 325 if (!dfc->freq_table) 326 return -ENOMEM; 327 328 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 329 struct dev_pm_opp *opp; 330 331 opp = dev_pm_opp_find_freq_floor(dev, &freq); 332 if (IS_ERR(opp)) { 333 kfree(dfc->freq_table); 334 return PTR_ERR(opp); 335 } 336 337 dev_pm_opp_put(opp); 338 dfc->freq_table[i] = freq; 339 } 340 341 return 0; 342 } 343 344 /** 345 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 346 * with OF and power information. 347 * @np: Pointer to OF device_node. 348 * @df: Pointer to devfreq device. 349 * @dfc_power: Pointer to devfreq_cooling_power. 350 * 351 * Register a devfreq cooling device. The available OPPs must be 352 * registered on the device. 353 * 354 * If @dfc_power is provided, the cooling device is registered with the 355 * power extensions. For the power extensions to work correctly, 356 * devfreq should use the simple_ondemand governor, other governors 357 * are not currently supported. 358 */ 359 struct thermal_cooling_device * 360 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 361 struct devfreq_cooling_power *dfc_power) 362 { 363 struct thermal_cooling_device *cdev; 364 struct device *dev = df->dev.parent; 365 struct devfreq_cooling_device *dfc; 366 char dev_name[THERMAL_NAME_LENGTH]; 367 int err, num_opps; 368 369 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 370 if (!dfc) 371 return ERR_PTR(-ENOMEM); 372 373 dfc->devfreq = df; 374 375 dfc->em_pd = em_pd_get(dev); 376 if (dfc->em_pd) { 377 devfreq_cooling_ops.get_requested_power = 378 devfreq_cooling_get_requested_power; 379 devfreq_cooling_ops.state2power = devfreq_cooling_state2power; 380 devfreq_cooling_ops.power2state = devfreq_cooling_power2state; 381 382 dfc->power_ops = dfc_power; 383 384 num_opps = em_pd_nr_perf_states(dfc->em_pd); 385 } else { 386 /* Backward compatibility for drivers which do not use IPA */ 387 dev_dbg(dev, "missing EM for cooling device\n"); 388 389 num_opps = dev_pm_opp_get_opp_count(dev); 390 391 err = devfreq_cooling_gen_tables(dfc, num_opps); 392 if (err) 393 goto free_dfc; 394 } 395 396 if (num_opps <= 0) { 397 err = -EINVAL; 398 goto free_dfc; 399 } 400 401 /* max_state is an index, not a counter */ 402 dfc->max_state = num_opps - 1; 403 404 err = dev_pm_qos_add_request(dev, &dfc->req_max_freq, 405 DEV_PM_QOS_MAX_FREQUENCY, 406 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 407 if (err < 0) 408 goto free_table; 409 410 err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); 411 if (err < 0) 412 goto remove_qos_req; 413 414 dfc->id = err; 415 416 snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); 417 418 cdev = thermal_of_cooling_device_register(np, dev_name, dfc, 419 &devfreq_cooling_ops); 420 if (IS_ERR(cdev)) { 421 err = PTR_ERR(cdev); 422 dev_err(dev, 423 "Failed to register devfreq cooling device (%d)\n", 424 err); 425 goto release_ida; 426 } 427 428 dfc->cdev = cdev; 429 430 return cdev; 431 432 release_ida: 433 ida_simple_remove(&devfreq_ida, dfc->id); 434 remove_qos_req: 435 dev_pm_qos_remove_request(&dfc->req_max_freq); 436 free_table: 437 kfree(dfc->freq_table); 438 free_dfc: 439 kfree(dfc); 440 441 return ERR_PTR(err); 442 } 443 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 444 445 /** 446 * of_devfreq_cooling_register() - Register devfreq cooling device, 447 * with OF information. 448 * @np: Pointer to OF device_node. 449 * @df: Pointer to devfreq device. 450 */ 451 struct thermal_cooling_device * 452 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 453 { 454 return of_devfreq_cooling_register_power(np, df, NULL); 455 } 456 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 457 458 /** 459 * devfreq_cooling_register() - Register devfreq cooling device. 460 * @df: Pointer to devfreq device. 461 */ 462 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 463 { 464 return of_devfreq_cooling_register(NULL, df); 465 } 466 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 467 468 /** 469 * devfreq_cooling_em_register_power() - Register devfreq cooling device with 470 * power information and automatically register Energy Model (EM) 471 * @df: Pointer to devfreq device. 472 * @dfc_power: Pointer to devfreq_cooling_power. 473 * 474 * Register a devfreq cooling device and automatically register EM. The 475 * available OPPs must be registered for the device. 476 * 477 * If @dfc_power is provided, the cooling device is registered with the 478 * power extensions. It is using the simple Energy Model which requires 479 * "dynamic-power-coefficient" a devicetree property. To not break drivers 480 * which miss that DT property, the function won't bail out when the EM 481 * registration failed. The cooling device will be registered if everything 482 * else is OK. 483 */ 484 struct thermal_cooling_device * 485 devfreq_cooling_em_register(struct devfreq *df, 486 struct devfreq_cooling_power *dfc_power) 487 { 488 struct thermal_cooling_device *cdev; 489 struct device *dev; 490 int ret; 491 492 if (IS_ERR_OR_NULL(df)) 493 return ERR_PTR(-EINVAL); 494 495 dev = df->dev.parent; 496 497 ret = dev_pm_opp_of_register_em(dev, NULL); 498 if (ret) 499 dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n", 500 ret); 501 502 cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power); 503 504 if (IS_ERR_OR_NULL(cdev)) 505 em_dev_unregister_perf_domain(dev); 506 507 return cdev; 508 } 509 EXPORT_SYMBOL_GPL(devfreq_cooling_em_register); 510 511 /** 512 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 513 * @cdev: Pointer to devfreq cooling device to unregister. 514 * 515 * Unregisters devfreq cooling device and related Energy Model if it was 516 * present. 517 */ 518 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 519 { 520 struct devfreq_cooling_device *dfc; 521 struct device *dev; 522 523 if (IS_ERR_OR_NULL(cdev)) 524 return; 525 526 dfc = cdev->devdata; 527 dev = dfc->devfreq->dev.parent; 528 529 thermal_cooling_device_unregister(dfc->cdev); 530 ida_simple_remove(&devfreq_ida, dfc->id); 531 dev_pm_qos_remove_request(&dfc->req_max_freq); 532 533 em_dev_unregister_perf_domain(dev); 534 535 kfree(dfc->freq_table); 536 kfree(dfc); 537 } 538 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 539