1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * devfreq_cooling: Thermal cooling device implementation for devices using 4 * devfreq 5 * 6 * Copyright (C) 2014-2015 ARM Limited 7 * 8 * TODO: 9 * - If OPPs are added or removed after devfreq cooling has 10 * registered, the devfreq cooling won't react to it. 11 */ 12 13 #include <linux/devfreq.h> 14 #include <linux/devfreq_cooling.h> 15 #include <linux/energy_model.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/pm_opp.h> 19 #include <linux/pm_qos.h> 20 #include <linux/thermal.h> 21 #include <linux/units.h> 22 23 #include <trace/events/thermal.h> 24 25 #define SCALE_ERROR_MITIGATION 100 26 27 /** 28 * struct devfreq_cooling_device - Devfreq cooling device 29 * devfreq_cooling_device registered. 30 * @cdev: Pointer to associated thermal cooling device. 31 * @cooling_ops: devfreq callbacks to thermal cooling device ops 32 * @devfreq: Pointer to associated devfreq device. 33 * @cooling_state: Current cooling state. 34 * @freq_table: Pointer to a table with the frequencies sorted in descending 35 * order. You can index the table by cooling device state 36 * @max_state: It is the last index, that is, one less than the number of the 37 * OPPs 38 * @power_ops: Pointer to devfreq_cooling_power, a more precised model. 39 * @res_util: Resource utilization scaling factor for the power. 40 * It is multiplied by 100 to minimize the error. It is used 41 * for estimation of the power budget instead of using 42 * 'utilization' (which is 'busy_time' / 'total_time'). 43 * The 'res_util' range is from 100 to power * 100 for the 44 * corresponding 'state'. 45 * @capped_state: index to cooling state with in dynamic power budget 46 * @req_max_freq: PM QoS request for limiting the maximum frequency 47 * of the devfreq device. 48 * @em_pd: Energy Model for the associated Devfreq device 49 */ 50 struct devfreq_cooling_device { 51 struct thermal_cooling_device *cdev; 52 struct thermal_cooling_device_ops cooling_ops; 53 struct devfreq *devfreq; 54 unsigned long cooling_state; 55 u32 *freq_table; 56 size_t max_state; 57 struct devfreq_cooling_power *power_ops; 58 u32 res_util; 59 int capped_state; 60 struct dev_pm_qos_request req_max_freq; 61 struct em_perf_domain *em_pd; 62 }; 63 64 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 65 unsigned long *state) 66 { 67 struct devfreq_cooling_device *dfc = cdev->devdata; 68 69 *state = dfc->max_state; 70 71 return 0; 72 } 73 74 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 75 unsigned long *state) 76 { 77 struct devfreq_cooling_device *dfc = cdev->devdata; 78 79 *state = dfc->cooling_state; 80 81 return 0; 82 } 83 84 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 85 unsigned long state) 86 { 87 struct devfreq_cooling_device *dfc = cdev->devdata; 88 struct devfreq *df = dfc->devfreq; 89 struct device *dev = df->dev.parent; 90 unsigned long freq; 91 int perf_idx; 92 93 if (state == dfc->cooling_state) 94 return 0; 95 96 dev_dbg(dev, "Setting cooling state %lu\n", state); 97 98 if (state > dfc->max_state) 99 return -EINVAL; 100 101 if (dfc->em_pd) { 102 perf_idx = dfc->max_state - state; 103 freq = dfc->em_pd->table[perf_idx].frequency * 1000; 104 } else { 105 freq = dfc->freq_table[state]; 106 } 107 108 dev_pm_qos_update_request(&dfc->req_max_freq, 109 DIV_ROUND_UP(freq, HZ_PER_KHZ)); 110 111 dfc->cooling_state = state; 112 113 return 0; 114 } 115 116 /** 117 * get_perf_idx() - get the performance index corresponding to a frequency 118 * @em_pd: Pointer to device's Energy Model 119 * @freq: frequency in kHz 120 * 121 * Return: the performance index associated with the @freq, or 122 * -EINVAL if it wasn't found. 123 */ 124 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) 125 { 126 int i; 127 128 for (i = 0; i < em_pd->nr_perf_states; i++) { 129 if (em_pd->table[i].frequency == freq) 130 return i; 131 } 132 133 return -EINVAL; 134 } 135 136 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 137 { 138 struct device *dev = df->dev.parent; 139 unsigned long voltage; 140 struct dev_pm_opp *opp; 141 142 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 143 if (PTR_ERR(opp) == -ERANGE) 144 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 145 146 if (IS_ERR(opp)) { 147 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 148 freq, PTR_ERR(opp)); 149 return 0; 150 } 151 152 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 153 dev_pm_opp_put(opp); 154 155 if (voltage == 0) { 156 dev_err_ratelimited(dev, 157 "Failed to get voltage for frequency %lu\n", 158 freq); 159 } 160 161 return voltage; 162 } 163 164 static void _normalize_load(struct devfreq_dev_status *status) 165 { 166 if (status->total_time > 0xfffff) { 167 status->total_time >>= 10; 168 status->busy_time >>= 10; 169 } 170 171 status->busy_time <<= 10; 172 status->busy_time /= status->total_time ? : 1; 173 174 status->busy_time = status->busy_time ? : 1; 175 status->total_time = 1024; 176 } 177 178 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 179 u32 *power) 180 { 181 struct devfreq_cooling_device *dfc = cdev->devdata; 182 struct devfreq *df = dfc->devfreq; 183 struct devfreq_dev_status status; 184 unsigned long state; 185 unsigned long freq; 186 unsigned long voltage; 187 int res, perf_idx; 188 189 mutex_lock(&df->lock); 190 status = df->last_status; 191 mutex_unlock(&df->lock); 192 193 freq = status.current_frequency; 194 195 if (dfc->power_ops && dfc->power_ops->get_real_power) { 196 voltage = get_voltage(df, freq); 197 if (voltage == 0) { 198 res = -EINVAL; 199 goto fail; 200 } 201 202 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 203 if (!res) { 204 state = dfc->capped_state; 205 dfc->res_util = dfc->em_pd->table[state].power; 206 dfc->res_util *= SCALE_ERROR_MITIGATION; 207 208 if (*power > 1) 209 dfc->res_util /= *power; 210 } else { 211 goto fail; 212 } 213 } else { 214 /* Energy Model frequencies are in kHz */ 215 perf_idx = get_perf_idx(dfc->em_pd, freq / 1000); 216 if (perf_idx < 0) { 217 res = -EAGAIN; 218 goto fail; 219 } 220 221 _normalize_load(&status); 222 223 /* Scale power for utilization */ 224 *power = dfc->em_pd->table[perf_idx].power; 225 *power *= status.busy_time; 226 *power >>= 10; 227 } 228 229 trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power); 230 231 return 0; 232 fail: 233 /* It is safe to set max in this case */ 234 dfc->res_util = SCALE_ERROR_MITIGATION; 235 return res; 236 } 237 238 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 239 unsigned long state, u32 *power) 240 { 241 struct devfreq_cooling_device *dfc = cdev->devdata; 242 int perf_idx; 243 244 if (state > dfc->max_state) 245 return -EINVAL; 246 247 perf_idx = dfc->max_state - state; 248 *power = dfc->em_pd->table[perf_idx].power; 249 250 return 0; 251 } 252 253 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 254 u32 power, unsigned long *state) 255 { 256 struct devfreq_cooling_device *dfc = cdev->devdata; 257 struct devfreq *df = dfc->devfreq; 258 struct devfreq_dev_status status; 259 unsigned long freq; 260 s32 est_power; 261 int i; 262 263 mutex_lock(&df->lock); 264 status = df->last_status; 265 mutex_unlock(&df->lock); 266 267 freq = status.current_frequency; 268 269 if (dfc->power_ops && dfc->power_ops->get_real_power) { 270 /* Scale for resource utilization */ 271 est_power = power * dfc->res_util; 272 est_power /= SCALE_ERROR_MITIGATION; 273 } else { 274 /* Scale dynamic power for utilization */ 275 _normalize_load(&status); 276 est_power = power << 10; 277 est_power /= status.busy_time; 278 } 279 280 /* 281 * Find the first cooling state that is within the power 282 * budget. The EM power table is sorted ascending. 283 */ 284 for (i = dfc->max_state; i > 0; i--) 285 if (est_power >= dfc->em_pd->table[i].power) 286 break; 287 288 *state = dfc->max_state - i; 289 dfc->capped_state = *state; 290 291 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 292 return 0; 293 } 294 295 /** 296 * devfreq_cooling_gen_tables() - Generate frequency table. 297 * @dfc: Pointer to devfreq cooling device. 298 * @num_opps: Number of OPPs 299 * 300 * Generate frequency table which holds the frequencies in descending 301 * order. That way its indexed by cooling device state. This is for 302 * compatibility with drivers which do not register Energy Model. 303 * 304 * Return: 0 on success, negative error code on failure. 305 */ 306 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc, 307 int num_opps) 308 { 309 struct devfreq *df = dfc->devfreq; 310 struct device *dev = df->dev.parent; 311 unsigned long freq; 312 int i; 313 314 dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table), 315 GFP_KERNEL); 316 if (!dfc->freq_table) 317 return -ENOMEM; 318 319 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 320 struct dev_pm_opp *opp; 321 322 opp = dev_pm_opp_find_freq_floor(dev, &freq); 323 if (IS_ERR(opp)) { 324 kfree(dfc->freq_table); 325 return PTR_ERR(opp); 326 } 327 328 dev_pm_opp_put(opp); 329 dfc->freq_table[i] = freq; 330 } 331 332 return 0; 333 } 334 335 /** 336 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 337 * with OF and power information. 338 * @np: Pointer to OF device_node. 339 * @df: Pointer to devfreq device. 340 * @dfc_power: Pointer to devfreq_cooling_power. 341 * 342 * Register a devfreq cooling device. The available OPPs must be 343 * registered on the device. 344 * 345 * If @dfc_power is provided, the cooling device is registered with the 346 * power extensions. For the power extensions to work correctly, 347 * devfreq should use the simple_ondemand governor, other governors 348 * are not currently supported. 349 */ 350 struct thermal_cooling_device * 351 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 352 struct devfreq_cooling_power *dfc_power) 353 { 354 struct thermal_cooling_device *cdev; 355 struct device *dev = df->dev.parent; 356 struct devfreq_cooling_device *dfc; 357 struct em_perf_domain *em; 358 struct thermal_cooling_device_ops *ops; 359 char *name; 360 int err, num_opps; 361 362 363 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 364 if (!dfc) 365 return ERR_PTR(-ENOMEM); 366 367 dfc->devfreq = df; 368 369 ops = &dfc->cooling_ops; 370 ops->get_max_state = devfreq_cooling_get_max_state; 371 ops->get_cur_state = devfreq_cooling_get_cur_state; 372 ops->set_cur_state = devfreq_cooling_set_cur_state; 373 374 em = em_pd_get(dev); 375 if (em && !em_is_artificial(em)) { 376 dfc->em_pd = em; 377 ops->get_requested_power = 378 devfreq_cooling_get_requested_power; 379 ops->state2power = devfreq_cooling_state2power; 380 ops->power2state = devfreq_cooling_power2state; 381 382 dfc->power_ops = dfc_power; 383 384 num_opps = em_pd_nr_perf_states(dfc->em_pd); 385 } else { 386 /* Backward compatibility for drivers which do not use IPA */ 387 dev_dbg(dev, "missing proper EM for cooling device\n"); 388 389 num_opps = dev_pm_opp_get_opp_count(dev); 390 391 err = devfreq_cooling_gen_tables(dfc, num_opps); 392 if (err) 393 goto free_dfc; 394 } 395 396 if (num_opps <= 0) { 397 err = -EINVAL; 398 goto free_dfc; 399 } 400 401 /* max_state is an index, not a counter */ 402 dfc->max_state = num_opps - 1; 403 404 err = dev_pm_qos_add_request(dev, &dfc->req_max_freq, 405 DEV_PM_QOS_MAX_FREQUENCY, 406 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 407 if (err < 0) 408 goto free_table; 409 410 err = -ENOMEM; 411 name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev)); 412 if (!name) 413 goto remove_qos_req; 414 415 cdev = thermal_of_cooling_device_register(np, name, dfc, ops); 416 kfree(name); 417 418 if (IS_ERR(cdev)) { 419 err = PTR_ERR(cdev); 420 dev_err(dev, 421 "Failed to register devfreq cooling device (%d)\n", 422 err); 423 goto remove_qos_req; 424 } 425 426 dfc->cdev = cdev; 427 428 return cdev; 429 430 remove_qos_req: 431 dev_pm_qos_remove_request(&dfc->req_max_freq); 432 free_table: 433 kfree(dfc->freq_table); 434 free_dfc: 435 kfree(dfc); 436 437 return ERR_PTR(err); 438 } 439 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 440 441 /** 442 * of_devfreq_cooling_register() - Register devfreq cooling device, 443 * with OF information. 444 * @np: Pointer to OF device_node. 445 * @df: Pointer to devfreq device. 446 */ 447 struct thermal_cooling_device * 448 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 449 { 450 return of_devfreq_cooling_register_power(np, df, NULL); 451 } 452 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 453 454 /** 455 * devfreq_cooling_register() - Register devfreq cooling device. 456 * @df: Pointer to devfreq device. 457 */ 458 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 459 { 460 return of_devfreq_cooling_register(NULL, df); 461 } 462 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 463 464 /** 465 * devfreq_cooling_em_register() - Register devfreq cooling device with 466 * power information and automatically register Energy Model (EM) 467 * @df: Pointer to devfreq device. 468 * @dfc_power: Pointer to devfreq_cooling_power. 469 * 470 * Register a devfreq cooling device and automatically register EM. The 471 * available OPPs must be registered for the device. 472 * 473 * If @dfc_power is provided, the cooling device is registered with the 474 * power extensions. It is using the simple Energy Model which requires 475 * "dynamic-power-coefficient" a devicetree property. To not break drivers 476 * which miss that DT property, the function won't bail out when the EM 477 * registration failed. The cooling device will be registered if everything 478 * else is OK. 479 */ 480 struct thermal_cooling_device * 481 devfreq_cooling_em_register(struct devfreq *df, 482 struct devfreq_cooling_power *dfc_power) 483 { 484 struct thermal_cooling_device *cdev; 485 struct device *dev; 486 int ret; 487 488 if (IS_ERR_OR_NULL(df)) 489 return ERR_PTR(-EINVAL); 490 491 dev = df->dev.parent; 492 493 ret = dev_pm_opp_of_register_em(dev, NULL); 494 if (ret) 495 dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n", 496 ret); 497 498 cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power); 499 500 if (IS_ERR_OR_NULL(cdev)) 501 em_dev_unregister_perf_domain(dev); 502 503 return cdev; 504 } 505 EXPORT_SYMBOL_GPL(devfreq_cooling_em_register); 506 507 /** 508 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 509 * @cdev: Pointer to devfreq cooling device to unregister. 510 * 511 * Unregisters devfreq cooling device and related Energy Model if it was 512 * present. 513 */ 514 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 515 { 516 struct devfreq_cooling_device *dfc; 517 struct device *dev; 518 519 if (IS_ERR_OR_NULL(cdev)) 520 return; 521 522 dfc = cdev->devdata; 523 dev = dfc->devfreq->dev.parent; 524 525 thermal_cooling_device_unregister(dfc->cdev); 526 dev_pm_qos_remove_request(&dfc->req_max_freq); 527 528 em_dev_unregister_perf_domain(dev); 529 530 kfree(dfc->freq_table); 531 kfree(dfc); 532 } 533 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 534