1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Energy Model of devices 4 * 5 * Copyright (c) 2018-2021, Arm ltd. 6 * Written by: Quentin Perret, Arm ltd. 7 * Improvements provided by: Lukasz Luba, Arm ltd. 8 */ 9 10 #define pr_fmt(fmt) "energy_model: " fmt 11 12 #include <linux/cpu.h> 13 #include <linux/cpufreq.h> 14 #include <linux/cpumask.h> 15 #include <linux/debugfs.h> 16 #include <linux/energy_model.h> 17 #include <linux/sched/topology.h> 18 #include <linux/slab.h> 19 20 /* 21 * Mutex serializing the registrations of performance domains and letting 22 * callbacks defined by drivers sleep. 23 */ 24 static DEFINE_MUTEX(em_pd_mutex); 25 26 static void em_cpufreq_update_efficiencies(struct device *dev, 27 struct em_perf_state *table); 28 static void em_check_capacity_update(void); 29 static void em_update_workfn(struct work_struct *work); 30 static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn); 31 32 static bool _is_cpu_device(struct device *dev) 33 { 34 return (dev->bus == &cpu_subsys); 35 } 36 37 #ifdef CONFIG_DEBUG_FS 38 static struct dentry *rootdir; 39 40 struct em_dbg_info { 41 struct em_perf_domain *pd; 42 int ps_id; 43 }; 44 45 #define DEFINE_EM_DBG_SHOW(name, fname) \ 46 static int em_debug_##fname##_show(struct seq_file *s, void *unused) \ 47 { \ 48 struct em_dbg_info *em_dbg = s->private; \ 49 struct em_perf_state *table; \ 50 unsigned long val; \ 51 \ 52 rcu_read_lock(); \ 53 table = em_perf_state_from_pd(em_dbg->pd); \ 54 val = table[em_dbg->ps_id].name; \ 55 rcu_read_unlock(); \ 56 \ 57 seq_printf(s, "%lu\n", val); \ 58 return 0; \ 59 } \ 60 DEFINE_SHOW_ATTRIBUTE(em_debug_##fname) 61 62 DEFINE_EM_DBG_SHOW(frequency, frequency); 63 DEFINE_EM_DBG_SHOW(power, power); 64 DEFINE_EM_DBG_SHOW(cost, cost); 65 DEFINE_EM_DBG_SHOW(performance, performance); 66 DEFINE_EM_DBG_SHOW(flags, inefficiency); 67 68 static void em_debug_create_ps(struct em_perf_domain *em_pd, 69 struct em_dbg_info *em_dbg, int i, 70 struct dentry *pd) 71 { 72 struct em_perf_state *table; 73 unsigned long freq; 74 struct dentry *d; 75 char name[24]; 76 77 em_dbg[i].pd = em_pd; 78 em_dbg[i].ps_id = i; 79 80 rcu_read_lock(); 81 table = em_perf_state_from_pd(em_pd); 82 freq = table[i].frequency; 83 rcu_read_unlock(); 84 85 snprintf(name, sizeof(name), "ps:%lu", freq); 86 87 /* Create per-ps directory */ 88 d = debugfs_create_dir(name, pd); 89 debugfs_create_file("frequency", 0444, d, &em_dbg[i], 90 &em_debug_frequency_fops); 91 debugfs_create_file("power", 0444, d, &em_dbg[i], 92 &em_debug_power_fops); 93 debugfs_create_file("cost", 0444, d, &em_dbg[i], 94 &em_debug_cost_fops); 95 debugfs_create_file("performance", 0444, d, &em_dbg[i], 96 &em_debug_performance_fops); 97 debugfs_create_file("inefficient", 0444, d, &em_dbg[i], 98 &em_debug_inefficiency_fops); 99 } 100 101 static int em_debug_cpus_show(struct seq_file *s, void *unused) 102 { 103 seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private))); 104 105 return 0; 106 } 107 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); 108 109 static int em_debug_flags_show(struct seq_file *s, void *unused) 110 { 111 struct em_perf_domain *pd = s->private; 112 113 seq_printf(s, "%#lx\n", pd->flags); 114 115 return 0; 116 } 117 DEFINE_SHOW_ATTRIBUTE(em_debug_flags); 118 119 static void em_debug_create_pd(struct device *dev) 120 { 121 struct em_dbg_info *em_dbg; 122 struct dentry *d; 123 int i; 124 125 /* Create the directory of the performance domain */ 126 d = debugfs_create_dir(dev_name(dev), rootdir); 127 128 if (_is_cpu_device(dev)) 129 debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, 130 &em_debug_cpus_fops); 131 132 debugfs_create_file("flags", 0444, d, dev->em_pd, 133 &em_debug_flags_fops); 134 135 em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states, 136 sizeof(*em_dbg), GFP_KERNEL); 137 if (!em_dbg) 138 return; 139 140 /* Create a sub-directory for each performance state */ 141 for (i = 0; i < dev->em_pd->nr_perf_states; i++) 142 em_debug_create_ps(dev->em_pd, em_dbg, i, d); 143 144 } 145 146 static void em_debug_remove_pd(struct device *dev) 147 { 148 debugfs_lookup_and_remove(dev_name(dev), rootdir); 149 } 150 151 static int __init em_debug_init(void) 152 { 153 /* Create /sys/kernel/debug/energy_model directory */ 154 rootdir = debugfs_create_dir("energy_model", NULL); 155 156 return 0; 157 } 158 fs_initcall(em_debug_init); 159 #else /* CONFIG_DEBUG_FS */ 160 static void em_debug_create_pd(struct device *dev) {} 161 static void em_debug_remove_pd(struct device *dev) {} 162 #endif 163 164 static void em_destroy_table_rcu(struct rcu_head *rp) 165 { 166 struct em_perf_table __rcu *table; 167 168 table = container_of(rp, struct em_perf_table, rcu); 169 kfree(table); 170 } 171 172 static void em_release_table_kref(struct kref *kref) 173 { 174 struct em_perf_table __rcu *table; 175 176 /* It was the last owner of this table so we can free */ 177 table = container_of(kref, struct em_perf_table, kref); 178 179 call_rcu(&table->rcu, em_destroy_table_rcu); 180 } 181 182 /** 183 * em_table_free() - Handles safe free of the EM table when needed 184 * @table : EM table which is going to be freed 185 * 186 * No return values. 187 */ 188 void em_table_free(struct em_perf_table __rcu *table) 189 { 190 kref_put(&table->kref, em_release_table_kref); 191 } 192 193 /** 194 * em_table_alloc() - Allocate a new EM table 195 * @pd : EM performance domain for which this must be done 196 * 197 * Allocate a new EM table and initialize its kref to indicate that it 198 * has a user. 199 * Returns allocated table or NULL. 200 */ 201 struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) 202 { 203 struct em_perf_table __rcu *table; 204 int table_size; 205 206 table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; 207 208 table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); 209 if (!table) 210 return NULL; 211 212 kref_init(&table->kref); 213 214 return table; 215 } 216 217 static void em_init_performance(struct device *dev, struct em_perf_domain *pd, 218 struct em_perf_state *table, int nr_states) 219 { 220 u64 fmax, max_cap; 221 int i, cpu; 222 223 /* This is needed only for CPUs and EAS skip other devices */ 224 if (!_is_cpu_device(dev)) 225 return; 226 227 cpu = cpumask_first(em_span_cpus(pd)); 228 229 /* 230 * Calculate the performance value for each frequency with 231 * linear relationship. The final CPU capacity might not be ready at 232 * boot time, but the EM will be updated a bit later with correct one. 233 */ 234 fmax = (u64) table[nr_states - 1].frequency; 235 max_cap = (u64) arch_scale_cpu_capacity(cpu); 236 for (i = 0; i < nr_states; i++) 237 table[i].performance = div64_u64(max_cap * table[i].frequency, 238 fmax); 239 } 240 241 static int em_compute_costs(struct device *dev, struct em_perf_state *table, 242 struct em_data_callback *cb, int nr_states, 243 unsigned long flags) 244 { 245 unsigned long prev_cost = ULONG_MAX; 246 int i, ret; 247 248 /* Compute the cost of each performance state. */ 249 for (i = nr_states - 1; i >= 0; i--) { 250 unsigned long power_res, cost; 251 252 if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) { 253 ret = cb->get_cost(dev, table[i].frequency, &cost); 254 if (ret || !cost || cost > EM_MAX_POWER) { 255 dev_err(dev, "EM: invalid cost %lu %d\n", 256 cost, ret); 257 return -EINVAL; 258 } 259 } else { 260 /* increase resolution of 'cost' precision */ 261 power_res = table[i].power * 10; 262 cost = power_res / table[i].performance; 263 } 264 265 table[i].cost = cost; 266 267 if (table[i].cost >= prev_cost) { 268 table[i].flags = EM_PERF_STATE_INEFFICIENT; 269 dev_dbg(dev, "EM: OPP:%lu is inefficient\n", 270 table[i].frequency); 271 } else { 272 prev_cost = table[i].cost; 273 } 274 } 275 276 return 0; 277 } 278 279 /** 280 * em_dev_compute_costs() - Calculate cost values for new runtime EM table 281 * @dev : Device for which the EM table is to be updated 282 * @table : The new EM table that is going to get the costs calculated 283 * @nr_states : Number of performance states 284 * 285 * Calculate the em_perf_state::cost values for new runtime EM table. The 286 * values are used for EAS during task placement. It also calculates and sets 287 * the efficiency flag for each performance state. When the function finish 288 * successfully the EM table is ready to be updated and used by EAS. 289 * 290 * Return 0 on success or a proper error in case of failure. 291 */ 292 int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, 293 int nr_states) 294 { 295 return em_compute_costs(dev, table, NULL, nr_states, 0); 296 } 297 298 /** 299 * em_dev_update_perf_domain() - Update runtime EM table for a device 300 * @dev : Device for which the EM is to be updated 301 * @new_table : The new EM table that is going to be used from now 302 * 303 * Update EM runtime modifiable table for the @dev using the provided @table. 304 * 305 * This function uses a mutex to serialize writers, so it must not be called 306 * from a non-sleeping context. 307 * 308 * Return 0 on success or an error code on failure. 309 */ 310 int em_dev_update_perf_domain(struct device *dev, 311 struct em_perf_table __rcu *new_table) 312 { 313 struct em_perf_table __rcu *old_table; 314 struct em_perf_domain *pd; 315 316 if (!dev) 317 return -EINVAL; 318 319 /* Serialize update/unregister or concurrent updates */ 320 mutex_lock(&em_pd_mutex); 321 322 if (!dev->em_pd) { 323 mutex_unlock(&em_pd_mutex); 324 return -EINVAL; 325 } 326 pd = dev->em_pd; 327 328 kref_get(&new_table->kref); 329 330 old_table = pd->em_table; 331 rcu_assign_pointer(pd->em_table, new_table); 332 333 em_cpufreq_update_efficiencies(dev, new_table->state); 334 335 em_table_free(old_table); 336 337 mutex_unlock(&em_pd_mutex); 338 return 0; 339 } 340 EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); 341 342 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, 343 struct em_perf_state *table, 344 struct em_data_callback *cb, 345 unsigned long flags) 346 { 347 unsigned long power, freq, prev_freq = 0; 348 int nr_states = pd->nr_perf_states; 349 int i, ret; 350 351 /* Build the list of performance states for this performance domain */ 352 for (i = 0, freq = 0; i < nr_states; i++, freq++) { 353 /* 354 * active_power() is a driver callback which ceils 'freq' to 355 * lowest performance state of 'dev' above 'freq' and updates 356 * 'power' and 'freq' accordingly. 357 */ 358 ret = cb->active_power(dev, &power, &freq); 359 if (ret) { 360 dev_err(dev, "EM: invalid perf. state: %d\n", 361 ret); 362 return -EINVAL; 363 } 364 365 /* 366 * We expect the driver callback to increase the frequency for 367 * higher performance states. 368 */ 369 if (freq <= prev_freq) { 370 dev_err(dev, "EM: non-increasing freq: %lu\n", 371 freq); 372 return -EINVAL; 373 } 374 375 /* 376 * The power returned by active_state() is expected to be 377 * positive and be in range. 378 */ 379 if (!power || power > EM_MAX_POWER) { 380 dev_err(dev, "EM: invalid power: %lu\n", 381 power); 382 return -EINVAL; 383 } 384 385 table[i].power = power; 386 table[i].frequency = prev_freq = freq; 387 } 388 389 em_init_performance(dev, pd, table, nr_states); 390 391 ret = em_compute_costs(dev, table, cb, nr_states, flags); 392 if (ret) 393 return -EINVAL; 394 395 return 0; 396 } 397 398 static int em_create_pd(struct device *dev, int nr_states, 399 struct em_data_callback *cb, cpumask_t *cpus, 400 unsigned long flags) 401 { 402 struct em_perf_table __rcu *em_table; 403 struct em_perf_domain *pd; 404 struct device *cpu_dev; 405 int cpu, ret, num_cpus; 406 407 if (_is_cpu_device(dev)) { 408 num_cpus = cpumask_weight(cpus); 409 410 /* Prevent max possible energy calculation to not overflow */ 411 if (num_cpus > EM_MAX_NUM_CPUS) { 412 dev_err(dev, "EM: too many CPUs, overflow possible\n"); 413 return -EINVAL; 414 } 415 416 pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); 417 if (!pd) 418 return -ENOMEM; 419 420 cpumask_copy(em_span_cpus(pd), cpus); 421 } else { 422 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 423 if (!pd) 424 return -ENOMEM; 425 } 426 427 pd->nr_perf_states = nr_states; 428 429 em_table = em_table_alloc(pd); 430 if (!em_table) 431 goto free_pd; 432 433 ret = em_create_perf_table(dev, pd, em_table->state, cb, flags); 434 if (ret) 435 goto free_pd_table; 436 437 rcu_assign_pointer(pd->em_table, em_table); 438 439 if (_is_cpu_device(dev)) 440 for_each_cpu(cpu, cpus) { 441 cpu_dev = get_cpu_device(cpu); 442 cpu_dev->em_pd = pd; 443 } 444 445 dev->em_pd = pd; 446 447 return 0; 448 449 free_pd_table: 450 kfree(em_table); 451 free_pd: 452 kfree(pd); 453 return -EINVAL; 454 } 455 456 static void 457 em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) 458 { 459 struct em_perf_domain *pd = dev->em_pd; 460 struct cpufreq_policy *policy; 461 int found = 0; 462 int i, cpu; 463 464 if (!_is_cpu_device(dev)) 465 return; 466 467 /* Try to get a CPU which is active and in this PD */ 468 cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask); 469 if (cpu >= nr_cpu_ids) { 470 dev_warn(dev, "EM: No online CPU for CPUFreq policy\n"); 471 return; 472 } 473 474 policy = cpufreq_cpu_get(cpu); 475 if (!policy) { 476 dev_warn(dev, "EM: Access to CPUFreq policy failed\n"); 477 return; 478 } 479 480 for (i = 0; i < pd->nr_perf_states; i++) { 481 if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) 482 continue; 483 484 if (!cpufreq_table_set_inefficient(policy, table[i].frequency)) 485 found++; 486 } 487 488 cpufreq_cpu_put(policy); 489 490 if (!found) 491 return; 492 493 /* 494 * Efficiencies have been installed in CPUFreq, inefficient frequencies 495 * will be skipped. The EM can do the same. 496 */ 497 pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES; 498 } 499 500 /** 501 * em_pd_get() - Return the performance domain for a device 502 * @dev : Device to find the performance domain for 503 * 504 * Returns the performance domain to which @dev belongs, or NULL if it doesn't 505 * exist. 506 */ 507 struct em_perf_domain *em_pd_get(struct device *dev) 508 { 509 if (IS_ERR_OR_NULL(dev)) 510 return NULL; 511 512 return dev->em_pd; 513 } 514 EXPORT_SYMBOL_GPL(em_pd_get); 515 516 /** 517 * em_cpu_get() - Return the performance domain for a CPU 518 * @cpu : CPU to find the performance domain for 519 * 520 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't 521 * exist. 522 */ 523 struct em_perf_domain *em_cpu_get(int cpu) 524 { 525 struct device *cpu_dev; 526 527 cpu_dev = get_cpu_device(cpu); 528 if (!cpu_dev) 529 return NULL; 530 531 return em_pd_get(cpu_dev); 532 } 533 EXPORT_SYMBOL_GPL(em_cpu_get); 534 535 /** 536 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device 537 * @dev : Device for which the EM is to register 538 * @nr_states : Number of performance states to register 539 * @cb : Callback functions providing the data of the Energy Model 540 * @cpus : Pointer to cpumask_t, which in case of a CPU device is 541 * obligatory. It can be taken from i.e. 'policy->cpus'. For other 542 * type of devices this should be set to NULL. 543 * @microwatts : Flag indicating that the power values are in micro-Watts or 544 * in some other scale. It must be set properly. 545 * 546 * Create Energy Model tables for a performance domain using the callbacks 547 * defined in cb. 548 * 549 * The @microwatts is important to set with correct value. Some kernel 550 * sub-systems might rely on this flag and check if all devices in the EM are 551 * using the same scale. 552 * 553 * If multiple clients register the same performance domain, all but the first 554 * registration will be ignored. 555 * 556 * Return 0 on success 557 */ 558 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, 559 struct em_data_callback *cb, cpumask_t *cpus, 560 bool microwatts) 561 { 562 unsigned long cap, prev_cap = 0; 563 unsigned long flags = 0; 564 int cpu, ret; 565 566 if (!dev || !nr_states || !cb) 567 return -EINVAL; 568 569 /* 570 * Use a mutex to serialize the registration of performance domains and 571 * let the driver-defined callback functions sleep. 572 */ 573 mutex_lock(&em_pd_mutex); 574 575 if (dev->em_pd) { 576 ret = -EEXIST; 577 goto unlock; 578 } 579 580 if (_is_cpu_device(dev)) { 581 if (!cpus) { 582 dev_err(dev, "EM: invalid CPU mask\n"); 583 ret = -EINVAL; 584 goto unlock; 585 } 586 587 for_each_cpu(cpu, cpus) { 588 if (em_cpu_get(cpu)) { 589 dev_err(dev, "EM: exists for CPU%d\n", cpu); 590 ret = -EEXIST; 591 goto unlock; 592 } 593 /* 594 * All CPUs of a domain must have the same 595 * micro-architecture since they all share the same 596 * table. 597 */ 598 cap = arch_scale_cpu_capacity(cpu); 599 if (prev_cap && prev_cap != cap) { 600 dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n", 601 cpumask_pr_args(cpus)); 602 603 ret = -EINVAL; 604 goto unlock; 605 } 606 prev_cap = cap; 607 } 608 } 609 610 if (microwatts) 611 flags |= EM_PERF_DOMAIN_MICROWATTS; 612 else if (cb->get_cost) 613 flags |= EM_PERF_DOMAIN_ARTIFICIAL; 614 615 /* 616 * EM only supports uW (exception is artificial EM). 617 * Therefore, check and force the drivers to provide 618 * power in uW. 619 */ 620 if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) { 621 dev_err(dev, "EM: only supports uW power values\n"); 622 ret = -EINVAL; 623 goto unlock; 624 } 625 626 ret = em_create_pd(dev, nr_states, cb, cpus, flags); 627 if (ret) 628 goto unlock; 629 630 dev->em_pd->flags |= flags; 631 dev->em_pd->min_perf_state = 0; 632 dev->em_pd->max_perf_state = nr_states - 1; 633 634 em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state); 635 636 em_debug_create_pd(dev); 637 dev_info(dev, "EM: created perf domain\n"); 638 639 unlock: 640 mutex_unlock(&em_pd_mutex); 641 642 if (_is_cpu_device(dev)) 643 em_check_capacity_update(); 644 645 return ret; 646 } 647 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); 648 649 /** 650 * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device 651 * @dev : Device for which the EM is registered 652 * 653 * Unregister the EM for the specified @dev (but not a CPU device). 654 */ 655 void em_dev_unregister_perf_domain(struct device *dev) 656 { 657 if (IS_ERR_OR_NULL(dev) || !dev->em_pd) 658 return; 659 660 if (_is_cpu_device(dev)) 661 return; 662 663 /* 664 * The mutex separates all register/unregister requests and protects 665 * from potential clean-up/setup issues in the debugfs directories. 666 * The debugfs directory name is the same as device's name. 667 */ 668 mutex_lock(&em_pd_mutex); 669 em_debug_remove_pd(dev); 670 671 em_table_free(dev->em_pd->em_table); 672 673 kfree(dev->em_pd); 674 dev->em_pd = NULL; 675 mutex_unlock(&em_pd_mutex); 676 } 677 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); 678 679 static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd) 680 { 681 struct em_perf_table __rcu *em_table; 682 struct em_perf_state *ps, *new_ps; 683 int ps_size; 684 685 em_table = em_table_alloc(pd); 686 if (!em_table) 687 return NULL; 688 689 new_ps = em_table->state; 690 691 rcu_read_lock(); 692 ps = em_perf_state_from_pd(pd); 693 /* Initialize data based on old table */ 694 ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states; 695 memcpy(new_ps, ps, ps_size); 696 697 rcu_read_unlock(); 698 699 return em_table; 700 } 701 702 static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd, 703 struct em_perf_table __rcu *em_table) 704 { 705 int ret; 706 707 ret = em_compute_costs(dev, em_table->state, NULL, pd->nr_perf_states, 708 pd->flags); 709 if (ret) 710 goto free_em_table; 711 712 ret = em_dev_update_perf_domain(dev, em_table); 713 if (ret) 714 goto free_em_table; 715 716 /* 717 * This is one-time-update, so give up the ownership in this updater. 718 * The EM framework has incremented the usage counter and from now 719 * will keep the reference (then free the memory when needed). 720 */ 721 free_em_table: 722 em_table_free(em_table); 723 return ret; 724 } 725 726 /* 727 * Adjustment of CPU performance values after boot, when all CPUs capacites 728 * are correctly calculated. 729 */ 730 static void em_adjust_new_capacity(struct device *dev, 731 struct em_perf_domain *pd, 732 u64 max_cap) 733 { 734 struct em_perf_table __rcu *em_table; 735 736 em_table = em_table_dup(pd); 737 if (!em_table) { 738 dev_warn(dev, "EM: allocation failed\n"); 739 return; 740 } 741 742 em_init_performance(dev, pd, em_table->state, pd->nr_perf_states); 743 744 em_recalc_and_update(dev, pd, em_table); 745 } 746 747 static void em_check_capacity_update(void) 748 { 749 cpumask_var_t cpu_done_mask; 750 struct em_perf_state *table; 751 struct em_perf_domain *pd; 752 unsigned long cpu_capacity; 753 int cpu; 754 755 if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) { 756 pr_warn("no free memory\n"); 757 return; 758 } 759 760 /* Check if CPUs capacity has changed than update EM */ 761 for_each_possible_cpu(cpu) { 762 struct cpufreq_policy *policy; 763 unsigned long em_max_perf; 764 struct device *dev; 765 766 if (cpumask_test_cpu(cpu, cpu_done_mask)) 767 continue; 768 769 policy = cpufreq_cpu_get(cpu); 770 if (!policy) { 771 pr_debug("Accessing cpu%d policy failed\n", cpu); 772 schedule_delayed_work(&em_update_work, 773 msecs_to_jiffies(1000)); 774 break; 775 } 776 cpufreq_cpu_put(policy); 777 778 pd = em_cpu_get(cpu); 779 if (!pd || em_is_artificial(pd)) 780 continue; 781 782 cpumask_or(cpu_done_mask, cpu_done_mask, 783 em_span_cpus(pd)); 784 785 cpu_capacity = arch_scale_cpu_capacity(cpu); 786 787 rcu_read_lock(); 788 table = em_perf_state_from_pd(pd); 789 em_max_perf = table[pd->nr_perf_states - 1].performance; 790 rcu_read_unlock(); 791 792 /* 793 * Check if the CPU capacity has been adjusted during boot 794 * and trigger the update for new performance values. 795 */ 796 if (em_max_perf == cpu_capacity) 797 continue; 798 799 pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", 800 cpu, cpu_capacity, em_max_perf); 801 802 dev = get_cpu_device(cpu); 803 em_adjust_new_capacity(dev, pd, cpu_capacity); 804 } 805 806 free_cpumask_var(cpu_done_mask); 807 } 808 809 static void em_update_workfn(struct work_struct *work) 810 { 811 em_check_capacity_update(); 812 } 813 814 /** 815 * em_dev_update_chip_binning() - Update Energy Model after the new voltage 816 * information is present in the OPPs. 817 * @dev : Device for which the Energy Model has to be updated. 818 * 819 * This function allows to update easily the EM with new values available in 820 * the OPP framework and DT. It can be used after the chip has been properly 821 * verified by device drivers and the voltages adjusted for the 'chip binning'. 822 */ 823 int em_dev_update_chip_binning(struct device *dev) 824 { 825 struct em_perf_table __rcu *em_table; 826 struct em_perf_domain *pd; 827 int i, ret; 828 829 if (IS_ERR_OR_NULL(dev)) 830 return -EINVAL; 831 832 pd = em_pd_get(dev); 833 if (!pd) { 834 dev_warn(dev, "Couldn't find Energy Model\n"); 835 return -EINVAL; 836 } 837 838 em_table = em_table_dup(pd); 839 if (!em_table) { 840 dev_warn(dev, "EM: allocation failed\n"); 841 return -ENOMEM; 842 } 843 844 /* Update power values which might change due to new voltage in OPPs */ 845 for (i = 0; i < pd->nr_perf_states; i++) { 846 unsigned long freq = em_table->state[i].frequency; 847 unsigned long power; 848 849 ret = dev_pm_opp_calc_power(dev, &power, &freq); 850 if (ret) { 851 em_table_free(em_table); 852 return ret; 853 } 854 855 em_table->state[i].power = power; 856 } 857 858 return em_recalc_and_update(dev, pd, em_table); 859 } 860 EXPORT_SYMBOL_GPL(em_dev_update_chip_binning); 861 862 863 /** 864 * em_update_performance_limits() - Update Energy Model with performance 865 * limits information. 866 * @pd : Performance Domain with EM that has to be updated. 867 * @freq_min_khz : New minimum allowed frequency for this device. 868 * @freq_max_khz : New maximum allowed frequency for this device. 869 * 870 * This function allows to update the EM with information about available 871 * performance levels. It takes the minimum and maximum frequency in kHz 872 * and does internal translation to performance levels. 873 * Returns 0 on success or -EINVAL when failed. 874 */ 875 int em_update_performance_limits(struct em_perf_domain *pd, 876 unsigned long freq_min_khz, unsigned long freq_max_khz) 877 { 878 struct em_perf_state *table; 879 int min_ps = -1; 880 int max_ps = -1; 881 int i; 882 883 if (!pd) 884 return -EINVAL; 885 886 rcu_read_lock(); 887 table = em_perf_state_from_pd(pd); 888 889 for (i = 0; i < pd->nr_perf_states; i++) { 890 if (freq_min_khz == table[i].frequency) 891 min_ps = i; 892 if (freq_max_khz == table[i].frequency) 893 max_ps = i; 894 } 895 rcu_read_unlock(); 896 897 /* Only update when both are found and sane */ 898 if (min_ps < 0 || max_ps < 0 || max_ps < min_ps) 899 return -EINVAL; 900 901 902 /* Guard simultaneous updates and make them atomic */ 903 mutex_lock(&em_pd_mutex); 904 pd->min_perf_state = min_ps; 905 pd->max_perf_state = max_ps; 906 mutex_unlock(&em_pd_mutex); 907 908 return 0; 909 } 910 EXPORT_SYMBOL_GPL(em_update_performance_limits); 911