// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of devices
 *
 * Copyright (c) 2018-2021, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 * Improvements provided by: Lukasz Luba, Arm ltd.
 */

#define pr_fmt(fmt) "energy_model: " fmt

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>

/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

static void em_cpufreq_update_efficiencies(struct device *dev,
					   struct em_perf_state *table);
static void em_check_capacity_update(void);
static void em_update_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);

static bool _is_cpu_device(struct device *dev)
{
	return (dev->bus == &cpu_subsys);
}

#ifdef CONFIG_DEBUG_FS
static struct dentry *rootdir;

struct em_dbg_info {
	struct em_perf_domain *pd;
	int ps_id;
};

#define DEFINE_EM_DBG_SHOW(name, fname) \
static int em_debug_##fname##_show(struct seq_file *s, void *unused) \
{ \
	struct em_dbg_info *em_dbg = s->private; \
	struct em_perf_state *table; \
	unsigned long val; \
 \
	rcu_read_lock(); \
	table = em_perf_state_from_pd(em_dbg->pd); \
	val = table[em_dbg->ps_id].name; \
	rcu_read_unlock(); \
 \
	seq_printf(s, "%lu\n", val); \
	return 0; \
} \
DEFINE_SHOW_ATTRIBUTE(em_debug_##fname)

DEFINE_EM_DBG_SHOW(frequency, frequency);
DEFINE_EM_DBG_SHOW(power, power);
DEFINE_EM_DBG_SHOW(cost, cost);
DEFINE_EM_DBG_SHOW(performance, performance);
DEFINE_EM_DBG_SHOW(flags, inefficiency);

static void em_debug_create_ps(struct em_perf_domain *em_pd,
			       struct em_dbg_info *em_dbg, int i,
			       struct dentry *pd)
{
	struct em_perf_state *table;
	unsigned long freq;
	struct dentry *d;
	char name[24];

	em_dbg[i].pd = em_pd;
	em_dbg[i].ps_id = i;

	rcu_read_lock();
	table = em_perf_state_from_pd(em_pd);
	freq = table[i].frequency;
	rcu_read_unlock();

	snprintf(name, sizeof(name), "ps:%lu", freq);

	/* Create per-ps directory */
	d = debugfs_create_dir(name, pd);
	debugfs_create_file("frequency", 0444, d, &em_dbg[i],
			    &em_debug_frequency_fops);
	debugfs_create_file("power", 0444, d, &em_dbg[i],
			    &em_debug_power_fops);
	debugfs_create_file("cost", 0444, d, &em_dbg[i],
			    &em_debug_cost_fops);
	debugfs_create_file("performance", 0444, d, &em_dbg[i],
			    &em_debug_performance_fops);
	debugfs_create_file("inefficient", 0444, d, &em_dbg[i],
			    &em_debug_inefficiency_fops);
}

static int em_debug_cpus_show(struct seq_file *s, void *unused)
{
	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);

static int em_debug_flags_show(struct seq_file *s, void *unused)
{
	struct em_perf_domain *pd = s->private;

	seq_printf(s, "%#lx\n", pd->flags);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_flags);

static void em_debug_create_pd(struct device *dev)
{
	struct em_dbg_info *em_dbg;
	struct dentry *d;
	int i;

	/* Create the directory of the performance domain */
	d = debugfs_create_dir(dev_name(dev), rootdir);

	if (_is_cpu_device(dev))
		debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
				    &em_debug_cpus_fops);

	debugfs_create_file("flags", 0444, d, dev->em_pd,
			    &em_debug_flags_fops);

	em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states,
			      sizeof(*em_dbg), GFP_KERNEL);
	if (!em_dbg)
		return;

	/* Create a sub-directory for each performance state */
	for (i = 0; i < dev->em_pd->nr_perf_states; i++)
		em_debug_create_ps(dev->em_pd, em_dbg, i, d);
}
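
/*
 * For illustration only: assuming a CPU performance domain whose device is
 * named "cpu0" (the directory name comes from dev_name()) and which has a
 * 1000000 kHz performance state, the debugfs layout created above would
 * look roughly like:
 *
 *	energy_model/cpu0/cpus			(CPU devices only)
 *	energy_model/cpu0/flags
 *	energy_model/cpu0/ps:1000000/{frequency,power,cost,
 *				      performance,inefficient}
 */
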
static void em_debug_remove_pd(struct device *dev)
{
	debugfs_lookup_and_remove(dev_name(dev), rootdir);
}

static int __init em_debug_init(void)
{
	/* Create /sys/kernel/debug/energy_model directory */
	rootdir = debugfs_create_dir("energy_model", NULL);

	return 0;
}
fs_initcall(em_debug_init);
#else /* CONFIG_DEBUG_FS */
static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
#endif

static void em_release_table_kref(struct kref *kref)
{
	/* It was the last owner of this table so we can free */
	kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
}

/**
 * em_table_free() - Handles safe free of the EM table when needed
 * @table : EM table which is going to be freed
 *
 * No return values.
 */
void em_table_free(struct em_perf_table *table)
{
	kref_put(&table->kref, em_release_table_kref);
}

/**
 * em_table_alloc() - Allocate a new EM table
 * @pd : EM performance domain for which this must be done
 *
 * Allocate a new EM table and initialize its kref to indicate that it
 * has a user.
 * Returns the allocated table or NULL.
 */
struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{
	struct em_perf_table *table;
	int table_size;

	table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;

	table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL);
	if (!table)
		return NULL;

	kref_init(&table->kref);

	return table;
}

static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_state *table, int nr_states)
{
	u64 fmax, max_cap;
	int i, cpu;

	/* This is needed only for CPUs and EAS; skip other devices. */
	if (!_is_cpu_device(dev))
		return;

	cpu = cpumask_first(em_span_cpus(pd));

	/*
	 * Calculate the performance value for each frequency with a
	 * linear relationship. The final CPU capacity might not be ready at
	 * boot time, but the EM will be updated a bit later with the
	 * correct one.
	 */
	fmax = (u64) table[nr_states - 1].frequency;
	max_cap = (u64) arch_scale_cpu_capacity(cpu);
	for (i = 0; i < nr_states; i++)
		table[i].performance = div64_u64(max_cap * table[i].frequency,
						 fmax);
}
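
/*
 * A rough sketch of the arithmetic below, using made-up numbers: in the
 * default (non-artificial) case the cost of a state is power * 10 /
 * performance, so a state with power = 2000 and performance = 500 gets
 * cost = 40. Walking from the highest state down, a state whose cost is
 * greater than or equal to the cost of some higher-performance state is
 * flagged EM_PERF_STATE_INEFFICIENT.
 */
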
static int em_compute_costs(struct device *dev, struct em_perf_state *table,
			    const struct em_data_callback *cb, int nr_states,
			    unsigned long flags)
{
	unsigned long prev_cost = ULONG_MAX;
	int i, ret;

	/* Compute the cost of each performance state. */
	for (i = nr_states - 1; i >= 0; i--) {
		unsigned long power_res, cost;

		if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) {
			ret = cb->get_cost(dev, table[i].frequency, &cost);
			if (ret || !cost || cost > EM_MAX_POWER) {
				dev_err(dev, "EM: invalid cost %lu %d\n",
					cost, ret);
				return -EINVAL;
			}
		} else {
			/* increase the resolution of the 'cost' calculation */
			power_res = table[i].power * 10;
			cost = power_res / table[i].performance;
		}

		table[i].cost = cost;

		if (table[i].cost >= prev_cost) {
			table[i].flags = EM_PERF_STATE_INEFFICIENT;
			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
				table[i].frequency);
		} else {
			prev_cost = table[i].cost;
		}
	}

	return 0;
}

/**
 * em_dev_compute_costs() - Calculate cost values for a new runtime EM table
 * @dev : Device for which the EM table is to be updated
 * @table : The new EM table that is going to get the costs calculated
 * @nr_states : Number of performance states
 *
 * Calculate the em_perf_state::cost values for a new runtime EM table. The
 * values are used by EAS during task placement. This also calculates and sets
 * the efficiency flag for each performance state. When the function finishes
 * successfully, the EM table is ready to be updated and used by EAS.
 *
 * Return 0 on success or an error code on failure.
 */
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
			 int nr_states)
{
	return em_compute_costs(dev, table, NULL, nr_states, 0);
}

/**
 * em_dev_update_perf_domain() - Update runtime EM table for a device
 * @dev : Device for which the EM is to be updated
 * @new_table : The new EM table that is going to be used from now on
 *
 * Update the EM runtime modifiable table for the @dev using the provided
 * @new_table.
 *
 * This function uses a mutex to serialize writers, so it must not be called
 * from a non-sleeping context.
 *
 * Return 0 on success or an error code on failure.
 */
int em_dev_update_perf_domain(struct device *dev,
			      struct em_perf_table *new_table)
{
	struct em_perf_table *old_table;
	struct em_perf_domain *pd;

	if (!dev)
		return -EINVAL;

	/* Serialize update/unregister or concurrent updates */
	mutex_lock(&em_pd_mutex);

	if (!dev->em_pd) {
		mutex_unlock(&em_pd_mutex);
		return -EINVAL;
	}
	pd = dev->em_pd;

	kref_get(&new_table->kref);

	old_table = rcu_dereference_protected(pd->em_table,
					      lockdep_is_held(&em_pd_mutex));
	rcu_assign_pointer(pd->em_table, new_table);

	em_cpufreq_update_efficiencies(dev, new_table->state);

	em_table_free(old_table);

	mutex_unlock(&em_pd_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);

static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_state *table,
				const struct em_data_callback *cb,
				unsigned long flags)
{
	unsigned long power, freq, prev_freq = 0;
	int nr_states = pd->nr_perf_states;
	int i, ret;

	/* Build the list of performance states for this performance domain */
	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
		/*
		 * active_power() is a driver callback which ceils 'freq' to
		 * the lowest performance state of 'dev' above 'freq' and
		 * updates 'power' and 'freq' accordingly.
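		 *
		 * As a purely illustrative sketch (the callback name and the
		 * OPP lookup are hypothetical, not taken from this file), a
		 * driver implementation might look like:
		 *
		 *	static int foo_active_power(struct device *dev,
		 *				    unsigned long *power,
		 *				    unsigned long *freq)
		 *	{
		 *		... find the lowest OPP at or above *freq ...
		 *		*freq = <that OPP's frequency in kHz>;
		 *		*power = <that OPP's power, e.g. in uW>;
		 *		return 0;
		 *	}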
		 */
		ret = cb->active_power(dev, &power, &freq);
		if (ret) {
			dev_err(dev, "EM: invalid perf. state: %d\n",
				ret);
			return -EINVAL;
		}

		/*
		 * We expect the driver callback to increase the frequency for
		 * higher performance states.
		 */
		if (freq <= prev_freq) {
			dev_err(dev, "EM: non-increasing freq: %lu\n",
				freq);
			return -EINVAL;
		}

		/*
		 * The power returned by active_power() is expected to be
		 * positive and within range.
		 */
		if (!power || power > EM_MAX_POWER) {
			dev_err(dev, "EM: invalid power: %lu\n",
				power);
			return -EINVAL;
		}

		table[i].power = power;
		table[i].frequency = prev_freq = freq;
	}

	em_init_performance(dev, pd, table, nr_states);

	ret = em_compute_costs(dev, table, cb, nr_states, flags);
	if (ret)
		return -EINVAL;

	return 0;
}

static int em_create_pd(struct device *dev, int nr_states,
			const struct em_data_callback *cb,
			const cpumask_t *cpus,
			unsigned long flags)
{
	struct em_perf_table *em_table;
	struct em_perf_domain *pd;
	struct device *cpu_dev;
	int cpu, ret, num_cpus;

	if (_is_cpu_device(dev)) {
		num_cpus = cpumask_weight(cpus);

		/* Prevent the max possible energy calculation from overflowing */
		if (num_cpus > EM_MAX_NUM_CPUS) {
			dev_err(dev, "EM: too many CPUs, overflow possible\n");
			return -EINVAL;
		}

		pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;

		cpumask_copy(em_span_cpus(pd), cpus);
	} else {
		pd = kzalloc(sizeof(*pd), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;
	}

	pd->nr_perf_states = nr_states;

	em_table = em_table_alloc(pd);
	if (!em_table)
		goto free_pd;

	ret = em_create_perf_table(dev, pd, em_table->state, cb, flags);
	if (ret)
		goto free_pd_table;

	rcu_assign_pointer(pd->em_table, em_table);

	if (_is_cpu_device(dev))
		for_each_cpu(cpu, cpus) {
			cpu_dev = get_cpu_device(cpu);
			cpu_dev->em_pd = pd;
		}

	dev->em_pd = pd;

	return 0;

free_pd_table:
	kfree(em_table);
free_pd:
	kfree(pd);
	return -EINVAL;
}

static void
em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table)
{
	struct em_perf_domain *pd = dev->em_pd;
	struct cpufreq_policy *policy;
	int found = 0;
	int i, cpu;

	if (!_is_cpu_device(dev))
		return;

	/* Try to get a CPU which is active and in this PD */
	cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask);
	if (cpu >= nr_cpu_ids) {
		dev_warn(dev, "EM: No online CPU for CPUFreq policy\n");
		return;
	}

	policy = cpufreq_cpu_get(cpu);
	if (!policy) {
		dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
		return;
	}

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
			continue;

		if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
			found++;
	}

	cpufreq_cpu_put(policy);

	if (!found)
		return;

	/*
	 * Inefficiencies have been installed in CPUFreq, so the inefficient
	 * frequencies will be skipped. The EM can do the same.
	 */
	pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
}

/**
 * em_pd_get() - Return the performance domain for a device
 * @dev : Device to find the performance domain for
 *
 * Returns the performance domain to which @dev belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_pd_get(struct device *dev)
{
	if (IS_ERR_OR_NULL(dev))
		return NULL;

	return dev->em_pd;
}
EXPORT_SYMBOL_GPL(em_pd_get);

/**
 * em_cpu_get() - Return the performance domain for a CPU
 * @cpu : CPU to find the performance domain for
 *
 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
	struct device *cpu_dev;

	cpu_dev = get_cpu_device(cpu);
	if (!cpu_dev)
		return NULL;

	return em_pd_get(cpu_dev);
}
EXPORT_SYMBOL_GPL(em_cpu_get);

/**
 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
 * @dev : Device for which the EM is to be registered
 * @nr_states : Number of performance states to register
 * @cb : Callback functions providing the data of the Energy Model
 * @cpus : Pointer to a cpumask_t, which is mandatory for CPU devices. It
 *		can be taken from e.g. 'policy->cpus'. For other types of
 *		devices it should be set to NULL.
 * @microwatts : Flag indicating whether the power values are in micro-Watts
 *		or in some other scale. It must be set properly.
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * It is important to set @microwatts to the correct value. Some kernel
 * sub-systems might rely on this flag and check whether all devices in the
 * EM are using the same scale.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return 0 on success or an error code on failure.
 */
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
				const struct em_data_callback *cb,
				const cpumask_t *cpus, bool microwatts)
{
	struct em_perf_table *em_table;
	unsigned long cap, prev_cap = 0;
	unsigned long flags = 0;
	int cpu, ret;

	if (!dev || !nr_states || !cb)
		return -EINVAL;

	/*
	 * Use a mutex to serialize the registration of performance domains and
	 * let the driver-defined callback functions sleep.
	 */
	mutex_lock(&em_pd_mutex);

	if (dev->em_pd) {
		ret = -EEXIST;
		goto unlock;
	}

	if (_is_cpu_device(dev)) {
		if (!cpus) {
			dev_err(dev, "EM: invalid CPU mask\n");
			ret = -EINVAL;
			goto unlock;
		}

		for_each_cpu(cpu, cpus) {
			if (em_cpu_get(cpu)) {
				dev_err(dev, "EM: exists for CPU%d\n", cpu);
				ret = -EEXIST;
				goto unlock;
			}
			/*
			 * All CPUs of a domain must have the same
			 * micro-architecture since they all share the same
			 * table.
			 */
			cap = arch_scale_cpu_capacity(cpu);
			if (prev_cap && prev_cap != cap) {
				dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
					cpumask_pr_args(cpus));

				ret = -EINVAL;
				goto unlock;
			}
			prev_cap = cap;
		}
	}

	if (microwatts)
		flags |= EM_PERF_DOMAIN_MICROWATTS;
	else if (cb->get_cost)
		flags |= EM_PERF_DOMAIN_ARTIFICIAL;

	/*
	 * The EM only supports uW (the exception being an artificial EM).
	 * Therefore, check and force the drivers to provide power in uW.
	 */
	if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) {
		dev_err(dev, "EM: only supports uW power values\n");
		ret = -EINVAL;
		goto unlock;
	}

	ret = em_create_pd(dev, nr_states, cb, cpus, flags);
	if (ret)
		goto unlock;

	dev->em_pd->flags |= flags;
	dev->em_pd->min_perf_state = 0;
	dev->em_pd->max_perf_state = nr_states - 1;

	em_table = rcu_dereference_protected(dev->em_pd->em_table,
					     lockdep_is_held(&em_pd_mutex));
	em_cpufreq_update_efficiencies(dev, em_table->state);

	em_debug_create_pd(dev);
	dev_info(dev, "EM: created perf domain\n");

unlock:
	mutex_unlock(&em_pd_mutex);

	if (_is_cpu_device(dev))
		em_check_capacity_update();

	return ret;
}
EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);

/**
 * em_dev_unregister_perf_domain() - Unregister the Energy Model (EM) for a device
 * @dev : Device for which the EM is registered
 *
 * Unregister the EM for the specified @dev (but not a CPU device).
 */
void em_dev_unregister_perf_domain(struct device *dev)
{
	if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
		return;

	if (_is_cpu_device(dev))
		return;

	/*
	 * The mutex separates all register/unregister requests and protects
	 * from potential clean-up/setup issues in the debugfs directories.
	 * The debugfs directory name is the same as the device's name.
	 */
	mutex_lock(&em_pd_mutex);
	em_debug_remove_pd(dev);

	em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
						lockdep_is_held(&em_pd_mutex)));

	kfree(dev->em_pd);
	dev->em_pd = NULL;
	mutex_unlock(&em_pd_mutex);
}
EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);

static struct em_perf_table *em_table_dup(struct em_perf_domain *pd)
{
	struct em_perf_table *em_table;
	struct em_perf_state *ps, *new_ps;
	int ps_size;

	em_table = em_table_alloc(pd);
	if (!em_table)
		return NULL;

	new_ps = em_table->state;

	rcu_read_lock();
	ps = em_perf_state_from_pd(pd);
	/* Initialize data based on the old table */
	ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
	memcpy(new_ps, ps, ps_size);

	rcu_read_unlock();

	return em_table;
}

static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_table *em_table)
{
	int ret;

	ret = em_compute_costs(dev, em_table->state, NULL, pd->nr_perf_states,
			       pd->flags);
	if (ret)
		goto free_em_table;

	ret = em_dev_update_perf_domain(dev, em_table);
	if (ret)
		goto free_em_table;

	/*
	 * This is a one-time update, so give up the ownership in this updater.
	 * The EM framework has incremented the usage counter and from now on
	 * will keep the reference (and free the memory when needed).
	 */
free_em_table:
	em_table_free(em_table);
	return ret;
}

/*
 * Adjustment of CPU performance values after boot, when all CPU capacities
 * are correctly calculated.
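 *
 * As an illustration (made-up numbers): if arch_scale_cpu_capacity() was
 * 1024 when the EM was registered but later settles at 820, the duplicated
 * table is re-initialized so that performance = 820 * frequency / fmax,
 * and the costs are then recomputed from the new performance values.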
 */
static void em_adjust_new_capacity(struct device *dev,
				   struct em_perf_domain *pd)
{
	struct em_perf_table *em_table;

	em_table = em_table_dup(pd);
	if (!em_table) {
		dev_warn(dev, "EM: allocation failed\n");
		return;
	}

	em_init_performance(dev, pd, em_table->state, pd->nr_perf_states);

	em_recalc_and_update(dev, pd, em_table);
}

static void em_check_capacity_update(void)
{
	cpumask_var_t cpu_done_mask;
	struct em_perf_state *table;
	struct em_perf_domain *pd;
	unsigned long cpu_capacity;
	int cpu;

	if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
		pr_warn("no free memory\n");
		return;
	}

	/* Check if the CPU capacity has changed and, if so, update the EM */
	for_each_possible_cpu(cpu) {
		struct cpufreq_policy *policy;
		unsigned long em_max_perf;
		struct device *dev;

		if (cpumask_test_cpu(cpu, cpu_done_mask))
			continue;

		policy = cpufreq_cpu_get(cpu);
		if (!policy) {
			pr_debug("Accessing cpu%d policy failed\n", cpu);
			schedule_delayed_work(&em_update_work,
					      msecs_to_jiffies(1000));
			break;
		}
		cpufreq_cpu_put(policy);

		dev = get_cpu_device(cpu);
		pd = em_pd_get(dev);
		if (!pd || em_is_artificial(pd))
			continue;

		cpumask_or(cpu_done_mask, cpu_done_mask,
			   em_span_cpus(pd));

		cpu_capacity = arch_scale_cpu_capacity(cpu);

		rcu_read_lock();
		table = em_perf_state_from_pd(pd);
		em_max_perf = table[pd->nr_perf_states - 1].performance;
		rcu_read_unlock();

		/*
		 * Check if the CPU capacity has been adjusted during boot
		 * and trigger the update for new performance values.
		 */
		if (em_max_perf == cpu_capacity)
			continue;

		pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
			 cpu, cpu_capacity, em_max_perf);

		em_adjust_new_capacity(dev, pd);
	}

	free_cpumask_var(cpu_done_mask);
}

static void em_update_workfn(struct work_struct *work)
{
	em_check_capacity_update();
}

/**
 * em_dev_update_chip_binning() - Update the Energy Model after new voltage
 *				information is present in the OPPs.
 * @dev : Device for which the Energy Model has to be updated.
 *
 * This function allows the EM to be easily updated with the new values
 * available in the OPP framework and DT. It can be used after the chip has
 * been properly verified by device drivers and the voltages adjusted for the
 * 'chip binning'.
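 *
 * As a rough usage sketch (assumed, not spelled out in this file): a driver
 * would first adjust the OPP voltages and then call this function, so that
 * the power of each state is re-read via dev_pm_opp_calc_power() and the
 * costs are recomputed.
 *
 * Return 0 on success or an error code on failure.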
 */
int em_dev_update_chip_binning(struct device *dev)
{
	struct em_perf_table *em_table;
	struct em_perf_domain *pd;
	int i, ret;

	if (IS_ERR_OR_NULL(dev))
		return -EINVAL;

	pd = em_pd_get(dev);
	if (!pd) {
		dev_warn(dev, "Couldn't find Energy Model\n");
		return -EINVAL;
	}

	em_table = em_table_dup(pd);
	if (!em_table) {
		dev_warn(dev, "EM: allocation failed\n");
		return -ENOMEM;
	}

	/* Update power values which might change due to new voltage in OPPs */
	for (i = 0; i < pd->nr_perf_states; i++) {
		unsigned long freq = em_table->state[i].frequency;
		unsigned long power;

		ret = dev_pm_opp_calc_power(dev, &power, &freq);
		if (ret) {
			em_table_free(em_table);
			return ret;
		}

		em_table->state[i].power = power;
	}

	return em_recalc_and_update(dev, pd, em_table);
}
EXPORT_SYMBOL_GPL(em_dev_update_chip_binning);

/**
 * em_update_performance_limits() - Update the Energy Model with performance
 *				limits information.
 * @pd : Performance domain with the EM that has to be updated.
 * @freq_min_khz : New minimum allowed frequency for this device.
 * @freq_max_khz : New maximum allowed frequency for this device.
 *
 * This function allows the EM to be updated with information about the
 * available performance levels. It takes the minimum and maximum frequency
 * in kHz and does the internal translation to performance levels.
 * Returns 0 on success or -EINVAL on failure.
 */
int em_update_performance_limits(struct em_perf_domain *pd,
				 unsigned long freq_min_khz, unsigned long freq_max_khz)
{
	struct em_perf_state *table;
	int min_ps = -1;
	int max_ps = -1;
	int i;

	if (!pd)
		return -EINVAL;

	rcu_read_lock();
	table = em_perf_state_from_pd(pd);

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (freq_min_khz == table[i].frequency)
			min_ps = i;
		if (freq_max_khz == table[i].frequency)
			max_ps = i;
	}
	rcu_read_unlock();

	/* Only update when both are found and sane */
	if (min_ps < 0 || max_ps < 0 || max_ps < min_ps)
		return -EINVAL;

	/* Guard simultaneous updates and make them atomic */
	mutex_lock(&em_pd_mutex);
	pd->min_perf_state = min_ps;
	pd->max_perf_state = max_ps;
	mutex_unlock(&em_pd_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(em_update_performance_limits);

static void rebuild_sd_workfn(struct work_struct *work)
{
	rebuild_sched_domains_energy();
}

void em_rebuild_sched_domains(void)
{
	static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);

	/*
	 * When called from the cpufreq_register_driver() path, the
	 * cpu_hotplug_lock is already held, so use a work item to
	 * avoid nested locking in rebuild_sched_domains().
	 */
	schedule_work(&rebuild_sd_work);
}