1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Intel Performance and Energy Bias Hint support. 4 * 5 * Copyright (C) 2019 Intel Corporation 6 * 7 * Author: 8 * Rafael J. Wysocki <rafael.j.wysocki@intel.com> 9 */ 10 11 #include <linux/cpuhotplug.h> 12 #include <linux/cpu.h> 13 #include <linux/device.h> 14 #include <linux/kernel.h> 15 #include <linux/string.h> 16 #include <linux/syscore_ops.h> 17 #include <linux/pm.h> 18 19 #include <asm/cpu_device_id.h> 20 #include <asm/cpufeature.h> 21 #include <asm/msr.h> 22 23 /** 24 * DOC: overview 25 * 26 * The Performance and Energy Bias Hint (EPB) allows software to specify its 27 * preference with respect to the power-performance tradeoffs present in the 28 * processor. Generally, the EPB is expected to be set by user space (directly 29 * via sysfs or with the help of the x86_energy_perf_policy tool), but there are 30 * two reasons for the kernel to update it. 31 * 32 * First, there are systems where the platform firmware resets the EPB during 33 * system-wide transitions from sleep states back into the working state 34 * effectively causing the previous EPB updates by user space to be lost. 35 * Thus the kernel needs to save the current EPB values for all CPUs during 36 * system-wide transitions to sleep states and restore them on the way back to 37 * the working state. That can be achieved by saving EPB for secondary CPUs 38 * when they are taken offline during transitions into system sleep states and 39 * for the boot CPU in a syscore suspend operation, so that it can be restored 40 * for the boot CPU in a syscore resume operation and for the other CPUs when 41 * they are brought back online. However, CPUs that are already offline when 42 * a system-wide PM transition is started are not taken offline again, but their 43 * EPB values may still be reset by the platform firmware during the transition, 44 * so in fact it is necessary to save the EPB of any CPU taken offline and to 45 * restore it when the given CPU goes back online at all times. 46 * 47 * Second, on many systems the initial EPB value coming from the platform 48 * firmware is 0 ('performance') and at least on some of them that is because 49 * the platform firmware does not initialize EPB at all with the assumption that 50 * the OS will do that anyway. That sometimes is problematic, as it may cause 51 * the system battery to drain too fast, for example, so it is better to adjust 52 * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the 53 * kernel changes it to 6 ('normal'). 54 */ 55 56 static DEFINE_PER_CPU(u8, saved_epb); 57 58 #define EPB_MASK 0x0fULL 59 #define EPB_SAVED 0x10ULL 60 #define MAX_EPB EPB_MASK 61 62 enum energy_perf_value_index { 63 EPB_INDEX_PERFORMANCE, 64 EPB_INDEX_BALANCE_PERFORMANCE, 65 EPB_INDEX_NORMAL, 66 EPB_INDEX_BALANCE_POWERSAVE, 67 EPB_INDEX_POWERSAVE, 68 }; 69 70 static u8 energ_perf_values[] = { 71 [EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE, 72 [EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, 73 [EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL, 74 [EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE, 75 [EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE, 76 }; 77 78 static int intel_epb_save(void *data) 79 { 80 u64 epb; 81 82 rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb); 83 /* 84 * Ensure that saved_epb will always be nonzero after this write even if 85 * the EPB value read from the MSR is 0. 86 */ 87 this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); 88 89 return 0; 90 } 91 92 static void intel_epb_restore(void *data) 93 { 94 u64 val = this_cpu_read(saved_epb); 95 u64 epb; 96 97 rdmsrq(MSR_IA32_ENERGY_PERF_BIAS, epb); 98 if (val) { 99 val &= EPB_MASK; 100 } else { 101 /* 102 * Because intel_epb_save() has not run for the current CPU yet, 103 * it is going online for the first time, so if its EPB value is 104 * 0 ('performance') at this point, assume that it has not been 105 * initialized by the platform firmware and set it to 6 106 * ('normal'). 107 */ 108 val = epb & EPB_MASK; 109 if (val == ENERGY_PERF_BIAS_PERFORMANCE) { 110 val = energ_perf_values[EPB_INDEX_NORMAL]; 111 pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); 112 } 113 } 114 wrmsrq(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); 115 } 116 117 static const struct syscore_ops intel_epb_syscore_ops = { 118 .suspend = intel_epb_save, 119 .resume = intel_epb_restore, 120 }; 121 122 static struct syscore intel_epb_syscore = { 123 .ops = &intel_epb_syscore_ops, 124 }; 125 126 static const char * const energy_perf_strings[] = { 127 [EPB_INDEX_PERFORMANCE] = "performance", 128 [EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance", 129 [EPB_INDEX_NORMAL] = "normal", 130 [EPB_INDEX_BALANCE_POWERSAVE] = "balance-power", 131 [EPB_INDEX_POWERSAVE] = "power", 132 }; 133 134 static ssize_t energy_perf_bias_show(struct device *dev, 135 struct device_attribute *attr, 136 char *buf) 137 { 138 unsigned int cpu = dev->id; 139 u64 epb; 140 int ret; 141 142 ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); 143 if (ret < 0) 144 return ret; 145 146 return sprintf(buf, "%llu\n", epb); 147 } 148 149 static ssize_t energy_perf_bias_store(struct device *dev, 150 struct device_attribute *attr, 151 const char *buf, size_t count) 152 { 153 unsigned int cpu = dev->id; 154 u64 epb, val; 155 int ret; 156 157 ret = __sysfs_match_string(energy_perf_strings, 158 ARRAY_SIZE(energy_perf_strings), buf); 159 if (ret >= 0) 160 val = energ_perf_values[ret]; 161 else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) 162 return -EINVAL; 163 164 ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); 165 if (ret < 0) 166 return ret; 167 168 ret = wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, 169 (epb & ~EPB_MASK) | val); 170 if (ret < 0) 171 return ret; 172 173 return count; 174 } 175 176 static DEVICE_ATTR_RW(energy_perf_bias); 177 178 static struct attribute *intel_epb_attrs[] = { 179 &dev_attr_energy_perf_bias.attr, 180 NULL 181 }; 182 183 static const struct attribute_group intel_epb_attr_group = { 184 .name = power_group_name, 185 .attrs = intel_epb_attrs 186 }; 187 188 static int intel_epb_online(unsigned int cpu) 189 { 190 struct device *cpu_dev = get_cpu_device(cpu); 191 192 intel_epb_restore(NULL); 193 if (!cpuhp_tasks_frozen) 194 sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); 195 196 return 0; 197 } 198 199 static int intel_epb_offline(unsigned int cpu) 200 { 201 struct device *cpu_dev = get_cpu_device(cpu); 202 203 if (!cpuhp_tasks_frozen) 204 sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); 205 206 intel_epb_save(NULL); 207 return 0; 208 } 209 210 static const struct x86_cpu_id intel_epb_normal[] = { 211 X86_MATCH_VFM(INTEL_ALDERLAKE_L, 212 ENERGY_PERF_BIAS_NORMAL_POWERSAVE), 213 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 214 ENERGY_PERF_BIAS_NORMAL_POWERSAVE), 215 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 216 ENERGY_PERF_BIAS_NORMAL_POWERSAVE), 217 {} 218 }; 219 220 static __init int intel_epb_init(void) 221 { 222 const struct x86_cpu_id *id = x86_match_cpu(intel_epb_normal); 223 int ret; 224 225 if (!boot_cpu_has(X86_FEATURE_EPB)) 226 return -ENODEV; 227 228 if (id) 229 energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data; 230 231 ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, 232 "x86/intel/epb:online", intel_epb_online, 233 intel_epb_offline); 234 if (ret < 0) 235 goto err_out_online; 236 237 register_syscore(&intel_epb_syscore); 238 return 0; 239 240 err_out_online: 241 cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); 242 return ret; 243 } 244 late_initcall(intel_epb_init); 245