1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/syscore_ops.h>
3 #include <linux/suspend.h>
4 #include <linux/cpu.h>
5
6 #include <asm/msr.h>
7 #include <asm/mwait.h>
8
9 #define UMWAIT_C02_ENABLE 0
10
11 #define UMWAIT_CTRL_VAL(max_time, c02_disable) \
12 (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
13 ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
14
15 /*
16 * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
17 * umwait max time is 100000 in TSC-quanta and C0.2 is enabled
18 */
19 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
20
21 /*
22 * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
23 * hardware or BIOS before kernel boot.
24 */
25 static u32 orig_umwait_control_cached __ro_after_init;
26
27 /*
28 * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
29 * the sysfs write functions.
30 */
31 static DEFINE_MUTEX(umwait_lock);
32
umwait_update_control_msr(void * unused)33 static void umwait_update_control_msr(void * unused)
34 {
35 lockdep_assert_irqs_disabled();
36 wrmsrq(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached));
37 }
38
39 /*
40 * The CPU hotplug callback sets the control MSR to the global control
41 * value.
42 *
43 * Disable interrupts so the read of umwait_control_cached and the WRMSR
44 * are protected against a concurrent sysfs write. Otherwise the sysfs
45 * write could update the cached value after it had been read on this CPU
46 * and issue the IPI before the old value had been written. The IPI would
47 * interrupt, write the new value and after return from IPI the previous
48 * value would be written by this CPU.
49 *
50 * With interrupts disabled the upcoming CPU either sees the new control
51 * value or the IPI is updating this CPU to the new control value after
52 * interrupts have been reenabled.
53 */
umwait_cpu_online(unsigned int cpu)54 static int umwait_cpu_online(unsigned int cpu)
55 {
56 local_irq_disable();
57 umwait_update_control_msr(NULL);
58 local_irq_enable();
59 return 0;
60 }
61
62 /*
63 * The CPU hotplug callback sets the control MSR to the original control
64 * value.
65 */
umwait_cpu_offline(unsigned int cpu)66 static int umwait_cpu_offline(unsigned int cpu)
67 {
68 /*
69 * This code is protected by the CPU hotplug already and
70 * orig_umwait_control_cached is never changed after it caches
71 * the original control MSR value in umwait_init(). So there
72 * is no race condition here.
73 */
74 wrmsrq(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
75
76 return 0;
77 }
78
79 /*
80 * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
81 * is the only active CPU at this time. The MSR is set up on the APs via the
82 * CPU hotplug callback.
83 *
84 * This function is invoked on resume from suspend and hibernation. On
85 * resume from suspend the restore should be not required, but we neither
86 * trust the firmware nor does it matter if the same value is written
87 * again.
88 */
umwait_syscore_resume(void * data)89 static void umwait_syscore_resume(void *data)
90 {
91 umwait_update_control_msr(NULL);
92 }
93
94 static const struct syscore_ops umwait_syscore_ops = {
95 .resume = umwait_syscore_resume,
96 };
97
98 static struct syscore umwait_syscore = {
99 .ops = &umwait_syscore_ops,
100 };
101
102 /* sysfs interface */
103
104 /*
105 * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
106 * Otherwise, C0.2 is enabled.
107 */
umwait_ctrl_c02_enabled(u32 ctrl)108 static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
109 {
110 return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
111 }
112
/* Return the maximum-time field of the control value ctrl. */
static inline u32 umwait_ctrl_max_time(u32 ctrl)
{
	u32 max_time = ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;

	return max_time;
}
117
/*
 * Build a new control value, publish it in umwait_control_cached and write
 * it to the control MSR on every CPU.
 *
 * maxtime is masked down to the time field, so callers may pass a complete
 * control value. The C0.2 disable bit is set when c02_enable is false.
 */
static inline void umwait_update_control(u32 maxtime, bool c02_enable)
{
	u32 c02_bit = c02_enable ? 0 : MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
	u32 time = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;

	WRITE_ONCE(umwait_control_cached, time | c02_bit);

	/* Propagate to all CPUs; the final argument is on_each_cpu()'s wait flag. */
	on_each_cpu(umwait_update_control_msr, NULL, 1);
}
129
130 static ssize_t
enable_c02_show(struct device * dev,struct device_attribute * attr,char * buf)131 enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
132 {
133 u32 ctrl = READ_ONCE(umwait_control_cached);
134
135 return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
136 }
137
enable_c02_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)138 static ssize_t enable_c02_store(struct device *dev,
139 struct device_attribute *attr,
140 const char *buf, size_t count)
141 {
142 bool c02_enable;
143 u32 ctrl;
144 int ret;
145
146 ret = kstrtobool(buf, &c02_enable);
147 if (ret)
148 return ret;
149
150 mutex_lock(&umwait_lock);
151
152 ctrl = READ_ONCE(umwait_control_cached);
153 if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
154 umwait_update_control(ctrl, c02_enable);
155
156 mutex_unlock(&umwait_lock);
157
158 return count;
159 }
160 static DEVICE_ATTR_RW(enable_c02);
161
162 static ssize_t
max_time_show(struct device * kobj,struct device_attribute * attr,char * buf)163 max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
164 {
165 u32 ctrl = READ_ONCE(umwait_control_cached);
166
167 return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
168 }
169
max_time_store(struct device * kobj,struct device_attribute * attr,const char * buf,size_t count)170 static ssize_t max_time_store(struct device *kobj,
171 struct device_attribute *attr,
172 const char *buf, size_t count)
173 {
174 u32 max_time, ctrl;
175 int ret;
176
177 ret = kstrtou32(buf, 0, &max_time);
178 if (ret)
179 return ret;
180
181 /* bits[1:0] must be zero */
182 if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
183 return -EINVAL;
184
185 mutex_lock(&umwait_lock);
186
187 ctrl = READ_ONCE(umwait_control_cached);
188 if (max_time != umwait_ctrl_max_time(ctrl))
189 umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
190
191 mutex_unlock(&umwait_lock);
192
193 return count;
194 }
195 static DEVICE_ATTR_RW(max_time);
196
197 static struct attribute *umwait_attrs[] = {
198 &dev_attr_enable_c02.attr,
199 &dev_attr_max_time.attr,
200 NULL
201 };
202
203 static struct attribute_group umwait_attr_group = {
204 .attrs = umwait_attrs,
205 .name = "umwait_control",
206 };
207
umwait_init(void)208 static int __init umwait_init(void)
209 {
210 struct device *dev;
211 int ret;
212
213 if (!boot_cpu_has(X86_FEATURE_WAITPKG))
214 return -ENODEV;
215
216 /*
217 * Cache the original control MSR value before the control MSR is
218 * changed. This is the only place where orig_umwait_control_cached
219 * is modified.
220 */
221 rdmsrq(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
222
223 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
224 umwait_cpu_online, umwait_cpu_offline);
225 if (ret < 0) {
226 /*
227 * On failure, the control MSR on all CPUs has the
228 * original control value.
229 */
230 return ret;
231 }
232
233 register_syscore(&umwait_syscore);
234
235 /*
236 * Add umwait control interface. Ignore failure, so at least the
237 * default values are set up in case the machine manages to boot.
238 */
239 dev = bus_get_dev_root(&cpu_subsys);
240 if (dev) {
241 ret = sysfs_create_group(&dev->kobj, &umwait_attr_group);
242 put_device(dev);
243 }
244 return ret;
245 }
246 device_initcall(umwait_init);
247