xref: /linux/arch/x86/kernel/cpu/bus_lock.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) "x86/split lock detection: " fmt

#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpuhotplug.h>
#include <linux/kvm_types.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/cpu.h>
#include <asm/msr.h>

enum split_lock_detect_state {
	sld_off = 0,
	sld_warn,
	sld_fatal,
	sld_ratelimit,
};
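
/*
 * Background, for illustration only: a "split lock" is a locked operation
 * whose operand crosses a cache line boundary. A minimal, hypothetical
 * user-space snippet that would trigger one on a 64-byte cache line:
 *
 *	char buf[128] __attribute__((aligned(64)));
 *	int *p = (int *)(buf + 62);			// straddles the line
 *	__atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);	// LOCK-prefixed RMW
 *
 * Depending on sld_state below, this warns (sld_warn), raises SIGBUS
 * (sld_fatal), or is merely rate limited (sld_ratelimit).
 */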

/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTL it should go without saying, but don't touch
 * MSR_TEST_CTL unless the CPU is one of the whitelisted models.  Writing it
 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;

static const struct {
	const char			*option;
	enum split_lock_detect_state	state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
	{ "ratelimit:", sld_ratelimit },
};
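
/*
 * Corresponding kernel command line usage (values taken from the table
 * above; the 1-1000/sec ratelimit bound comes from match_option()):
 *
 *	split_lock_detect=off
 *	split_lock_detect=warn
 *	split_lock_detect=fatal
 *	split_lock_detect=ratelimit:10		(at most 10 bus locks/sec)
 */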

static struct ratelimit_state bld_ratelimit;

static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem, 1);

#ifdef CONFIG_PROC_SYSCTL
static const struct ctl_table sld_sysctls[] = {
	{
		.procname       = "split_lock_mitigate",
		.data           = &sysctl_sld_mitigate,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1         = SYSCTL_ZERO,
		.extra2         = SYSCTL_ONE,
	},
};

static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif
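
/*
 * Runtime usage, for illustration: the sysctl registered above appears as
 * kernel.split_lock_mitigate and can be toggled between 0 and 1:
 *
 *	# sysctl kernel.split_lock_mitigate=0	(skip the sleep/semaphore)
 *	# sysctl kernel.split_lock_mitigate=1	(default: apply the misery)
 */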

static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt), ratelimit;

	if (strncmp(arg, opt, len))
		return false;

	/*
	 * Min ratelimit is 1 bus lock/sec.
	 * Max ratelimit is 1000 bus locks/sec.
	 */
	if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
	    ratelimit > 0 && ratelimit <= 1000) {
		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
		return true;
	}

	return len == arglen;
}

static bool split_lock_verify_msr(bool on)
{
	u64 ctrl, tmp;

	if (rdmsrq_safe(MSR_TEST_CTRL, &ctrl))
		return false;
	if (on)
		ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	if (wrmsrq_safe(MSR_TEST_CTRL, ctrl))
		return false;
	rdmsrq(MSR_TEST_CTRL, tmp);
	return ctrl == tmp;
}

static void __init sld_state_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (match_option(arg, ret, sld_options[i].option)) {
				state = sld_options[i].state;
				break;
			}
		}
	}
	sld_state = state;
}

static void __init __split_lock_setup(void)
{
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	rdmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Restore the MSR to its cached value. */
	wrmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}

/*
 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
 * is not implemented as one thread could undo the setting of the other
 * thread immediately after dropping the lock anyway.
 */
static void sld_update_msr(bool on)
{
	u64 test_ctrl_val = msr_test_ctrl_cache;

	if (on)
		test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

	wrmsrq(MSR_TEST_CTRL, test_ctrl_val);
}

void split_lock_init(void)
{
	/*
	 * In ratelimit mode the #DB for bus lock handles everything;
	 * #AC for split lock is left disabled.
	 */
	if (sld_state == sld_ratelimit) {
		split_lock_verify_msr(false);
		return;
	}

	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}

static void __split_lock_reenable_unlock(struct work_struct *work)
{
	sld_update_msr(true);
	up(&buslock_sem);
}

static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);

static void __split_lock_reenable(struct work_struct *work)
{
	sld_update_msr(true);
}

/*
 * In order for each CPU to schedule its delayed work independently of the
 * others, the delayed work struct must be per-CPU. This is not required
 * when sysctl_sld_mitigate is enabled, because the semaphore limits the
 * number of simultaneously scheduled delayed works to 1.
 */
static DEFINE_PER_CPU(struct delayed_work, sl_reenable);

/*
 * Per-CPU delayed_work can't be statically initialized properly because
 * the struct address is unknown. Thus per-CPU delayed_work structures
 * have to be initialized during kernel initialization, after
 * setup_per_cpu_areas() has been called.
 */
static int __init setup_split_lock_delayed_work(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);

		INIT_DELAYED_WORK(work, __split_lock_reenable);
	}

	return 0;
}
pure_initcall(setup_split_lock_delayed_work);

/*
 * If a CPU goes offline with pending delayed work to re-enable split lock
 * detection then the delayed work will be executed on some other CPU. That
 * handles releasing the buslock_sem, but because it executes on a
 * different CPU probably won't re-enable split lock detection. This is a
 * problem on HT systems since the sibling CPU on the same core may then be
 * left running with split lock detection disabled.
 *
 * Unconditionally re-enable detection here.
 */
static int splitlock_cpu_offline(unsigned int cpu)
{
	sld_update_msr(true);

	return 0;
}

static void split_lock_warn(unsigned long ip)
{
	struct delayed_work *work;
	int cpu;
	unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate);

	if (!current->reported_split_lock)
		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, ip);
	current->reported_split_lock = 1;

	if (saved_sld_mitigate) {
		/*
		 * Misery factor #1:
		 * sleep for 10ms before allowing the split lock to execute.
		 */
		if (msleep_interruptible(10) > 0)
			return;
		/*
		 * Misery factor #2:
		 * allow only one core at a time to run with split lock
		 * detection disabled.
		 */
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
	}

	cpu = get_cpu();
	work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu);
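	/* Queue work to re-enable split lock detection after a 2 jiffy delay. */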
	schedule_delayed_work_on(cpu, work, 2);

	/* Disable split lock detection on this CPU to make progress */
	sld_update_msr(false);
	put_cpu();
}

bool handle_guest_split_lock(unsigned long ip)
{
	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid,
		     sld_state == sld_fatal ? "fatal" : "bogus", ip);

	current->thread.error_code = 0;
	current->thread.trap_nr = X86_TRAP_AC;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
	return false;
}
EXPORT_SYMBOL_FOR_KVM(handle_guest_split_lock);

void bus_lock_init(void)
{
	u64 val;

	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	rdmsrq(MSR_IA32_DEBUGCTLMSR, val);

	if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    (sld_state == sld_warn || sld_state == sld_fatal)) ||
	    sld_state == sld_off) {
		/*
		 * Warn and fatal are handled by #AC for split lock if #AC for
		 * split lock is supported.
		 */
		val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
	} else {
		val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
	}

	wrmsrq(MSR_IA32_DEBUGCTLMSR, val);
}
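
/*
 * Summary of how bus_lock_init() sets DEBUGCTLMSR_BUS_LOCK_DETECT
 * (derived from the code above, not normative):
 *
 *	sld_off                            -> #DB bus lock detect off
 *	sld_warn/sld_fatal, #AC available  -> #DB off (#AC handles it)
 *	sld_warn/sld_fatal, no #AC         -> #DB on
 *	sld_ratelimit                      -> #DB on
 */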

bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
		return false;
	split_lock_warn(regs->ip);
	return true;
}

void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_off:
		break;
	case sld_ratelimit:
		/* Enforce no more than bld_ratelimit bus locks/sec. */
		while (!__ratelimit(&bld_ratelimit))
			msleep(20);
		/* Warn on the bus lock. */
		fallthrough;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	}
}
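
/*
 * Example console output from the warn path above, given the pr_fmt()
 * prefix at the top of this file (comm/PID/address are hypothetical):
 *
 *	x86/split lock detection: #DB: some_app/1234 took a bus_lock trap at address: 0x401234
 */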

/*
 * CPU models that are known to have the per-core split-lock detection
 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ICELAKE_X,	0),
	X86_MATCH_VFM(INTEL_ICELAKE_L,	0),
	X86_MATCH_VFM(INTEL_ICELAKE_D,	0),
	{}
};

static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	const struct x86_cpu_id *m;
	u64 ia32_core_caps;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	/* Check for CPUs that have support but do not enumerate it: */
	m = x86_match_cpu(split_lock_cpu_ids);
	if (m)
		goto supported;

	if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
		return;

	/*
	 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
	 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is.  All CPUs that set
	 * it have split lock detection.
	 */
	rdmsrq(MSR_IA32_CORE_CAPS, ia32_core_caps);
	if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		goto supported;

	/* CPU is not in the model list and does not have the MSR bit: */
	return;

supported:
	cpu_model_supports_sld = true;
	__split_lock_setup();
}

static void sld_state_show(void)
{
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return;

	switch (sld_state) {
	case sld_off:
		pr_info("disabled\n");
		break;
	case sld_warn:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
				pr_warn("No splitlock CPU offline handler\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: warning on user-space bus_locks\n");
		}
		break;
	case sld_fatal:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
				boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
				" from non-WB" : "");
		}
		break;
	case sld_ratelimit:
		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
		break;
	}
}

void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}
434