/* xref: /linux/arch/x86/kernel/cpu/microcode/core.c (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9) */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * CPU Microcode Update Driver for Linux
 *
 * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 *	      2006	Shaohua Li <shaohua.li@intel.com>
 *	      2013-2016	Borislav Petkov <bp@alien8.de>
 *
 * X86 CPU microcode early update for Linux:
 *
 *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 *			   H Peter Anvin <hpa@zytor.com>
 *		  (C) 2015 Borislav Petkov <bp@alien8.de>
 *
 * This driver allows updating microcode on x86 processors.
 */

#define pr_fmt(fmt) "microcode: " fmt

#include <linux/stop_machine.h>
#include <linux/device/faux.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>

#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/cmdline.h>
#include <asm/msr.h>
#include <asm/setup.h>

#include "internal.h"

static struct microcode_ops *microcode_ops;
static bool dis_ucode_ldr;

bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);

/*
 * These should be behind CONFIG_MICROCODE_DBG ifdeffery but, in order to not
 * uglify the code with ifdeffery and to use IS_ENABLED() instead, they are
 * left in. When microcode debugging is not enabled, they are meaningless
 * anyway.
 */
/* base microcode revision for debugging */
u32 base_rev;
u32 microcode_rev[NR_CPUS] = {};

bool hypervisor_present;

/*
 * Synchronization.
 *
 * All non-cpu-hotplug-callback call sites use:
 *
 * - cpus_read_lock/unlock() to synchronize with
 *   the cpu-hotplug-callback call sites.
 *
 * We guarantee that only a single CPU is being
 * updated at any given time.
 */
struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];

/*
 * Those patch levels cannot be updated to newer ones and thus should be final.
 */
static u32 final_levels[] = {
	0x01000098,
	0x0100009f,
	0x010000af,
	0, /* T-101 terminator */
};

struct early_load_data early_data;

/*
 * Check the current patch level on this CPU.
 *
 * Returns:
 *  - true: if update should stop
 *  - false: otherwise
 */
static bool amd_check_current_patch_level(void)
{
	u32 lvl, dummy, i;
	u32 *levels;

	if (x86_cpuid_vendor() != X86_VENDOR_AMD)
		return false;

	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);

	levels = final_levels;

	for (i = 0; levels[i]; i++) {
		if (lvl == levels[i])
			return true;
	}
	return false;
}
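
/*
 * Illustrative user-space sketch (not part of this driver, kept compiled
 * out): the patch level which the check above reads via
 * MSR_AMD64_PATCH_LEVEL (MSR 0x0000008b) can also be inspected through the
 * msr driver, assuming the "msr" module is loaded and the program runs as
 * root.
 */
#if 0
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* The file offset selects the MSR number */
	if (pread(fd, &val, sizeof(val), 0x8b) != sizeof(val)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("patch level: 0x%08" PRIx64 "\n", val);
	close(fd);
	return 0;
}
#endif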

bool __init microcode_loader_disabled(void)
{
	if (dis_ucode_ldr)
		return true;

	/*
	 * Disable when:
	 *
	 * 1) The CPU does not support CPUID.
	 */
	if (!cpuid_feature()) {
		dis_ucode_ldr = true;
		return dis_ucode_ldr;
	}

	/*
	 * 2) Bit 31 in CPUID[1]:ECX is set.
	 *    The bit is reserved for hypervisor use. This is still not
	 *    completely accurate as XEN PV guests don't see that CPUID bit
	 *    set, but that's good enough as they don't land on the BSP
	 *    path anyway.
	 *
	 * 3) Certain AMD patch levels are not allowed to be
	 *    overwritten.
	 */
	hypervisor_present = native_cpuid_ecx(1) & BIT(31);

	if ((hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
	    amd_check_current_patch_level())
		dis_ucode_ldr = true;

	return dis_ucode_ldr;
}
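
/*
 * Illustrative user-space sketch (not part of this driver, kept compiled
 * out): the CPUID(1).ECX[31] "hypervisor present" bit which gates the
 * loader above can be queried with the compiler-provided cpuid.h helper.
 * On bare metal the bit is clear; under most hypervisors it is set.
 */
#if 0
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("hypervisor bit: %s\n", (ecx & (1u << 31)) ? "set" : "clear");
	return 0;
}
#endif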

static void __init early_parse_cmdline(void)
{
	char cmd_buf[64] = {};
	char *s, *p = cmd_buf;

	if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
		while ((s = strsep(&p, ","))) {
			if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
				if (strstr(s, "base_rev=")) {
					/* advance to the option arg */
					strsep(&s, "=");
					if (kstrtouint(s, 16, &base_rev)) { ; }
				}
			}

			if (!strcmp("force_minrev", s))
				force_minrev = true;

			if (!strcmp(s, "dis_ucode_ldr"))
				dis_ucode_ldr = true;
		}
	}

	/* old, compat option */
	if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
		dis_ucode_ldr = true;
}
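
/*
 * Example command lines accepted by the parser above (values illustrative):
 *
 *   microcode=dis_ucode_ldr
 *   microcode=force_minrev
 *   microcode=force_minrev,dis_ucode_ldr
 *   microcode=base_rev=0x12345678,force_minrev
 *   dis_ucode_ldr
 *
 * base_rev is only honored with CONFIG_MICROCODE_DBG enabled and is parsed
 * as hex (kstrtouint(..., 16, ...)), so the "0x" prefix is optional. The
 * bare dis_ucode_ldr form is the old, compat spelling handled at the end of
 * the function.
 */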

void __init load_ucode_bsp(void)
{
	unsigned int cpuid_1_eax;
	bool intel = true;

	early_parse_cmdline();

	if (microcode_loader_disabled())
		return;

	cpuid_1_eax = native_cpuid_eax(1);

	switch (x86_cpuid_vendor()) {
	case X86_VENDOR_INTEL:
		if (x86_family(cpuid_1_eax) < 6)
			return;
		break;

	case X86_VENDOR_AMD:
		if (x86_family(cpuid_1_eax) < 0x10)
			return;
		intel = false;
		break;

	default:
		return;
	}

	if (intel)
		load_ucode_intel_bsp(&early_data);
	else
		load_ucode_amd_bsp(&early_data, cpuid_1_eax);
}

void load_ucode_ap(void)
{
	unsigned int cpuid_1_eax;

	/*
	 * Can't use microcode_loader_disabled() here - .init section
	 * hell. It doesn't have to either - the BSP variant must've
	 * parsed cmdline already anyway.
	 */
	if (dis_ucode_ldr)
		return;

	cpuid_1_eax = native_cpuid_eax(1);

	switch (x86_cpuid_vendor()) {
	case X86_VENDOR_INTEL:
		if (x86_family(cpuid_1_eax) >= 6)
			load_ucode_intel_ap();
		break;
	case X86_VENDOR_AMD:
		if (x86_family(cpuid_1_eax) >= 0x10)
			load_ucode_amd_ap(cpuid_1_eax);
		break;
	default:
		break;
	}
}

struct cpio_data __init find_microcode_in_initrd(const char *path)
{
#ifdef CONFIG_BLK_DEV_INITRD
	unsigned long start = 0;
	size_t size;

#ifdef CONFIG_X86_32
	size = boot_params.hdr.ramdisk_size;
	/* Early load on BSP has a temporary mapping. */
	if (size)
		start = initrd_start_early;

#else /* CONFIG_X86_64 */
	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
	size |= boot_params.hdr.ramdisk_size;

	if (size) {
		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
		start |= boot_params.hdr.ramdisk_image;
		start += PAGE_OFFSET;
	}
#endif

	/*
	 * Fixup the start address: after reserve_initrd() runs, initrd_start
	 * has the virtual address of the beginning of the initrd. It also
	 * possibly relocates the ramdisk. In either case, initrd_start contains
	 * the updated address so use that instead.
	 */
	if (initrd_start)
		start = initrd_start;

	return find_cpio_data(path, (void *)start, size, NULL);
#else /* !CONFIG_BLK_DEV_INITRD */
	return (struct cpio_data){ NULL, 0, "" };
#endif
}
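
/*
 * The vendor drivers call this with cpio paths like
 * "kernel/x86/microcode/GenuineIntel.bin" or
 * "kernel/x86/microcode/AuthenticAMD.bin". A matching early initrd can be
 * assembled roughly like this (illustrative sketch; see
 * Documentation/arch/x86/microcode.rst for the authoritative recipe):
 *
 *   mkdir -p initrd/kernel/x86/microcode
 *   cat <blobs> > initrd/kernel/x86/microcode/GenuineIntel.bin
 *   (cd initrd; find . | cpio -o -H newc > ../ucode.cpio)
 *   cat ucode.cpio /boot/initrd.img > /boot/initrd.img.new
 *
 * The microcode cpio has to be uncompressed and prepended so that the early
 * loader can find it before the regular initramfs is unpacked.
 */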

static void reload_early_microcode(unsigned int cpu)
{
	int vendor, family;

	vendor = x86_cpuid_vendor();
	family = x86_cpuid_family();

	switch (vendor) {
	case X86_VENDOR_INTEL:
		if (family >= 6)
			reload_ucode_intel();
		break;
	case X86_VENDOR_AMD:
		if (family >= 0x10)
			reload_ucode_amd(cpu);
		break;
	default:
		break;
	}
}

/* fake device for request_firmware */
static struct faux_device *microcode_fdev;

#ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why the heavy-handed stomp_machine effort?
 *
 * - HT siblings must be idle and not execute other code while the other sibling
 *   is loading microcode in order to avoid any negative interactions caused by
 *   the loading.
 *
 * - In addition, microcode update on the cores must be serialized until this
 *   requirement can be relaxed in the future. Right now, this is conservative
 *   and good.
 */
enum sibling_ctrl {
	/* Spinwait with timeout */
	SCTRL_WAIT,
	/* Invoke the microcode_apply() callback */
	SCTRL_APPLY,
	/* Proceed without invoking the microcode_apply() callback */
	SCTRL_DONE,
};

struct microcode_ctrl {
	enum sibling_ctrl	ctrl;
	enum ucode_state	result;
	unsigned int		ctrl_cpu;
	bool			nmi_enabled;
};
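
/*
 * Rough lifecycle of the per CPU state above during a late load (a
 * descriptive summary of the code below, not additional mechanics):
 *
 * 1) setup_cpus() initializes every participating CPU to SCTRL_WAIT and
 *    points ctrl_cpu at the first (primary) thread of its core.
 *
 * 2) The primary thread applies the microcode and then switches its
 *    siblings to SCTRL_APPLY on success or SCTRL_DONE on failure.
 *
 * 3) Secondary threads spin in wait_for_ctrl() until they leave SCTRL_WAIT
 *    and then either invoke the apply callback themselves or just copy the
 *    primary's result.
 */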

DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
static atomic_t late_cpus_in, offline_in_nmi;
static unsigned int loops_per_usec;
static cpumask_t cpu_offline_mask;

static noinstr bool wait_for_cpus(atomic_t *cnt)
{
	unsigned int timeout, loops;

	WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);

	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
		if (!raw_atomic_read(cnt))
			return true;

		for (loops = 0; loops < loops_per_usec; loops++)
			cpu_relax();

		/* If invoked directly, tickle the NMI watchdog */
		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
			instrumentation_begin();
			touch_nmi_watchdog();
			instrumentation_end();
		}
	}
	/* Prevent the late comers from making progress and let them time out */
	raw_atomic_inc(cnt);
	return false;
}

static noinstr bool wait_for_ctrl(void)
{
	unsigned int timeout, loops;

	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
		if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
			return true;

		for (loops = 0; loops < loops_per_usec; loops++)
			cpu_relax();

		/* If invoked directly, tickle the NMI watchdog */
		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
			instrumentation_begin();
			touch_nmi_watchdog();
			instrumentation_end();
		}
	}
	return false;
}
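
/*
 * Illustrative user-space sketch (not part of this driver, kept compiled
 * out) of the counting rendezvous implemented by wait_for_cpus() above:
 * every participant decrements a shared counter and then spins until it
 * reaches zero or a deadline expires. The kernel variant additionally
 * calibrates its spin via loops_per_usec and tickles the NMI watchdog;
 * the toy below simply polls a monotonic clock.
 */
#if 0
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NR_THREADS	4

static atomic_int remaining = NR_THREADS;

static double now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

static bool rendezvous(double timeout_sec)
{
	double deadline = now() + timeout_sec;

	/* Announce arrival ... */
	atomic_fetch_sub(&remaining, 1);

	/* ... and wait for everybody else, with a timeout */
	while (atomic_load(&remaining)) {
		if (now() > deadline)
			return false;
	}
	return true;
}

static void *worker(void *arg)
{
	printf("thread %ld: %s\n", (long)arg,
	       rendezvous(1.0) ? "arrived" : "timed out");
	return NULL;
}

int main(void)
{
	pthread_t tids[NR_THREADS];
	long i;

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&tids[i], NULL, worker, (void *)i);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(tids[i], NULL);
	return 0;
}
#endif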

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
{
	/* Initial rendezvous to ensure that all CPUs have arrived */
	if (!wait_for_cpus(&late_cpus_in)) {
		raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
		return false;
	}

	/*
	 * Wait for primary threads to complete. If one of them hangs due
	 * to the update, there is no way out. This is non-recoverable
	 * because the CPU might hold locks or resources and confuse the
	 * scheduler, watchdogs etc. There is no way to safely evacuate the
	 * machine.
	 */
	if (wait_for_ctrl())
		return true;

	instrumentation_begin();
	panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
	instrumentation_end();
}

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr void load_secondary(unsigned int cpu)
{
	unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
	enum ucode_state ret;

	if (!load_secondary_wait(ctrl_cpu)) {
		instrumentation_begin();
		pr_err_once("load: %d CPUs timed out\n",
			    atomic_read(&late_cpus_in) - 1);
		instrumentation_end();
		return;
	}

	/* Primary thread completed. Allow to invoke instrumentable code */
	instrumentation_begin();
	/*
	 * If the primary succeeded then invoke the apply() callback,
	 * otherwise copy the state from the primary thread.
	 */
	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
		ret = microcode_ops->apply_microcode(cpu);
	else
		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);

	this_cpu_write(ucode_ctrl.result, ret);
	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
	instrumentation_end();
}

static void __load_primary(unsigned int cpu)
{
	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
	enum sibling_ctrl ctrl;
	enum ucode_state ret;
	unsigned int sibling;

	/* Initial rendezvous to ensure that all CPUs have arrived */
	if (!wait_for_cpus(&late_cpus_in)) {
		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
		return;
	}

	ret = microcode_ops->apply_microcode(cpu);
	this_cpu_write(ucode_ctrl.result, ret);
	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);

	/*
	 * If the update was successful, let the siblings run the apply()
	 * callback. If not, tell them it's done. This also covers the
	 * case where the CPU has uniform loading at package or system
	 * scope implemented but does not advertise it.
	 */
	if (ret == UCODE_UPDATED || ret == UCODE_OK)
		ctrl = SCTRL_APPLY;
	else
		ctrl = SCTRL_DONE;

	for_each_cpu(sibling, secondaries) {
		if (sibling != cpu)
			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
	}
}

static bool kick_offline_cpus(unsigned int nr_offl)
{
	unsigned int cpu, timeout;

	for_each_cpu(cpu, &cpu_offline_mask) {
		/* Enable the rendezvous handler and send NMI */
		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
		apic_send_nmi_to_offline_cpu(cpu);
	}

	/* Wait for them to arrive */
	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
		if (atomic_read(&offline_in_nmi) == nr_offl)
			return true;
		udelay(1);
	}
	/* Let the others time out */
	return false;
}

static void release_offline_cpus(void)
{
	unsigned int cpu;

	for_each_cpu(cpu, &cpu_offline_mask)
		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
}

static void load_primary(unsigned int cpu)
{
	unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
	bool proceed = true;

	/* Kick soft-offlined SMT siblings if required */
	if (!cpu && nr_offl)
		proceed = kick_offline_cpus(nr_offl);

	/* If the soft-offlined CPUs did not respond, abort */
	if (proceed)
		__load_primary(cpu);

	/* Unconditionally release soft-offlined SMT siblings if required */
	if (!cpu && nr_offl)
		release_offline_cpus();
}

/*
 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
 * in the NMI rendezvous to protect against a concurrent NMI on affected
 * CPUs.
 */
void noinstr microcode_offline_nmi_handler(void)
{
	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
		return;
	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
	raw_atomic_inc(&offline_in_nmi);
	wait_for_ctrl();
}

static noinstr bool microcode_update_handler(void)
{
	unsigned int cpu = raw_smp_processor_id();

	if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
		instrumentation_begin();
		load_primary(cpu);
		instrumentation_end();
	} else {
		load_secondary(cpu);
	}

	instrumentation_begin();
	touch_nmi_watchdog();
	instrumentation_end();

	return true;
}

/*
 * Protection against instrumentation is required for CPUs which are not
 * safe against an NMI which is delivered to the secondary SMT sibling
 * while the primary thread updates the microcode. Instrumentation can end
 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
 * which is the opposite of what the NMI rendezvous is trying to achieve.
 *
 * The primary thread is safe versus instrumentation as the actual
 * microcode update handles this correctly. It's only the sibling code
 * path which must be NMI safe until the primary thread completed the
 * update.
 */
bool noinstr microcode_nmi_handler(void)
{
	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
		return false;

	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
	return microcode_update_handler();
}

static int load_cpus_stopped(void *unused)
{
	if (microcode_ops->use_nmi) {
		/* Enable the NMI handler and raise NMI */
		this_cpu_write(ucode_ctrl.nmi_enabled, true);
		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
	} else {
		/* Just invoke the handler directly */
		microcode_update_handler();
	}
	return 0;
}

static int load_late_stop_cpus(bool is_safe)
{
	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
	unsigned int nr_offl, offline = 0;
	int old_rev = boot_cpu_data.microcode;
	struct cpuinfo_x86 prev_info;

	if (!is_safe) {
		pr_err("Late microcode loading without minimal revision check.\n");
		pr_err("You should switch to early loading, if possible.\n");
	}

	/*
	 * Pre-load the microcode image into a staging device. This
	 * process is preemptible and does not require stopping CPUs.
	 * Successful staging simplifies the subsequent late-loading
	 * process, reducing rendezvous time.
	 *
	 * Even if the transfer fails, the update will proceed as usual.
	 */
	if (microcode_ops->use_staging)
		microcode_ops->stage_microcode();

	atomic_set(&late_cpus_in, num_online_cpus());
	atomic_set(&offline_in_nmi, 0);
	loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
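	/*
	 * loops_per_jiffy is the calibrated number of delay-loop iterations
	 * per timer tick, so dividing by the tick length in microseconds
	 * (TICK_NSEC / 1000) yields iterations per microsecond. E.g. with
	 * HZ=1000 a tick is 1,000,000 ns = 1000 us, and a hypothetical
	 * loops_per_jiffy of 4,000,000 gives loops_per_usec = 4000.
	 */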

	/*
	 * Take a snapshot before the microcode update in order to compare and
	 * check whether any bits changed after an update.
	 */
	store_cpu_caps(&prev_info);

	if (microcode_ops->use_nmi)
		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);

	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);

	if (microcode_ops->use_nmi)
		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);

	/* Analyze the results */
	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
		switch (per_cpu(ucode_ctrl.result, cpu)) {
		case UCODE_UPDATED:	updated++; break;
		case UCODE_TIMEOUT:	timedout++; break;
		case UCODE_OK:		siblings++; break;
		case UCODE_OFFLINE:	offline++; break;
		default:		failed++; break;
		}
	}

	if (microcode_ops->finalize_late_load)
		microcode_ops->finalize_late_load(!updated);

	if (!updated) {
		/* Nothing changed. */
		if (!failed && !timedout)
			return 0;

		nr_offl = cpumask_weight(&cpu_offline_mask);
		if (offline < nr_offl) {
			pr_warn("%u offline siblings did not respond.\n",
				nr_offl - atomic_read(&offline_in_nmi));
			return -EIO;
		}
		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
		       failed, timedout);
		return -EIO;
	}

	if (!is_safe || failed || timedout)
		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
	if (failed || timedout) {
		pr_err("load incomplete. %u CPUs timed out or failed\n",
		       num_online_cpus() - (updated + siblings));
	}
	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
	microcode_check(&prev_info);

	return updated + siblings == num_online_cpus() ? 0 : -EIO;
}

/*
 * This function does two things:
 *
 * 1) Ensure that all required CPUs which are present and have been booted
 *    once are online.
 *
 *    To pass this check, all primary threads must be online.
 *
 *    If the microcode load is not safe against NMI then all SMT threads
 *    must be online as well because they still react to NMIs when they are
 *    soft-offlined and parked in one of the play_dead() variants. So if an
 *    NMI hits while the primary thread updates the microcode the resulting
 *    behaviour is undefined. The default play_dead() implementation on
 *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
 *    against a microcode update which affects MWAIT.
 *
 *    As soft-offlined CPUs still react on NMIs, the SMT sibling
 *    restriction can be lifted when the vendor driver signals to use NMI
 *    for rendezvous and the APIC provides a mechanism to send an NMI to a
 *    soft-offlined CPU. The soft-offlined CPUs are then able to
 *    participate in the rendezvous in a trivial stub handler.
 *
 * 2) Initialize the per CPU control structure and create a cpumask
 *    which contains "offline" secondary threads, so they can be handled
 *    correctly by a control CPU.
 */
static bool setup_cpus(void)
{
	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
	bool allow_smt_offline;
	unsigned int cpu;

	allow_smt_offline = microcode_ops->nmi_safe ||
		(microcode_ops->use_nmi && apic->nmi_to_offline_cpu);

	cpumask_clear(&cpu_offline_mask);

	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
		/*
		 * Offline CPUs sit in one of the play_dead() functions
		 * with interrupts disabled, but they still react on NMIs
		 * and execute arbitrary code. Also MWAIT being updated
		 * while the offline CPU sits there is not necessarily safe
		 * on all CPU variants.
		 *
		 * Mark them in the offline_cpus mask which will be handled
		 * by CPU0 later in the update process.
		 *
		 * Ensure that the primary thread is online so that it is
		 * guaranteed that all cores are updated.
		 */
		if (!cpu_online(cpu)) {
			if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
				pr_err("CPU %u not online, loading aborted\n", cpu);
				return false;
			}
			cpumask_set_cpu(cpu, &cpu_offline_mask);
			per_cpu(ucode_ctrl, cpu) = ctrl;
			continue;
		}

		/*
		 * Initialize the per CPU state. This is core scope for now,
		 * but prepared to take package or system scope into account.
		 */
		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
		per_cpu(ucode_ctrl, cpu) = ctrl;
	}
	return true;
}

static int load_late_locked(void)
{
	if (!setup_cpus())
		return -EBUSY;

	switch (microcode_ops->request_microcode_fw(0, &microcode_fdev->dev)) {
	case UCODE_NEW:
		return load_late_stop_cpus(false);
	case UCODE_NEW_SAFE:
		return load_late_stop_cpus(true);
	case UCODE_NFOUND:
		return -ENOENT;
	case UCODE_OK:
		return 0;
	default:
		return -EBADFD;
	}
}

static ssize_t reload_store(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t size)
{
	unsigned long val;
	ssize_t ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret || val != 1)
		return -EINVAL;

	cpus_read_lock();
	ret = load_late_locked();
	cpus_read_unlock();

	return ret ? : size;
}

static DEVICE_ATTR_WO(reload);
#endif
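
/*
 * Illustrative user-space sketch (not part of this driver, kept compiled
 * out): with CONFIG_MICROCODE_LATE_LOADING, late loading is triggered by
 * writing "1" to the reload attribute created above, i.e.
 * /sys/devices/system/cpu/microcode/reload, once the updated blobs are in
 * place where request_firmware() can find them (typically under
 * /lib/firmware/intel-ucode/ or /lib/firmware/amd-ucode/).
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/devices/system/cpu/microcode/reload", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {
		/* e.g. ENOENT when no matching newer microcode was found */
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
#endif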

static ssize_t version_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
}

static ssize_t processor_flags_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}

static DEVICE_ATTR_RO(version);
static DEVICE_ATTR_RO(processor_flags);

static struct attribute *mc_default_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_processor_flags.attr,
	NULL
};

static const struct attribute_group mc_attr_group = {
	.attrs			= mc_default_attrs,
	.name			= "microcode",
};
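
/*
 * The group above is instantiated per CPU by mc_cpu_online() below, so the
 * currently loaded revision and the processor flags show up as e.g.
 *
 *   /sys/devices/system/cpu/cpu0/microcode/version
 *   /sys/devices/system/cpu/cpu0/microcode/processor_flags
 *
 * both printed in hex by the show() callbacks above.
 */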

static void microcode_fini_cpu(int cpu)
{
	if (microcode_ops->microcode_fini_cpu)
		microcode_ops->microcode_fini_cpu(cpu);
}

/**
 * microcode_bsp_resume - Update boot CPU microcode during resume.
 */
void microcode_bsp_resume(void)
{
	int cpu = smp_processor_id();
	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

	if (uci->mc)
		microcode_ops->apply_microcode(cpu);
	else
		reload_early_microcode(cpu);
}

static void microcode_bsp_syscore_resume(void *data)
{
	microcode_bsp_resume();
}

static const struct syscore_ops mc_syscore_ops = {
	.resume	= microcode_bsp_syscore_resume,
};

static struct syscore mc_syscore = {
	.ops = &mc_syscore_ops,
};

static int mc_cpu_online(unsigned int cpu)
{
	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
	struct device *dev = get_cpu_device(cpu);

	memset(uci, 0, sizeof(*uci));

	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
	cpu_data(cpu).microcode = uci->cpu_sig.rev;
	if (!cpu)
		boot_cpu_data.microcode = uci->cpu_sig.rev;

	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
		pr_err("Failed to create group for CPU%d\n", cpu);
	return 0;
}

static int mc_cpu_down_prep(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	microcode_fini_cpu(cpu);
	sysfs_remove_group(&dev->kobj, &mc_attr_group);
	return 0;
}

static struct attribute *cpu_root_microcode_attrs[] = {
#ifdef CONFIG_MICROCODE_LATE_LOADING
	&dev_attr_reload.attr,
#endif
	NULL
};

static const struct attribute_group cpu_root_microcode_group = {
	.name  = "microcode",
	.attrs = cpu_root_microcode_attrs,
};

static int __init microcode_init(void)
{
	struct device *dev_root;
	struct cpuinfo_x86 *c = &boot_cpu_data;
	int error;

	if (microcode_loader_disabled())
		return -EINVAL;

	if (c->x86_vendor == X86_VENDOR_INTEL)
		microcode_ops = init_intel_microcode();
	else if (c->x86_vendor == X86_VENDOR_AMD)
		microcode_ops = init_amd_microcode();
	else
		pr_err("no support for this CPU vendor\n");

	if (!microcode_ops)
		return -ENODEV;

	pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));

	if (early_data.new_rev)
		pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);

	microcode_fdev = faux_device_create("microcode", NULL, NULL);
	if (!microcode_fdev)
		return -ENODEV;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
		put_device(dev_root);
		if (error) {
			pr_err("Error creating microcode group!\n");
			goto out_pdev;
		}
	}

	register_syscore(&mc_syscore);
	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
			  mc_cpu_online, mc_cpu_down_prep);

	return 0;

 out_pdev:
	faux_device_destroy(microcode_fdev);
	return error;

}
late_initcall(microcode_init);
935