xref: /linux/arch/x86/kernel/cpu/microcode/core.c (revision 65b09bfa8aa7ebe087093b591525385efb2d58b0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
6  *	      2006	Shaohua Li <shaohua.li@intel.com>
7  *	      2013-2016	Borislav Petkov <bp@alien8.de>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
12  *			   H Peter Anvin" <hpa@zytor.com>
13  *		  (C) 2015 Borislav Petkov <bp@alien8.de>
14  *
15  * This driver allows to upgrade microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/stop_machine.h>
21 #include <linux/device/faux.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
32 #include <linux/fs.h>
33 #include <linux/mm.h>
34 
35 #include <asm/apic.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/perf_event.h>
38 #include <asm/processor.h>
39 #include <asm/cmdline.h>
40 #include <asm/msr.h>
41 #include <asm/setup.h>
42 
43 #include "internal.h"
44 
/* Vendor driver callbacks; assigned once in microcode_init(). */
static struct microcode_ops *microcode_ops;
/*
 * True when the loader must not run. Set from the command line, when CPUID
 * is unavailable, or by microcode_loader_disabled().
 */
static bool dis_ucode_ldr;

/*
 * Defaults to the CONFIG_MICROCODE_LATE_FORCE_MINREV setting and can be
 * switched on with the "force_minrev" token of the "microcode=" option.
 */
bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);

/*
 * Those below should be behind CONFIG_MICROCODE_DBG ifdeffery but in
 * order to not uglify the code with ifdeffery and use IS_ENABLED()
 * instead, leave them in. When microcode debugging is not enabled,
 * those are meaningless anyway.
 */
/*
 * Base microcode revision for debugging. Parsed as a hexadecimal number
 * from the "base_rev=" token of the "microcode=" option.
 */
u32 base_rev;
/* Per-CPU revision array; not written anywhere in this file. */
u32 microcode_rev[NR_CPUS] = {};

/* Set in load_ucode_bsp() from bit 31 of CPUID[1]:ECX. */
bool __ro_after_init x86_hypervisor_present;
61 
/*
 * Synchronization.
 *
 * All non cpu-hotplug-callback call sites use:
 *
 * - cpus_read_lock/unlock() to synchronize with
 *   the cpu-hotplug-callback call sites.
 *
 * We guarantee that only a single cpu is being
 * updated at any particular moment of time.
 *
 * The array is indexed by CPU number: the hotplug online callback and the
 * resume path index it with the CPU id, the sysfs show handlers with the
 * id of the CPU device.
 */
struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
74 
75 /*
76  * Those patch levels cannot be updated to newer ones and thus should be final.
77  */
78 static u32 final_levels[] = {
79 	0x01000098,
80 	0x0100009f,
81 	0x010000af,
82 	0, /* T-101 terminator */
83 };
84 
85 struct early_load_data early_data;
86 
87 /*
88  * Check the current patch level on this CPU.
89  *
90  * Returns:
91  *  - true: if update should stop
92  *  - false: otherwise
93  */
94 static bool amd_check_current_patch_level(void)
95 {
96 	u32 lvl, dummy, i;
97 	u32 *levels;
98 
99 	if (x86_cpuid_vendor() != X86_VENDOR_AMD)
100 		return false;
101 
102 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
103 
104 	levels = final_levels;
105 
106 	for (i = 0; levels[i]; i++) {
107 		if (lvl == levels[i])
108 			return true;
109 	}
110 	return false;
111 }
112 
113 bool __init microcode_loader_disabled(void)
114 {
115 	if (dis_ucode_ldr)
116 		return true;
117 
118 	/*
119 	 * Disable when:
120 	 *
121 	 * 1) The CPU does not support CPUID, detected below in
122 	 *    load_ucode_bsp().
123 	 *
124 	 * 2) Bit 31 in CPUID[1]:ECX is clear
125 	 *    The bit is reserved for hypervisor use. This is still not
126 	 *    completely accurate as XEN PV guests don't see that CPUID bit
127 	 *    set, but that's good enough as they don't land on the BSP
128 	 *    path anyway.
129 	 *
130 	 * 3) Certain AMD patch levels are not allowed to be
131 	 *    overwritten.
132 	 */
133 	if ((x86_hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
134 	    amd_check_current_patch_level())
135 		dis_ucode_ldr = true;
136 
137 	return dis_ucode_ldr;
138 }
139 
140 static void __init early_parse_cmdline(void)
141 {
142 	char cmd_buf[64] = {};
143 	char *s, *p = cmd_buf;
144 
145 	if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
146 		while ((s = strsep(&p, ","))) {
147 			if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
148 				if (strstr(s, "base_rev=")) {
149 					/* advance to the option arg */
150 					strsep(&s, "=");
151 					if (kstrtouint(s, 16, &base_rev)) { ; }
152 				}
153 			}
154 
155 			if (!strcmp("force_minrev", s))
156 				force_minrev = true;
157 
158 			if (!strcmp(s, "dis_ucode_ldr"))
159 				dis_ucode_ldr = true;
160 		}
161 	}
162 
163 	/* old, compat option */
164 	if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
165 		dis_ucode_ldr = true;
166 }
167 
168 void __init load_ucode_bsp(void)
169 {
170 	unsigned int cpuid_1_eax;
171 	bool intel = true;
172 
173 	early_parse_cmdline();
174 
175 	if (!cpuid_feature())
176 		dis_ucode_ldr = true;
177 	else
178 		x86_hypervisor_present = native_cpuid_ecx(1) & BIT(31);
179 
180 	if (microcode_loader_disabled())
181 		return;
182 
183 	cpuid_1_eax = native_cpuid_eax(1);
184 
185 	switch (x86_cpuid_vendor()) {
186 	case X86_VENDOR_INTEL:
187 		if (x86_family(cpuid_1_eax) < 6)
188 			return;
189 		break;
190 
191 	case X86_VENDOR_AMD:
192 		if (x86_family(cpuid_1_eax) < 0x10)
193 			return;
194 		intel = false;
195 		break;
196 
197 	default:
198 		return;
199 	}
200 
201 	if (intel)
202 		load_ucode_intel_bsp(&early_data);
203 	else
204 		load_ucode_amd_bsp(&early_data, cpuid_1_eax);
205 }
206 
207 void load_ucode_ap(void)
208 {
209 	unsigned int cpuid_1_eax;
210 
211 	/*
212 	 * Can't use microcode_loader_disabled() here - .init section
213 	 * hell. It doesn't have to either - the BSP variant must've
214 	 * parsed cmdline already anyway.
215 	 */
216 	if (dis_ucode_ldr)
217 		return;
218 
219 	cpuid_1_eax = native_cpuid_eax(1);
220 
221 	switch (x86_cpuid_vendor()) {
222 	case X86_VENDOR_INTEL:
223 		if (x86_family(cpuid_1_eax) >= 6)
224 			load_ucode_intel_ap();
225 		break;
226 	case X86_VENDOR_AMD:
227 		if (x86_family(cpuid_1_eax) >= 0x10)
228 			load_ucode_amd_ap(cpuid_1_eax);
229 		break;
230 	default:
231 		break;
232 	}
233 }
234 
235 struct cpio_data __init find_microcode_in_initrd(const char *path)
236 {
237 #ifdef CONFIG_BLK_DEV_INITRD
238 	unsigned long start = 0;
239 	size_t size;
240 
241 #ifdef CONFIG_X86_32
242 	size = boot_params.hdr.ramdisk_size;
243 	/* Early load on BSP has a temporary mapping. */
244 	if (size)
245 		start = initrd_start_early;
246 
247 #else /* CONFIG_X86_64 */
248 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
249 	size |= boot_params.hdr.ramdisk_size;
250 
251 	if (size) {
252 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
253 		start |= boot_params.hdr.ramdisk_image;
254 		start += PAGE_OFFSET;
255 	}
256 #endif
257 
258 	/*
259 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
260 	 * has the virtual address of the beginning of the initrd. It also
261 	 * possibly relocates the ramdisk. In either case, initrd_start contains
262 	 * the updated address so use that instead.
263 	 */
264 	if (initrd_start)
265 		start = initrd_start;
266 
267 	return find_cpio_data(path, (void *)start, size, NULL);
268 #else /* !CONFIG_BLK_DEV_INITRD */
269 	return (struct cpio_data){ NULL, 0, "" };
270 #endif
271 }
272 
273 static void reload_early_microcode(unsigned int cpu)
274 {
275 	int vendor, family;
276 
277 	vendor = x86_cpuid_vendor();
278 	family = x86_cpuid_family();
279 
280 	switch (vendor) {
281 	case X86_VENDOR_INTEL:
282 		if (family >= 6)
283 			reload_ucode_intel();
284 		break;
285 	case X86_VENDOR_AMD:
286 		if (family >= 0x10)
287 			reload_ucode_amd(cpu);
288 		break;
289 	default:
290 		break;
291 	}
292 }
293 
/*
 * Fake device for request_firmware. Created in microcode_init(); its
 * struct device is handed to the vendor request_microcode_fw() callback on
 * late loading.
 */
static struct faux_device *microcode_fdev;
296 
297 #ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why the heavy-handed stomp_machine effort?
 *
 * - HT siblings must be idle and not execute other code while the other sibling
 *   is loading microcode in order to avoid any negative interactions caused by
 *   the loading.
 *
 * - In addition, microcode update on the cores must be serialized until this
 *   requirement can be relaxed in the future. Right now, this is conservative
 *   and good.
 *
 * The values below are the per-CPU control states which the primary thread
 * hands to its siblings. SCTRL_WAIT is the state every CPU is initialized
 * with before an update.
 */
enum sibling_ctrl {
	/* Spinwait with timeout */
	SCTRL_WAIT,
	/* Invoke the microcode_apply() callback */
	SCTRL_APPLY,
	/* Proceed without invoking the microcode_apply() callback */
	SCTRL_DONE,
};
317 
/* Per-CPU control block for one late loading attempt */
struct microcode_ctrl {
	/* State polled by waiting CPUs; written by the control CPU */
	enum sibling_ctrl	ctrl;
	/* Outcome of the attempt on this CPU; tallied after the rendezvous */
	enum ucode_state	result;
	/* First CPU in this CPU's sibling mask, it acts as the primary */
	unsigned int		ctrl_cpu;
	/* One-shot flag: set before the NMI is sent, cleared by the handler */
	bool			nmi_enabled;
};
324 
/* Enabled around the stop_machine() invocation when NMI rendezvous is used */
DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
/*
 * late_cpus_in:   online CPUs which still have to arrive at the rendezvous
 * offline_in_nmi: soft-offlined CPUs which entered the stub NMI handler
 */
static atomic_t late_cpus_in, offline_in_nmi;
/* Busy loop iterations per microsecond, derived from loops_per_jiffy */
static unsigned int loops_per_usec;
/* Soft-offlined CPUs taking part in the update; built by setup_cpus() */
static cpumask_t cpu_offline_mask;
330 
331 static noinstr bool wait_for_cpus(atomic_t *cnt)
332 {
333 	unsigned int timeout, loops;
334 
335 	WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
336 
337 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
338 		if (!raw_atomic_read(cnt))
339 			return true;
340 
341 		for (loops = 0; loops < loops_per_usec; loops++)
342 			cpu_relax();
343 
344 		/* If invoked directly, tickle the NMI watchdog */
345 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
346 			instrumentation_begin();
347 			touch_nmi_watchdog();
348 			instrumentation_end();
349 		}
350 	}
351 	/* Prevent the late comers from making progress and let them time out */
352 	raw_atomic_inc(cnt);
353 	return false;
354 }
355 
356 static noinstr bool wait_for_ctrl(void)
357 {
358 	unsigned int timeout, loops;
359 
360 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
361 		if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
362 			return true;
363 
364 		for (loops = 0; loops < loops_per_usec; loops++)
365 			cpu_relax();
366 
367 		/* If invoked directly, tickle the NMI watchdog */
368 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
369 			instrumentation_begin();
370 			touch_nmi_watchdog();
371 			instrumentation_end();
372 		}
373 	}
374 	return false;
375 }
376 
377 /*
378  * Protected against instrumentation up to the point where the primary
379  * thread completed the update. See microcode_nmi_handler() for details.
380  */
381 static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
382 {
383 	/* Initial rendezvous to ensure that all CPUs have arrived */
384 	if (!wait_for_cpus(&late_cpus_in)) {
385 		raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
386 		return false;
387 	}
388 
389 	/*
390 	 * Wait for primary threads to complete. If one of them hangs due
391 	 * to the update, there is no way out. This is non-recoverable
392 	 * because the CPU might hold locks or resources and confuse the
393 	 * scheduler, watchdogs etc. There is no way to safely evacuate the
394 	 * machine.
395 	 */
396 	if (wait_for_ctrl())
397 		return true;
398 
399 	instrumentation_begin();
400 	panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
401 	instrumentation_end();
402 }
403 
404 /*
405  * Protected against instrumentation up to the point where the primary
406  * thread completed the update. See microcode_nmi_handler() for details.
407  */
408 static noinstr void load_secondary(unsigned int cpu)
409 {
410 	unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
411 	enum ucode_state ret;
412 
413 	if (!load_secondary_wait(ctrl_cpu)) {
414 		instrumentation_begin();
415 		pr_err_once("load: %d CPUs timed out\n",
416 			    atomic_read(&late_cpus_in) - 1);
417 		instrumentation_end();
418 		return;
419 	}
420 
421 	/* Primary thread completed. Allow to invoke instrumentable code */
422 	instrumentation_begin();
423 	/*
424 	 * If the primary succeeded then invoke the apply() callback,
425 	 * otherwise copy the state from the primary thread.
426 	 */
427 	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
428 		ret = microcode_ops->apply_microcode(cpu);
429 	else
430 		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
431 
432 	this_cpu_write(ucode_ctrl.result, ret);
433 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
434 	instrumentation_end();
435 }
436 
437 static void __load_primary(unsigned int cpu)
438 {
439 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
440 	enum sibling_ctrl ctrl;
441 	enum ucode_state ret;
442 	unsigned int sibling;
443 
444 	/* Initial rendezvous to ensure that all CPUs have arrived */
445 	if (!wait_for_cpus(&late_cpus_in)) {
446 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
447 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
448 		return;
449 	}
450 
451 	ret = microcode_ops->apply_microcode(cpu);
452 	this_cpu_write(ucode_ctrl.result, ret);
453 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
454 
455 	/*
456 	 * If the update was successful, let the siblings run the apply()
457 	 * callback. If not, tell them it's done. This also covers the
458 	 * case where the CPU has uniform loading at package or system
459 	 * scope implemented but does not advertise it.
460 	 */
461 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
462 		ctrl = SCTRL_APPLY;
463 	else
464 		ctrl = SCTRL_DONE;
465 
466 	for_each_cpu(sibling, secondaries) {
467 		if (sibling != cpu)
468 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
469 	}
470 }
471 
472 static bool kick_offline_cpus(unsigned int nr_offl)
473 {
474 	unsigned int cpu, timeout;
475 
476 	for_each_cpu(cpu, &cpu_offline_mask) {
477 		/* Enable the rendezvous handler and send NMI */
478 		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
479 		apic_send_nmi_to_offline_cpu(cpu);
480 	}
481 
482 	/* Wait for them to arrive */
483 	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
484 		if (atomic_read(&offline_in_nmi) == nr_offl)
485 			return true;
486 		udelay(1);
487 	}
488 	/* Let the others time out */
489 	return false;
490 }
491 
492 static void release_offline_cpus(void)
493 {
494 	unsigned int cpu;
495 
496 	for_each_cpu(cpu, &cpu_offline_mask)
497 		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
498 }
499 
500 static void load_primary(unsigned int cpu)
501 {
502 	unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
503 	bool proceed = true;
504 
505 	/* Kick soft-offlined SMT siblings if required */
506 	if (!cpu && nr_offl)
507 		proceed = kick_offline_cpus(nr_offl);
508 
509 	/* If the soft-offlined CPUs did not respond, abort */
510 	if (proceed)
511 		__load_primary(cpu);
512 
513 	/* Unconditionally release soft-offlined SMT siblings if required */
514 	if (!cpu && nr_offl)
515 		release_offline_cpus();
516 }
517 
518 /*
519  * Minimal stub rendezvous handler for soft-offlined CPUs which participate
520  * in the NMI rendezvous to protect against a concurrent NMI on affected
521  * CPUs.
522  */
523 void noinstr microcode_offline_nmi_handler(void)
524 {
525 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
526 		return;
527 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
528 	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
529 	raw_atomic_inc(&offline_in_nmi);
530 	wait_for_ctrl();
531 }
532 
533 static noinstr bool microcode_update_handler(void)
534 {
535 	unsigned int cpu = raw_smp_processor_id();
536 
537 	if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
538 		instrumentation_begin();
539 		load_primary(cpu);
540 		instrumentation_end();
541 	} else {
542 		load_secondary(cpu);
543 	}
544 
545 	instrumentation_begin();
546 	touch_nmi_watchdog();
547 	instrumentation_end();
548 
549 	return true;
550 }
551 
552 /*
553  * Protection against instrumentation is required for CPUs which are not
554  * safe against an NMI which is delivered to the secondary SMT sibling
555  * while the primary thread updates the microcode. Instrumentation can end
556  * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
557  * which is the opposite of what the NMI rendezvous is trying to achieve.
558  *
559  * The primary thread is safe versus instrumentation as the actual
560  * microcode update handles this correctly. It's only the sibling code
561  * path which must be NMI safe until the primary thread completed the
562  * update.
563  */
564 bool noinstr microcode_nmi_handler(void)
565 {
566 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
567 		return false;
568 
569 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
570 	return microcode_update_handler();
571 }
572 
573 static int load_cpus_stopped(void *unused)
574 {
575 	if (microcode_ops->use_nmi) {
576 		/* Enable the NMI handler and raise NMI */
577 		this_cpu_write(ucode_ctrl.nmi_enabled, true);
578 		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
579 	} else {
580 		/* Just invoke the handler directly */
581 		microcode_update_handler();
582 	}
583 	return 0;
584 }
585 
586 static int load_late_stop_cpus(bool is_safe)
587 {
588 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
589 	unsigned int nr_offl, offline = 0;
590 	int old_rev = boot_cpu_data.microcode;
591 	struct cpuinfo_x86 prev_info;
592 
593 	if (!is_safe) {
594 		pr_err("Late microcode loading without minimal revision check.\n");
595 		pr_err("You should switch to early loading, if possible.\n");
596 	}
597 
598 	/*
599 	 * Pre-load the microcode image into a staging device. This
600 	 * process is preemptible and does not require stopping CPUs.
601 	 * Successful staging simplifies the subsequent late-loading
602 	 * process, reducing rendezvous time.
603 	 *
604 	 * Even if the transfer fails, the update will proceed as usual.
605 	 */
606 	if (microcode_ops->use_staging)
607 		microcode_ops->stage_microcode();
608 
609 	atomic_set(&late_cpus_in, num_online_cpus());
610 	atomic_set(&offline_in_nmi, 0);
611 	loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
612 
613 	/*
614 	 * Take a snapshot before the microcode update in order to compare and
615 	 * check whether any bits changed after an update.
616 	 */
617 	store_cpu_caps(&prev_info);
618 
619 	if (microcode_ops->use_nmi)
620 		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
621 
622 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
623 
624 	if (microcode_ops->use_nmi)
625 		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
626 
627 	/* Analyze the results */
628 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
629 		switch (per_cpu(ucode_ctrl.result, cpu)) {
630 		case UCODE_UPDATED:	updated++; break;
631 		case UCODE_TIMEOUT:	timedout++; break;
632 		case UCODE_OK:		siblings++; break;
633 		case UCODE_OFFLINE:	offline++; break;
634 		default:		failed++; break;
635 		}
636 	}
637 
638 	if (microcode_ops->finalize_late_load)
639 		microcode_ops->finalize_late_load(!updated);
640 
641 	if (!updated) {
642 		/* Nothing changed. */
643 		if (!failed && !timedout)
644 			return 0;
645 
646 		nr_offl = cpumask_weight(&cpu_offline_mask);
647 		if (offline < nr_offl) {
648 			pr_warn("%u offline siblings did not respond.\n",
649 				nr_offl - atomic_read(&offline_in_nmi));
650 			return -EIO;
651 		}
652 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
653 		       failed, timedout);
654 		return -EIO;
655 	}
656 
657 	if (!is_safe || failed || timedout)
658 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
659 
660 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
661 	if (failed || timedout) {
662 		pr_err("load incomplete. %u CPUs timed out or failed\n",
663 		       num_online_cpus() - (updated + siblings));
664 	}
665 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
666 	microcode_check(&prev_info);
667 
668 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
669 }
670 
671 /*
672  * This function does two things:
673  *
674  * 1) Ensure that all required CPUs which are present and have been booted
675  *    once are online.
676  *
677  *    To pass this check, all primary threads must be online.
678  *
679  *    If the microcode load is not safe against NMI then all SMT threads
680  *    must be online as well because they still react to NMIs when they are
681  *    soft-offlined and parked in one of the play_dead() variants. So if a
682  *    NMI hits while the primary thread updates the microcode the resulting
683  *    behaviour is undefined. The default play_dead() implementation on
684  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
685  *    against a microcode update which affects MWAIT.
686  *
687  *    As soft-offlined CPUs still react on NMIs, the SMT sibling
688  *    restriction can be lifted when the vendor driver signals to use NMI
689  *    for rendezvous and the APIC provides a mechanism to send an NMI to a
690  *    soft-offlined CPU. The soft-offlined CPUs are then able to
691  *    participate in the rendezvous in a trivial stub handler.
692  *
693  * 2) Initialize the per CPU control structure and create a cpumask
694  *    which contains "offline"; secondary threads, so they can be handled
695  *    correctly by a control CPU.
696  */
697 static bool setup_cpus(void)
698 {
699 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
700 	bool allow_smt_offline;
701 	unsigned int cpu;
702 
703 	allow_smt_offline = microcode_ops->nmi_safe ||
704 		(microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
705 
706 	cpumask_clear(&cpu_offline_mask);
707 
708 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
709 		/*
710 		 * Offline CPUs sit in one of the play_dead() functions
711 		 * with interrupts disabled, but they still react on NMIs
712 		 * and execute arbitrary code. Also MWAIT being updated
713 		 * while the offline CPU sits there is not necessarily safe
714 		 * on all CPU variants.
715 		 *
716 		 * Mark them in the offline_cpus mask which will be handled
717 		 * by CPU0 later in the update process.
718 		 *
719 		 * Ensure that the primary thread is online so that it is
720 		 * guaranteed that all cores are updated.
721 		 */
722 		if (!cpu_online(cpu)) {
723 			if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
724 				pr_err("CPU %u not online, loading aborted\n", cpu);
725 				return false;
726 			}
727 			cpumask_set_cpu(cpu, &cpu_offline_mask);
728 			per_cpu(ucode_ctrl, cpu) = ctrl;
729 			continue;
730 		}
731 
732 		/*
733 		 * Initialize the per CPU state. This is core scope for now,
734 		 * but prepared to take package or system scope into account.
735 		 */
736 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
737 		per_cpu(ucode_ctrl, cpu) = ctrl;
738 	}
739 	return true;
740 }
741 
742 static int load_late_locked(void)
743 {
744 	if (!setup_cpus())
745 		return -EBUSY;
746 
747 	switch (microcode_ops->request_microcode_fw(0, &microcode_fdev->dev)) {
748 	case UCODE_NEW:
749 		return load_late_stop_cpus(false);
750 	case UCODE_NEW_SAFE:
751 		return load_late_stop_cpus(true);
752 	case UCODE_NFOUND:
753 		return -ENOENT;
754 	case UCODE_OK:
755 		return 0;
756 	default:
757 		return -EBADFD;
758 	}
759 }
760 
761 static ssize_t reload_store(struct device *dev,
762 			    struct device_attribute *attr,
763 			    const char *buf, size_t size)
764 {
765 	unsigned long val;
766 	ssize_t ret;
767 
768 	ret = kstrtoul(buf, 0, &val);
769 	if (ret || val != 1)
770 		return -EINVAL;
771 
772 	cpus_read_lock();
773 	ret = load_late_locked();
774 	cpus_read_unlock();
775 
776 	return ret ? : size;
777 }
778 
779 static DEVICE_ATTR_WO(reload);
780 #endif
781 
782 static ssize_t version_show(struct device *dev,
783 			struct device_attribute *attr, char *buf)
784 {
785 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
786 
787 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
788 }
789 
790 static ssize_t processor_flags_show(struct device *dev,
791 			struct device_attribute *attr, char *buf)
792 {
793 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
794 
795 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
796 }
797 
/* Read-only attributes backed by version_show() and processor_flags_show() */
static DEVICE_ATTR_RO(version);
static DEVICE_ATTR_RO(processor_flags);

/* NULL terminated list of the per-CPU attributes */
static struct attribute *mc_default_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_processor_flags.attr,
	NULL
};

/*
 * "microcode" attribute group; created on the CPU device when a CPU comes
 * online and removed again before it goes down.
 */
static const struct attribute_group mc_attr_group = {
	.attrs			= mc_default_attrs,
	.name			= "microcode",
};
811 
812 static void microcode_fini_cpu(int cpu)
813 {
814 	if (microcode_ops->microcode_fini_cpu)
815 		microcode_ops->microcode_fini_cpu(cpu);
816 }
817 
818 /**
819  * microcode_bsp_resume - Update boot CPU microcode during resume.
820  */
821 void microcode_bsp_resume(void)
822 {
823 	int cpu = smp_processor_id();
824 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
825 
826 	if (uci->mc)
827 		microcode_ops->apply_microcode(cpu);
828 	else
829 		reload_early_microcode(cpu);
830 }
831 
/* syscore resume callback; @data is unused, the work is done by microcode_bsp_resume() */
static void microcode_bsp_syscore_resume(void *data)
{
	microcode_bsp_resume();
}

/* Only a resume callback is provided */
static const struct syscore_ops mc_syscore_ops = {
	.resume	= microcode_bsp_syscore_resume,
};

/* Registered with register_syscore() in microcode_init() */
static struct syscore mc_syscore = {
	.ops = &mc_syscore_ops,
};
844 
845 static int mc_cpu_online(unsigned int cpu)
846 {
847 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
848 	struct device *dev = get_cpu_device(cpu);
849 
850 	memset(uci, 0, sizeof(*uci));
851 
852 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
853 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
854 	if (!cpu)
855 		boot_cpu_data.microcode = uci->cpu_sig.rev;
856 
857 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
858 		pr_err("Failed to create group for CPU%d\n", cpu);
859 	return 0;
860 }
861 
862 static int mc_cpu_down_prep(unsigned int cpu)
863 {
864 	struct device *dev = get_cpu_device(cpu);
865 
866 	microcode_fini_cpu(cpu);
867 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
868 	return 0;
869 }
870 
/*
 * Attributes of the "microcode" group on the CPU subsystem root device.
 * The list is empty unless late loading is configured, in which case it
 * holds the write-only "reload" attribute.
 */
static struct attribute *cpu_root_microcode_attrs[] = {
#ifdef CONFIG_MICROCODE_LATE_LOADING
	&dev_attr_reload.attr,
#endif
	NULL
};

/* Created in microcode_init() when the CPU subsystem has a root device */
static const struct attribute_group cpu_root_microcode_group = {
	.name  = "microcode",
	.attrs = cpu_root_microcode_attrs,
};
882 
883 static int __init microcode_init(void)
884 {
885 	struct device *dev_root;
886 	struct cpuinfo_x86 *c = &boot_cpu_data;
887 	int error;
888 
889 	if (microcode_loader_disabled())
890 		return -EINVAL;
891 
892 	if (c->x86_vendor == X86_VENDOR_INTEL)
893 		microcode_ops = init_intel_microcode();
894 	else if (c->x86_vendor == X86_VENDOR_AMD)
895 		microcode_ops = init_amd_microcode();
896 	else
897 		pr_err("no support for this CPU vendor\n");
898 
899 	if (!microcode_ops)
900 		return -ENODEV;
901 
902 	pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
903 
904 	if (early_data.new_rev)
905 		pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
906 
907 	microcode_fdev = faux_device_create("microcode", NULL, NULL);
908 	if (!microcode_fdev)
909 		return -ENODEV;
910 
911 	dev_root = bus_get_dev_root(&cpu_subsys);
912 	if (dev_root) {
913 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
914 		put_device(dev_root);
915 		if (error) {
916 			pr_err("Error creating microcode group!\n");
917 			goto out_pdev;
918 		}
919 	}
920 
921 	register_syscore(&mc_syscore);
922 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
923 			  mc_cpu_online, mc_cpu_down_prep);
924 
925 	return 0;
926 
927  out_pdev:
928 	faux_device_destroy(microcode_fdev);
929 	return error;
930 
931 }
932 late_initcall(microcode_init);
933