xref: /linux/arch/x86/kernel/cpu/microcode/core.c (revision 7b49a3fb69e785a2425c8dc7dbd0779a0a4c0eb2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
6  *	      2006	Shaohua Li <shaohua.li@intel.com>
7  *	      2013-2016	Borislav Petkov <bp@alien8.de>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
12  *			   H Peter Anvin <hpa@zytor.com>
13  *		  (C) 2015 Borislav Petkov <bp@alien8.de>
14  *
15  * This driver allows upgrading the microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/stop_machine.h>
21 #include <linux/device/faux.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
32 #include <linux/fs.h>
33 #include <linux/mm.h>
34 
35 #include <asm/apic.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/cpuid/api.h>
38 #include <asm/perf_event.h>
39 #include <asm/processor.h>
40 #include <asm/cmdline.h>
41 #include <asm/msr.h>
42 #include <asm/setup.h>
43 
44 #include "internal.h"
45 
46 static struct microcode_ops *microcode_ops;
47 static bool dis_ucode_ldr;
48 
49 bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
50 
51 /*
52  * Those below should be behind CONFIG_MICROCODE_DBG ifdeffery but in
53  * order to not uglify the code with ifdeffery and use IS_ENABLED()
54  * instead, leave them in. When microcode debugging is not enabled,
55  * those are meaningless anyway.
56  */
57 /* base microcode revision for debugging */
58 u32 base_rev;
59 u32 microcode_rev[NR_CPUS] = {};
60 
61 bool hypervisor_present;
62 
63 /*
64  * Synchronization.
65  *
66  * All non cpu-hotplug-callback call sites use:
67  *
68  * - cpus_read_lock/unlock() to synchronize with
69  *   the cpu-hotplug-callback call sites.
70  *
71  * We guarantee that only a single cpu is being
72  * updated at any particular moment of time.
73  */
74 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
75 
76 /*
77  * Those patch levels cannot be updated to newer ones and thus should be final.
78  */
79 static u32 final_levels[] = {
80 	0x01000098,
81 	0x0100009f,
82 	0x010000af,
83 	0, /* T-101 terminator */
84 };
85 
86 struct early_load_data early_data;
87 
88 /*
89  * Check the current patch level on this CPU.
90  *
91  * Returns:
92  *  - true: if update should stop
93  *  - false: otherwise
94  */
95 static bool amd_check_current_patch_level(void)
96 {
97 	u32 lvl, dummy, i;
98 	u32 *levels;
99 
100 	if (x86_cpuid_vendor() != X86_VENDOR_AMD)
101 		return false;
102 
103 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
104 
105 	levels = final_levels;
106 
107 	for (i = 0; levels[i]; i++) {
108 		if (lvl == levels[i])
109 			return true;
110 	}
111 	return false;
112 }
113 
114 bool __init microcode_loader_disabled(void)
115 {
116 	if (dis_ucode_ldr)
117 		return true;
118 
119 	/*
120 	 * Disable when:
121 	 *
122 	 * 1) The CPU does not support CPUID.
123 	 */
124 	if (!cpuid_feature()) {
125 		dis_ucode_ldr = true;
126 		return dis_ucode_ldr;
127 	}
128 
129 	/*
130 	 * 2) Bit 31 in CPUID[1]:ECX is clear
131 	 *    The bit is reserved for hypervisor use. This is still not
132 	 *    completely accurate as XEN PV guests don't see that CPUID bit
133 	 *    set, but that's good enough as they don't land on the BSP
134 	 *    path anyway.
135 	 *
136 	 * 3) Certain AMD patch levels are not allowed to be
137 	 *    overwritten.
138 	 */
139 	hypervisor_present = native_cpuid_ecx(1) & BIT(31);
140 
141 	if ((hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
142 	    amd_check_current_patch_level())
143 		dis_ucode_ldr = true;
144 
145 	return dis_ucode_ldr;
146 }
147 
148 static void __init early_parse_cmdline(void)
149 {
150 	char cmd_buf[64] = {};
151 	char *s, *p = cmd_buf;
152 
153 	if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
154 		while ((s = strsep(&p, ","))) {
155 			if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
156 				if (strstr(s, "base_rev=")) {
157 					/* advance to the option arg */
158 					strsep(&s, "=");
159 					if (kstrtouint(s, 16, &base_rev)) { ; }
160 				}
161 			}
162 
163 			if (!strcmp("force_minrev", s))
164 				force_minrev = true;
165 
166 			if (!strcmp(s, "dis_ucode_ldr"))
167 				dis_ucode_ldr = true;
168 		}
169 	}
170 
171 	/* old, compat option */
172 	if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
173 		dis_ucode_ldr = true;
174 }
175 
176 void __init load_ucode_bsp(void)
177 {
178 	unsigned int cpuid_1_eax;
179 	bool intel = true;
180 
181 	early_parse_cmdline();
182 
183 	if (microcode_loader_disabled())
184 		return;
185 
186 	cpuid_1_eax = native_cpuid_eax(1);
187 
188 	switch (x86_cpuid_vendor()) {
189 	case X86_VENDOR_INTEL:
190 		if (x86_family(cpuid_1_eax) < 6)
191 			return;
192 		break;
193 
194 	case X86_VENDOR_AMD:
195 		if (x86_family(cpuid_1_eax) < 0x10)
196 			return;
197 		intel = false;
198 		break;
199 
200 	default:
201 		return;
202 	}
203 
204 	if (intel)
205 		load_ucode_intel_bsp(&early_data);
206 	else
207 		load_ucode_amd_bsp(&early_data, cpuid_1_eax);
208 }
209 
210 void load_ucode_ap(void)
211 {
212 	unsigned int cpuid_1_eax;
213 
214 	/*
215 	 * Can't use microcode_loader_disabled() here - .init section
216 	 * hell. It doesn't have to either - the BSP variant must've
217 	 * parsed cmdline already anyway.
218 	 */
219 	if (dis_ucode_ldr)
220 		return;
221 
222 	cpuid_1_eax = native_cpuid_eax(1);
223 
224 	switch (x86_cpuid_vendor()) {
225 	case X86_VENDOR_INTEL:
226 		if (x86_family(cpuid_1_eax) >= 6)
227 			load_ucode_intel_ap();
228 		break;
229 	case X86_VENDOR_AMD:
230 		if (x86_family(cpuid_1_eax) >= 0x10)
231 			load_ucode_amd_ap(cpuid_1_eax);
232 		break;
233 	default:
234 		break;
235 	}
236 }
237 
238 struct cpio_data __init find_microcode_in_initrd(const char *path)
239 {
240 #ifdef CONFIG_BLK_DEV_INITRD
241 	unsigned long start = 0;
242 	size_t size;
243 
244 #ifdef CONFIG_X86_32
245 	size = boot_params.hdr.ramdisk_size;
246 	/* Early load on BSP has a temporary mapping. */
247 	if (size)
248 		start = initrd_start_early;
249 
250 #else /* CONFIG_X86_64 */
251 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
252 	size |= boot_params.hdr.ramdisk_size;
253 
254 	if (size) {
255 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
256 		start |= boot_params.hdr.ramdisk_image;
257 		start += PAGE_OFFSET;
258 	}
259 #endif
260 
261 	/*
262 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
263 	 * has the virtual address of the beginning of the initrd. It also
264 	 * possibly relocates the ramdisk. In either case, initrd_start contains
265 	 * the updated address so use that instead.
266 	 */
267 	if (initrd_start)
268 		start = initrd_start;
269 
270 	return find_cpio_data(path, (void *)start, size, NULL);
271 #else /* !CONFIG_BLK_DEV_INITRD */
272 	return (struct cpio_data){ NULL, 0, "" };
273 #endif
274 }
275 
276 static void reload_early_microcode(unsigned int cpu)
277 {
278 	int vendor, family;
279 
280 	vendor = x86_cpuid_vendor();
281 	family = x86_cpuid_family();
282 
283 	switch (vendor) {
284 	case X86_VENDOR_INTEL:
285 		if (family >= 6)
286 			reload_ucode_intel();
287 		break;
288 	case X86_VENDOR_AMD:
289 		if (family >= 0x10)
290 			reload_ucode_amd(cpu);
291 		break;
292 	default:
293 		break;
294 	}
295 }
296 
/*
 * Fake device for request_firmware(). Created in microcode_init(); its
 * struct device is what load_late_locked() hands to the vendor
 * request_microcode_fw() callback.
 */
static struct faux_device *microcode_fdev;
299 
300 #ifdef CONFIG_MICROCODE_LATE_LOADING
301 /*
302  * Late loading dance. Why the heavy-handed stomp_machine effort?
303  *
304  * - HT siblings must be idle and not execute other code while the other sibling
305  *   is loading microcode in order to avoid any negative interactions caused by
306  *   the loading.
307  *
308  * - In addition, microcode update on the cores must be serialized until this
309  *   requirement can be relaxed in the future. Right now, this is conservative
310  *   and good.
311  */
/* Per-CPU control state which a waiting CPU polls in wait_for_ctrl(). */
enum sibling_ctrl {
	SCTRL_WAIT,	/* Spinwait with timeout */
	SCTRL_APPLY,	/* Invoke the microcode_apply() callback */
	SCTRL_DONE,	/* Proceed without invoking the microcode_apply() callback */
};
320 
321 struct microcode_ctrl {
322 	enum sibling_ctrl	ctrl;
323 	enum ucode_state	result;
324 	unsigned int		ctrl_cpu;
325 	bool			nmi_enabled;
326 };
327 
328 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
329 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
330 static atomic_t late_cpus_in, offline_in_nmi;
331 static unsigned int loops_per_usec;
332 static cpumask_t cpu_offline_mask;
333 
334 static noinstr bool wait_for_cpus(atomic_t *cnt)
335 {
336 	unsigned int timeout, loops;
337 
338 	WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
339 
340 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
341 		if (!raw_atomic_read(cnt))
342 			return true;
343 
344 		for (loops = 0; loops < loops_per_usec; loops++)
345 			cpu_relax();
346 
347 		/* If invoked directly, tickle the NMI watchdog */
348 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
349 			instrumentation_begin();
350 			touch_nmi_watchdog();
351 			instrumentation_end();
352 		}
353 	}
354 	/* Prevent the late comers from making progress and let them time out */
355 	raw_atomic_inc(cnt);
356 	return false;
357 }
358 
359 static noinstr bool wait_for_ctrl(void)
360 {
361 	unsigned int timeout, loops;
362 
363 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
364 		if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
365 			return true;
366 
367 		for (loops = 0; loops < loops_per_usec; loops++)
368 			cpu_relax();
369 
370 		/* If invoked directly, tickle the NMI watchdog */
371 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
372 			instrumentation_begin();
373 			touch_nmi_watchdog();
374 			instrumentation_end();
375 		}
376 	}
377 	return false;
378 }
379 
380 /*
381  * Protected against instrumentation up to the point where the primary
382  * thread completed the update. See microcode_nmi_handler() for details.
383  */
384 static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
385 {
386 	/* Initial rendezvous to ensure that all CPUs have arrived */
387 	if (!wait_for_cpus(&late_cpus_in)) {
388 		raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
389 		return false;
390 	}
391 
392 	/*
393 	 * Wait for primary threads to complete. If one of them hangs due
394 	 * to the update, there is no way out. This is non-recoverable
395 	 * because the CPU might hold locks or resources and confuse the
396 	 * scheduler, watchdogs etc. There is no way to safely evacuate the
397 	 * machine.
398 	 */
399 	if (wait_for_ctrl())
400 		return true;
401 
402 	instrumentation_begin();
403 	panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
404 	instrumentation_end();
405 }
406 
407 /*
408  * Protected against instrumentation up to the point where the primary
409  * thread completed the update. See microcode_nmi_handler() for details.
410  */
411 static noinstr void load_secondary(unsigned int cpu)
412 {
413 	unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
414 	enum ucode_state ret;
415 
416 	if (!load_secondary_wait(ctrl_cpu)) {
417 		instrumentation_begin();
418 		pr_err_once("load: %d CPUs timed out\n",
419 			    atomic_read(&late_cpus_in) - 1);
420 		instrumentation_end();
421 		return;
422 	}
423 
424 	/* Primary thread completed. Allow to invoke instrumentable code */
425 	instrumentation_begin();
426 	/*
427 	 * If the primary succeeded then invoke the apply() callback,
428 	 * otherwise copy the state from the primary thread.
429 	 */
430 	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
431 		ret = microcode_ops->apply_microcode(cpu);
432 	else
433 		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
434 
435 	this_cpu_write(ucode_ctrl.result, ret);
436 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
437 	instrumentation_end();
438 }
439 
440 static void __load_primary(unsigned int cpu)
441 {
442 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
443 	enum sibling_ctrl ctrl;
444 	enum ucode_state ret;
445 	unsigned int sibling;
446 
447 	/* Initial rendezvous to ensure that all CPUs have arrived */
448 	if (!wait_for_cpus(&late_cpus_in)) {
449 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
450 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
451 		return;
452 	}
453 
454 	ret = microcode_ops->apply_microcode(cpu);
455 	this_cpu_write(ucode_ctrl.result, ret);
456 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
457 
458 	/*
459 	 * If the update was successful, let the siblings run the apply()
460 	 * callback. If not, tell them it's done. This also covers the
461 	 * case where the CPU has uniform loading at package or system
462 	 * scope implemented but does not advertise it.
463 	 */
464 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
465 		ctrl = SCTRL_APPLY;
466 	else
467 		ctrl = SCTRL_DONE;
468 
469 	for_each_cpu(sibling, secondaries) {
470 		if (sibling != cpu)
471 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
472 	}
473 }
474 
475 static bool kick_offline_cpus(unsigned int nr_offl)
476 {
477 	unsigned int cpu, timeout;
478 
479 	for_each_cpu(cpu, &cpu_offline_mask) {
480 		/* Enable the rendezvous handler and send NMI */
481 		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
482 		apic_send_nmi_to_offline_cpu(cpu);
483 	}
484 
485 	/* Wait for them to arrive */
486 	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
487 		if (atomic_read(&offline_in_nmi) == nr_offl)
488 			return true;
489 		udelay(1);
490 	}
491 	/* Let the others time out */
492 	return false;
493 }
494 
495 static void release_offline_cpus(void)
496 {
497 	unsigned int cpu;
498 
499 	for_each_cpu(cpu, &cpu_offline_mask)
500 		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
501 }
502 
503 static void load_primary(unsigned int cpu)
504 {
505 	unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
506 	bool proceed = true;
507 
508 	/* Kick soft-offlined SMT siblings if required */
509 	if (!cpu && nr_offl)
510 		proceed = kick_offline_cpus(nr_offl);
511 
512 	/* If the soft-offlined CPUs did not respond, abort */
513 	if (proceed)
514 		__load_primary(cpu);
515 
516 	/* Unconditionally release soft-offlined SMT siblings if required */
517 	if (!cpu && nr_offl)
518 		release_offline_cpus();
519 }
520 
521 /*
522  * Minimal stub rendezvous handler for soft-offlined CPUs which participate
523  * in the NMI rendezvous to protect against a concurrent NMI on affected
524  * CPUs.
525  */
526 void noinstr microcode_offline_nmi_handler(void)
527 {
528 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
529 		return;
530 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
531 	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
532 	raw_atomic_inc(&offline_in_nmi);
533 	wait_for_ctrl();
534 }
535 
536 static noinstr bool microcode_update_handler(void)
537 {
538 	unsigned int cpu = raw_smp_processor_id();
539 
540 	if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
541 		instrumentation_begin();
542 		load_primary(cpu);
543 		instrumentation_end();
544 	} else {
545 		load_secondary(cpu);
546 	}
547 
548 	instrumentation_begin();
549 	touch_nmi_watchdog();
550 	instrumentation_end();
551 
552 	return true;
553 }
554 
555 /*
556  * Protection against instrumentation is required for CPUs which are not
557  * safe against an NMI which is delivered to the secondary SMT sibling
558  * while the primary thread updates the microcode. Instrumentation can end
559  * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
560  * which is the opposite of what the NMI rendezvous is trying to achieve.
561  *
562  * The primary thread is safe versus instrumentation as the actual
563  * microcode update handles this correctly. It's only the sibling code
564  * path which must be NMI safe until the primary thread completed the
565  * update.
566  */
567 bool noinstr microcode_nmi_handler(void)
568 {
569 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
570 		return false;
571 
572 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
573 	return microcode_update_handler();
574 }
575 
576 static int load_cpus_stopped(void *unused)
577 {
578 	if (microcode_ops->use_nmi) {
579 		/* Enable the NMI handler and raise NMI */
580 		this_cpu_write(ucode_ctrl.nmi_enabled, true);
581 		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
582 	} else {
583 		/* Just invoke the handler directly */
584 		microcode_update_handler();
585 	}
586 	return 0;
587 }
588 
589 static int load_late_stop_cpus(bool is_safe)
590 {
591 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
592 	unsigned int nr_offl, offline = 0;
593 	int old_rev = boot_cpu_data.microcode;
594 	struct cpuinfo_x86 prev_info;
595 
596 	if (!is_safe) {
597 		pr_err("Late microcode loading without minimal revision check.\n");
598 		pr_err("You should switch to early loading, if possible.\n");
599 	}
600 
601 	/*
602 	 * Pre-load the microcode image into a staging device. This
603 	 * process is preemptible and does not require stopping CPUs.
604 	 * Successful staging simplifies the subsequent late-loading
605 	 * process, reducing rendezvous time.
606 	 *
607 	 * Even if the transfer fails, the update will proceed as usual.
608 	 */
609 	if (microcode_ops->use_staging)
610 		microcode_ops->stage_microcode();
611 
612 	atomic_set(&late_cpus_in, num_online_cpus());
613 	atomic_set(&offline_in_nmi, 0);
614 	loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
615 
616 	/*
617 	 * Take a snapshot before the microcode update in order to compare and
618 	 * check whether any bits changed after an update.
619 	 */
620 	store_cpu_caps(&prev_info);
621 
622 	if (microcode_ops->use_nmi)
623 		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
624 
625 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
626 
627 	if (microcode_ops->use_nmi)
628 		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
629 
630 	/* Analyze the results */
631 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
632 		switch (per_cpu(ucode_ctrl.result, cpu)) {
633 		case UCODE_UPDATED:	updated++; break;
634 		case UCODE_TIMEOUT:	timedout++; break;
635 		case UCODE_OK:		siblings++; break;
636 		case UCODE_OFFLINE:	offline++; break;
637 		default:		failed++; break;
638 		}
639 	}
640 
641 	if (microcode_ops->finalize_late_load)
642 		microcode_ops->finalize_late_load(!updated);
643 
644 	if (!updated) {
645 		/* Nothing changed. */
646 		if (!failed && !timedout)
647 			return 0;
648 
649 		nr_offl = cpumask_weight(&cpu_offline_mask);
650 		if (offline < nr_offl) {
651 			pr_warn("%u offline siblings did not respond.\n",
652 				nr_offl - atomic_read(&offline_in_nmi));
653 			return -EIO;
654 		}
655 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
656 		       failed, timedout);
657 		return -EIO;
658 	}
659 
660 	if (!is_safe || failed || timedout)
661 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
662 
663 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
664 	if (failed || timedout) {
665 		pr_err("load incomplete. %u CPUs timed out or failed\n",
666 		       num_online_cpus() - (updated + siblings));
667 	}
668 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
669 	microcode_check(&prev_info);
670 
671 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
672 }
673 
674 /*
675  * This function does two things:
676  *
677  * 1) Ensure that all required CPUs which are present and have been booted
678  *    once are online.
679  *
680  *    To pass this check, all primary threads must be online.
681  *
682  *    If the microcode load is not safe against NMI then all SMT threads
683  *    must be online as well because they still react to NMIs when they are
684  *    soft-offlined and parked in one of the play_dead() variants. So if a
685  *    NMI hits while the primary thread updates the microcode the resulting
686  *    behaviour is undefined. The default play_dead() implementation on
687  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
688  *    against a microcode update which affects MWAIT.
689  *
690  *    As soft-offlined CPUs still react on NMIs, the SMT sibling
691  *    restriction can be lifted when the vendor driver signals to use NMI
692  *    for rendezvous and the APIC provides a mechanism to send an NMI to a
693  *    soft-offlined CPU. The soft-offlined CPUs are then able to
694  *    participate in the rendezvous in a trivial stub handler.
695  *
696  * 2) Initialize the per CPU control structure and create a cpumask
697  *    which contains "offline"; secondary threads, so they can be handled
698  *    correctly by a control CPU.
699  */
700 static bool setup_cpus(void)
701 {
702 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
703 	bool allow_smt_offline;
704 	unsigned int cpu;
705 
706 	allow_smt_offline = microcode_ops->nmi_safe ||
707 		(microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
708 
709 	cpumask_clear(&cpu_offline_mask);
710 
711 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
712 		/*
713 		 * Offline CPUs sit in one of the play_dead() functions
714 		 * with interrupts disabled, but they still react on NMIs
715 		 * and execute arbitrary code. Also MWAIT being updated
716 		 * while the offline CPU sits there is not necessarily safe
717 		 * on all CPU variants.
718 		 *
719 		 * Mark them in the offline_cpus mask which will be handled
720 		 * by CPU0 later in the update process.
721 		 *
722 		 * Ensure that the primary thread is online so that it is
723 		 * guaranteed that all cores are updated.
724 		 */
725 		if (!cpu_online(cpu)) {
726 			if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
727 				pr_err("CPU %u not online, loading aborted\n", cpu);
728 				return false;
729 			}
730 			cpumask_set_cpu(cpu, &cpu_offline_mask);
731 			per_cpu(ucode_ctrl, cpu) = ctrl;
732 			continue;
733 		}
734 
735 		/*
736 		 * Initialize the per CPU state. This is core scope for now,
737 		 * but prepared to take package or system scope into account.
738 		 */
739 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
740 		per_cpu(ucode_ctrl, cpu) = ctrl;
741 	}
742 	return true;
743 }
744 
745 static int load_late_locked(void)
746 {
747 	if (!setup_cpus())
748 		return -EBUSY;
749 
750 	switch (microcode_ops->request_microcode_fw(0, &microcode_fdev->dev)) {
751 	case UCODE_NEW:
752 		return load_late_stop_cpus(false);
753 	case UCODE_NEW_SAFE:
754 		return load_late_stop_cpus(true);
755 	case UCODE_NFOUND:
756 		return -ENOENT;
757 	case UCODE_OK:
758 		return 0;
759 	default:
760 		return -EBADFD;
761 	}
762 }
763 
764 static ssize_t reload_store(struct device *dev,
765 			    struct device_attribute *attr,
766 			    const char *buf, size_t size)
767 {
768 	unsigned long val;
769 	ssize_t ret;
770 
771 	ret = kstrtoul(buf, 0, &val);
772 	if (ret || val != 1)
773 		return -EINVAL;
774 
775 	cpus_read_lock();
776 	ret = load_late_locked();
777 	cpus_read_unlock();
778 
779 	return ret ? : size;
780 }
781 
782 static DEVICE_ATTR_WO(reload);
783 #endif
784 
785 static ssize_t version_show(struct device *dev,
786 			struct device_attribute *attr, char *buf)
787 {
788 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
789 
790 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
791 }
792 
793 static ssize_t processor_flags_show(struct device *dev,
794 			struct device_attribute *attr, char *buf)
795 {
796 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
797 
798 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
799 }
800 
801 static DEVICE_ATTR_RO(version);
802 static DEVICE_ATTR_RO(processor_flags);
803 
804 static struct attribute *mc_default_attrs[] = {
805 	&dev_attr_version.attr,
806 	&dev_attr_processor_flags.attr,
807 	NULL
808 };
809 
810 static const struct attribute_group mc_attr_group = {
811 	.attrs			= mc_default_attrs,
812 	.name			= "microcode",
813 };
814 
815 static void microcode_fini_cpu(int cpu)
816 {
817 	if (microcode_ops->microcode_fini_cpu)
818 		microcode_ops->microcode_fini_cpu(cpu);
819 }
820 
821 /**
822  * microcode_bsp_resume - Update boot CPU microcode during resume.
823  */
824 void microcode_bsp_resume(void)
825 {
826 	int cpu = smp_processor_id();
827 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
828 
829 	if (uci->mc)
830 		microcode_ops->apply_microcode(cpu);
831 	else
832 		reload_early_microcode(cpu);
833 }
834 
835 static void microcode_bsp_syscore_resume(void *data)
836 {
837 	microcode_bsp_resume();
838 }
839 
840 static const struct syscore_ops mc_syscore_ops = {
841 	.resume	= microcode_bsp_syscore_resume,
842 };
843 
844 static struct syscore mc_syscore = {
845 	.ops = &mc_syscore_ops,
846 };
847 
848 static int mc_cpu_online(unsigned int cpu)
849 {
850 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
851 	struct device *dev = get_cpu_device(cpu);
852 
853 	memset(uci, 0, sizeof(*uci));
854 
855 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
856 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
857 	if (!cpu)
858 		boot_cpu_data.microcode = uci->cpu_sig.rev;
859 
860 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
861 		pr_err("Failed to create group for CPU%d\n", cpu);
862 	return 0;
863 }
864 
865 static int mc_cpu_down_prep(unsigned int cpu)
866 {
867 	struct device *dev = get_cpu_device(cpu);
868 
869 	microcode_fini_cpu(cpu);
870 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
871 	return 0;
872 }
873 
874 static struct attribute *cpu_root_microcode_attrs[] = {
875 #ifdef CONFIG_MICROCODE_LATE_LOADING
876 	&dev_attr_reload.attr,
877 #endif
878 	NULL
879 };
880 
881 static const struct attribute_group cpu_root_microcode_group = {
882 	.name  = "microcode",
883 	.attrs = cpu_root_microcode_attrs,
884 };
885 
886 static int __init microcode_init(void)
887 {
888 	struct device *dev_root;
889 	struct cpuinfo_x86 *c = &boot_cpu_data;
890 	int error;
891 
892 	if (microcode_loader_disabled())
893 		return -EINVAL;
894 
895 	if (c->x86_vendor == X86_VENDOR_INTEL)
896 		microcode_ops = init_intel_microcode();
897 	else if (c->x86_vendor == X86_VENDOR_AMD)
898 		microcode_ops = init_amd_microcode();
899 	else
900 		pr_err("no support for this CPU vendor\n");
901 
902 	if (!microcode_ops)
903 		return -ENODEV;
904 
905 	pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
906 
907 	if (early_data.new_rev)
908 		pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
909 
910 	microcode_fdev = faux_device_create("microcode", NULL, NULL);
911 	if (!microcode_fdev)
912 		return -ENODEV;
913 
914 	dev_root = bus_get_dev_root(&cpu_subsys);
915 	if (dev_root) {
916 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
917 		put_device(dev_root);
918 		if (error) {
919 			pr_err("Error creating microcode group!\n");
920 			goto out_pdev;
921 		}
922 	}
923 
924 	register_syscore(&mc_syscore);
925 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
926 			  mc_cpu_online, mc_cpu_down_prep);
927 
928 	return 0;
929 
930  out_pdev:
931 	faux_device_destroy(microcode_fdev);
932 	return error;
933 
934 }
935 late_initcall(microcode_init);
936