xref: /linux/arch/x86/kernel/cpu/microcode/core.c (revision b615879dbfea6cf1236acbc3f2fb25ae84e07071)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
6  *	      2006	Shaohua Li <shaohua.li@intel.com>
7  *	      2013-2016	Borislav Petkov <bp@alien8.de>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
12  *			   H Peter Anvin" <hpa@zytor.com>
13  *		  (C) 2015 Borislav Petkov <bp@alien8.de>
14  *
15  * This driver allows to upgrade microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/stop_machine.h>
21 #include <linux/device/faux.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
32 #include <linux/fs.h>
33 #include <linux/mm.h>
34 
35 #include <asm/apic.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/perf_event.h>
38 #include <asm/processor.h>
39 #include <asm/cmdline.h>
40 #include <asm/msr.h>
41 #include <asm/setup.h>
42 
43 #include "internal.h"
44 
/* Vendor specific loader callbacks; assigned in microcode_init(). */
static struct microcode_ops *microcode_ops;
/*
 * Set when microcode loading is disabled, either from the command line
 * or by the checks in microcode_loader_disabled().
 */
static bool dis_ucode_ldr;

/*
 * Defaults to the Kconfig choice; the "force_minrev" token of the
 * "microcode=" command line option switches it on as well.
 */
bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);

/*
 * Those below should be behind CONFIG_MICROCODE_DBG ifdeffery but in
 * order to not uglify the code with ifdeffery and use IS_ENABLED()
 * instead, leave them in. When microcode debugging is not enabled,
 * those are meaningless anyway.
 */
/* base microcode revision for debugging */
u32 base_rev;
/* NOTE(review): presumably a per-CPU revision table for debugging; not referenced in this file. */
u32 microcode_rev[NR_CPUS] = {};

/*
 * Synchronization.
 *
 * All non cpu-hotplug-callback call sites use:
 *
 * - cpus_read_lock/unlock() to synchronize with
 *   the cpu-hotplug-callback call sites.
 *
 * We guarantee that only a single cpu is being
 * updated at any particular moment of time.
 */
/* Per-CPU microcode state, indexed by CPU number. */
struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
72 
73 /*
74  * Those patch levels cannot be updated to newer ones and thus should be final.
75  */
76 static u32 final_levels[] = {
77 	0x01000098,
78 	0x0100009f,
79 	0x010000af,
80 	0, /* T-101 terminator */
81 };
82 
83 struct early_load_data early_data;
84 
85 /*
86  * Check the current patch level on this CPU.
87  *
88  * Returns:
89  *  - true: if update should stop
90  *  - false: otherwise
91  */
92 static bool amd_check_current_patch_level(void)
93 {
94 	u32 lvl, dummy, i;
95 	u32 *levels;
96 
97 	if (x86_cpuid_vendor() != X86_VENDOR_AMD)
98 		return false;
99 
100 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
101 
102 	levels = final_levels;
103 
104 	for (i = 0; levels[i]; i++) {
105 		if (lvl == levels[i])
106 			return true;
107 	}
108 	return false;
109 }
110 
111 bool __init microcode_loader_disabled(void)
112 {
113 	if (dis_ucode_ldr)
114 		return true;
115 
116 	/*
117 	 * Disable when:
118 	 *
119 	 * 1) The CPU does not support CPUID.
120 	 *
121 	 * 2) Bit 31 in CPUID[1]:ECX is clear
122 	 *    The bit is reserved for hypervisor use. This is still not
123 	 *    completely accurate as XEN PV guests don't see that CPUID bit
124 	 *    set, but that's good enough as they don't land on the BSP
125 	 *    path anyway.
126 	 *
127 	 * 3) Certain AMD patch levels are not allowed to be
128 	 *    overwritten.
129 	 */
130 	if (!cpuid_feature() ||
131 	    ((native_cpuid_ecx(1) & BIT(31)) &&
132 	      !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
133 	    amd_check_current_patch_level())
134 		dis_ucode_ldr = true;
135 
136 	return dis_ucode_ldr;
137 }
138 
139 static void early_parse_cmdline(void)
140 {
141 	char cmd_buf[64] = {};
142 	char *s, *p = cmd_buf;
143 
144 	if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
145 		while ((s = strsep(&p, ","))) {
146 			if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
147 				if (strstr(s, "base_rev=")) {
148 					/* advance to the option arg */
149 					strsep(&s, "=");
150 					if (kstrtouint(s, 16, &base_rev)) { ; }
151 				}
152 			}
153 
154 			if (!strcmp("force_minrev", s))
155 				force_minrev = true;
156 
157 			if (!strcmp(s, "dis_ucode_ldr"))
158 				dis_ucode_ldr = true;
159 		}
160 	}
161 
162 	/* old, compat option */
163 	if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
164 		dis_ucode_ldr = true;
165 }
166 
167 void __init load_ucode_bsp(void)
168 {
169 	unsigned int cpuid_1_eax;
170 	bool intel = true;
171 
172 	early_parse_cmdline();
173 
174 	if (microcode_loader_disabled())
175 		return;
176 
177 	cpuid_1_eax = native_cpuid_eax(1);
178 
179 	switch (x86_cpuid_vendor()) {
180 	case X86_VENDOR_INTEL:
181 		if (x86_family(cpuid_1_eax) < 6)
182 			return;
183 		break;
184 
185 	case X86_VENDOR_AMD:
186 		if (x86_family(cpuid_1_eax) < 0x10)
187 			return;
188 		intel = false;
189 		break;
190 
191 	default:
192 		return;
193 	}
194 
195 	if (intel)
196 		load_ucode_intel_bsp(&early_data);
197 	else
198 		load_ucode_amd_bsp(&early_data, cpuid_1_eax);
199 }
200 
201 void load_ucode_ap(void)
202 {
203 	unsigned int cpuid_1_eax;
204 
205 	/*
206 	 * Can't use microcode_loader_disabled() here - .init section
207 	 * hell. It doesn't have to either - the BSP variant must've
208 	 * parsed cmdline already anyway.
209 	 */
210 	if (dis_ucode_ldr)
211 		return;
212 
213 	cpuid_1_eax = native_cpuid_eax(1);
214 
215 	switch (x86_cpuid_vendor()) {
216 	case X86_VENDOR_INTEL:
217 		if (x86_family(cpuid_1_eax) >= 6)
218 			load_ucode_intel_ap();
219 		break;
220 	case X86_VENDOR_AMD:
221 		if (x86_family(cpuid_1_eax) >= 0x10)
222 			load_ucode_amd_ap(cpuid_1_eax);
223 		break;
224 	default:
225 		break;
226 	}
227 }
228 
229 struct cpio_data __init find_microcode_in_initrd(const char *path)
230 {
231 #ifdef CONFIG_BLK_DEV_INITRD
232 	unsigned long start = 0;
233 	size_t size;
234 
235 #ifdef CONFIG_X86_32
236 	size = boot_params.hdr.ramdisk_size;
237 	/* Early load on BSP has a temporary mapping. */
238 	if (size)
239 		start = initrd_start_early;
240 
241 #else /* CONFIG_X86_64 */
242 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
243 	size |= boot_params.hdr.ramdisk_size;
244 
245 	if (size) {
246 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
247 		start |= boot_params.hdr.ramdisk_image;
248 		start += PAGE_OFFSET;
249 	}
250 #endif
251 
252 	/*
253 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
254 	 * has the virtual address of the beginning of the initrd. It also
255 	 * possibly relocates the ramdisk. In either case, initrd_start contains
256 	 * the updated address so use that instead.
257 	 */
258 	if (initrd_start)
259 		start = initrd_start;
260 
261 	return find_cpio_data(path, (void *)start, size, NULL);
262 #else /* !CONFIG_BLK_DEV_INITRD */
263 	return (struct cpio_data){ NULL, 0, "" };
264 #endif
265 }
266 
267 static void reload_early_microcode(unsigned int cpu)
268 {
269 	int vendor, family;
270 
271 	vendor = x86_cpuid_vendor();
272 	family = x86_cpuid_family();
273 
274 	switch (vendor) {
275 	case X86_VENDOR_INTEL:
276 		if (family >= 6)
277 			reload_ucode_intel();
278 		break;
279 	case X86_VENDOR_AMD:
280 		if (family >= 0x10)
281 			reload_ucode_amd(cpu);
282 		break;
283 	default:
284 		break;
285 	}
286 }
287 
/*
 * fake device for request_firmware
 *
 * Created in microcode_init(); its struct device is handed to the vendor
 * request_microcode_fw() callback on late loading.
 */
static struct faux_device *microcode_fdev;
290 
291 #ifdef CONFIG_MICROCODE_LATE_LOADING
292 /*
293  * Late loading dance. Why the heavy-handed stomp_machine effort?
294  *
295  * - HT siblings must be idle and not execute other code while the other sibling
296  *   is loading microcode in order to avoid any negative interactions caused by
297  *   the loading.
298  *
299  * - In addition, microcode update on the cores must be serialized until this
300  *   requirement can be relaxed in the future. Right now, this is conservative
301  *   and good.
302  */
/*
 * Per-CPU rendezvous command. Every participating CPU starts out in
 * SCTRL_WAIT (see setup_cpus()) and spins in wait_for_ctrl() until
 * another CPU stores one of the other two values.
 */
enum sibling_ctrl {
	/* Spinwait with timeout */
	SCTRL_WAIT,
	/* Invoke the microcode_apply() callback */
	SCTRL_APPLY,
	/* Proceed without invoking the microcode_apply() callback */
	SCTRL_DONE,
};
311 
/* Per-CPU control block for the late loading rendezvous */
struct microcode_ctrl {
	/* Rendezvous command, polled by wait_for_ctrl() */
	enum sibling_ctrl	ctrl;
	/* Outcome for this CPU, evaluated in load_late_stop_cpus() */
	enum ucode_state	result;
	/* First CPU in the sibling mask; that CPU runs load_primary() */
	unsigned int		ctrl_cpu;
	/* One-shot flag: the NMI handlers act only when set and clear it */
	bool			nmi_enabled;
};
318 
/* Enabled around stop_machine() when the vendor driver uses NMI rendezvous */
DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
/*
 * late_cpus_in:   online CPUs which have not yet arrived at the rendezvous
 * offline_in_nmi: soft-offlined CPUs which entered the offline NMI handler
 */
static atomic_t late_cpus_in, offline_in_nmi;
/* Spin loop iterations per microsecond, computed in load_late_stop_cpus() */
static unsigned int loops_per_usec;
/* Soft-offlined CPUs collected by setup_cpus() */
static cpumask_t cpu_offline_mask;
324 
325 static noinstr bool wait_for_cpus(atomic_t *cnt)
326 {
327 	unsigned int timeout, loops;
328 
329 	WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
330 
331 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
332 		if (!raw_atomic_read(cnt))
333 			return true;
334 
335 		for (loops = 0; loops < loops_per_usec; loops++)
336 			cpu_relax();
337 
338 		/* If invoked directly, tickle the NMI watchdog */
339 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
340 			instrumentation_begin();
341 			touch_nmi_watchdog();
342 			instrumentation_end();
343 		}
344 	}
345 	/* Prevent the late comers from making progress and let them time out */
346 	raw_atomic_inc(cnt);
347 	return false;
348 }
349 
350 static noinstr bool wait_for_ctrl(void)
351 {
352 	unsigned int timeout, loops;
353 
354 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
355 		if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
356 			return true;
357 
358 		for (loops = 0; loops < loops_per_usec; loops++)
359 			cpu_relax();
360 
361 		/* If invoked directly, tickle the NMI watchdog */
362 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
363 			instrumentation_begin();
364 			touch_nmi_watchdog();
365 			instrumentation_end();
366 		}
367 	}
368 	return false;
369 }
370 
/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 *
 * Returns true once the control word of this CPU left SCTRL_WAIT, false
 * when the initial rendezvous timed out. Does not return if the primary
 * thread fails to respond.
 */
static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
{
	/* Initial rendezvous to ensure that all CPUs have arrived */
	if (!wait_for_cpus(&late_cpus_in)) {
		raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
		return false;
	}

	/*
	 * Wait for primary threads to complete. If one of them hangs due
	 * to the update, there is no way out. This is non-recoverable
	 * because the CPU might hold locks or resources and confuse the
	 * scheduler, watchdogs etc. There is no way to safely evacuate the
	 * machine.
	 */
	if (wait_for_ctrl())
		return true;

	/* panic() is instrumentable code, hence the explicit annotation */
	instrumentation_begin();
	panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
	instrumentation_end();
}
397 
/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 *
 * Runs on every CPU which is not its own control CPU: waits for the
 * control CPU and then either applies the microcode or inherits the
 * result of the control CPU.
 */
static noinstr void load_secondary(unsigned int cpu)
{
	unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
	enum ucode_state ret;

	if (!load_secondary_wait(ctrl_cpu)) {
		/* Rendezvous timed out; the result was already set to UCODE_TIMEOUT */
		instrumentation_begin();
		pr_err_once("load: %d CPUs timed out\n",
			    atomic_read(&late_cpus_in) - 1);
		instrumentation_end();
		return;
	}

	/* Primary thread completed. Allow to invoke instrumentable code */
	instrumentation_begin();
	/*
	 * If the primary succeeded then invoke the apply() callback,
	 * otherwise copy the state from the primary thread.
	 */
	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
		ret = microcode_ops->apply_microcode(cpu);
	else
		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);

	/* Publish the result before marking this CPU as done */
	this_cpu_write(ucode_ctrl.result, ret);
	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
	instrumentation_end();
}
430 
431 static void __load_primary(unsigned int cpu)
432 {
433 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
434 	enum sibling_ctrl ctrl;
435 	enum ucode_state ret;
436 	unsigned int sibling;
437 
438 	/* Initial rendezvous to ensure that all CPUs have arrived */
439 	if (!wait_for_cpus(&late_cpus_in)) {
440 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
441 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
442 		return;
443 	}
444 
445 	ret = microcode_ops->apply_microcode(cpu);
446 	this_cpu_write(ucode_ctrl.result, ret);
447 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
448 
449 	/*
450 	 * If the update was successful, let the siblings run the apply()
451 	 * callback. If not, tell them it's done. This also covers the
452 	 * case where the CPU has uniform loading at package or system
453 	 * scope implemented but does not advertise it.
454 	 */
455 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
456 		ctrl = SCTRL_APPLY;
457 	else
458 		ctrl = SCTRL_DONE;
459 
460 	for_each_cpu(sibling, secondaries) {
461 		if (sibling != cpu)
462 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
463 	}
464 }
465 
466 static bool kick_offline_cpus(unsigned int nr_offl)
467 {
468 	unsigned int cpu, timeout;
469 
470 	for_each_cpu(cpu, &cpu_offline_mask) {
471 		/* Enable the rendezvous handler and send NMI */
472 		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
473 		apic_send_nmi_to_offline_cpu(cpu);
474 	}
475 
476 	/* Wait for them to arrive */
477 	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
478 		if (atomic_read(&offline_in_nmi) == nr_offl)
479 			return true;
480 		udelay(1);
481 	}
482 	/* Let the others time out */
483 	return false;
484 }
485 
486 static void release_offline_cpus(void)
487 {
488 	unsigned int cpu;
489 
490 	for_each_cpu(cpu, &cpu_offline_mask)
491 		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
492 }
493 
494 static void load_primary(unsigned int cpu)
495 {
496 	unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
497 	bool proceed = true;
498 
499 	/* Kick soft-offlined SMT siblings if required */
500 	if (!cpu && nr_offl)
501 		proceed = kick_offline_cpus(nr_offl);
502 
503 	/* If the soft-offlined CPUs did not respond, abort */
504 	if (proceed)
505 		__load_primary(cpu);
506 
507 	/* Unconditionally release soft-offlined SMT siblings if required */
508 	if (!cpu && nr_offl)
509 		release_offline_cpus();
510 }
511 
/*
 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
 * in the NMI rendezvous to protect against a concurrent NMI on affected
 * CPUs.
 *
 * Acts only when nmi_enabled was set for this CPU (see kick_offline_cpus()),
 * then spins until the control word changes or wait_for_ctrl() times out.
 */
void noinstr microcode_offline_nmi_handler(void)
{
	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
		return;
	/* One-shot: disarm before doing anything else */
	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
	/* Check in; kick_offline_cpus() polls this counter */
	raw_atomic_inc(&offline_in_nmi);
	wait_for_ctrl();
}
526 
527 static noinstr bool microcode_update_handler(void)
528 {
529 	unsigned int cpu = raw_smp_processor_id();
530 
531 	if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
532 		instrumentation_begin();
533 		load_primary(cpu);
534 		instrumentation_end();
535 	} else {
536 		load_secondary(cpu);
537 	}
538 
539 	instrumentation_begin();
540 	touch_nmi_watchdog();
541 	instrumentation_end();
542 
543 	return true;
544 }
545 
/*
 * Protection against instrumentation is required for CPUs which are not
 * safe against an NMI which is delivered to the secondary SMT sibling
 * while the primary thread updates the microcode. Instrumentation can end
 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
 * which is the opposite of what the NMI rendezvous is trying to achieve.
 *
 * The primary thread is safe versus instrumentation as the actual
 * microcode update handles this correctly. It's only the sibling code
 * path which must be NMI safe until the primary thread completed the
 * update.
 *
 * Returns false when the handler was not armed for this CPU, otherwise
 * the return value of microcode_update_handler().
 */
bool noinstr microcode_nmi_handler(void)
{
	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
		return false;

	/* One-shot: disarm before running the update */
	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
	return microcode_update_handler();
}
566 
567 static int load_cpus_stopped(void *unused)
568 {
569 	if (microcode_ops->use_nmi) {
570 		/* Enable the NMI handler and raise NMI */
571 		this_cpu_write(ucode_ctrl.nmi_enabled, true);
572 		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
573 	} else {
574 		/* Just invoke the handler directly */
575 		microcode_update_handler();
576 	}
577 	return 0;
578 }
579 
580 static int load_late_stop_cpus(bool is_safe)
581 {
582 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
583 	unsigned int nr_offl, offline = 0;
584 	int old_rev = boot_cpu_data.microcode;
585 	struct cpuinfo_x86 prev_info;
586 
587 	if (!is_safe) {
588 		pr_err("Late microcode loading without minimal revision check.\n");
589 		pr_err("You should switch to early loading, if possible.\n");
590 	}
591 
592 	atomic_set(&late_cpus_in, num_online_cpus());
593 	atomic_set(&offline_in_nmi, 0);
594 	loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
595 
596 	/*
597 	 * Take a snapshot before the microcode update in order to compare and
598 	 * check whether any bits changed after an update.
599 	 */
600 	store_cpu_caps(&prev_info);
601 
602 	if (microcode_ops->use_nmi)
603 		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
604 
605 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
606 
607 	if (microcode_ops->use_nmi)
608 		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
609 
610 	/* Analyze the results */
611 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
612 		switch (per_cpu(ucode_ctrl.result, cpu)) {
613 		case UCODE_UPDATED:	updated++; break;
614 		case UCODE_TIMEOUT:	timedout++; break;
615 		case UCODE_OK:		siblings++; break;
616 		case UCODE_OFFLINE:	offline++; break;
617 		default:		failed++; break;
618 		}
619 	}
620 
621 	if (microcode_ops->finalize_late_load)
622 		microcode_ops->finalize_late_load(!updated);
623 
624 	if (!updated) {
625 		/* Nothing changed. */
626 		if (!failed && !timedout)
627 			return 0;
628 
629 		nr_offl = cpumask_weight(&cpu_offline_mask);
630 		if (offline < nr_offl) {
631 			pr_warn("%u offline siblings did not respond.\n",
632 				nr_offl - atomic_read(&offline_in_nmi));
633 			return -EIO;
634 		}
635 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
636 		       failed, timedout);
637 		return -EIO;
638 	}
639 
640 	if (!is_safe || failed || timedout)
641 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
642 
643 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
644 	if (failed || timedout) {
645 		pr_err("load incomplete. %u CPUs timed out or failed\n",
646 		       num_online_cpus() - (updated + siblings));
647 	}
648 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
649 	microcode_check(&prev_info);
650 
651 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
652 }
653 
654 /*
655  * This function does two things:
656  *
657  * 1) Ensure that all required CPUs which are present and have been booted
658  *    once are online.
659  *
660  *    To pass this check, all primary threads must be online.
661  *
662  *    If the microcode load is not safe against NMI then all SMT threads
663  *    must be online as well because they still react to NMIs when they are
664  *    soft-offlined and parked in one of the play_dead() variants. So if a
665  *    NMI hits while the primary thread updates the microcode the resulting
666  *    behaviour is undefined. The default play_dead() implementation on
667  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
668  *    against a microcode update which affects MWAIT.
669  *
670  *    As soft-offlined CPUs still react on NMIs, the SMT sibling
671  *    restriction can be lifted when the vendor driver signals to use NMI
672  *    for rendezvous and the APIC provides a mechanism to send an NMI to a
673  *    soft-offlined CPU. The soft-offlined CPUs are then able to
674  *    participate in the rendezvous in a trivial stub handler.
675  *
676  * 2) Initialize the per CPU control structure and create a cpumask
677  *    which contains "offline"; secondary threads, so they can be handled
678  *    correctly by a control CPU.
679  */
680 static bool setup_cpus(void)
681 {
682 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
683 	bool allow_smt_offline;
684 	unsigned int cpu;
685 
686 	allow_smt_offline = microcode_ops->nmi_safe ||
687 		(microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
688 
689 	cpumask_clear(&cpu_offline_mask);
690 
691 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
692 		/*
693 		 * Offline CPUs sit in one of the play_dead() functions
694 		 * with interrupts disabled, but they still react on NMIs
695 		 * and execute arbitrary code. Also MWAIT being updated
696 		 * while the offline CPU sits there is not necessarily safe
697 		 * on all CPU variants.
698 		 *
699 		 * Mark them in the offline_cpus mask which will be handled
700 		 * by CPU0 later in the update process.
701 		 *
702 		 * Ensure that the primary thread is online so that it is
703 		 * guaranteed that all cores are updated.
704 		 */
705 		if (!cpu_online(cpu)) {
706 			if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
707 				pr_err("CPU %u not online, loading aborted\n", cpu);
708 				return false;
709 			}
710 			cpumask_set_cpu(cpu, &cpu_offline_mask);
711 			per_cpu(ucode_ctrl, cpu) = ctrl;
712 			continue;
713 		}
714 
715 		/*
716 		 * Initialize the per CPU state. This is core scope for now,
717 		 * but prepared to take package or system scope into account.
718 		 */
719 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
720 		per_cpu(ucode_ctrl, cpu) = ctrl;
721 	}
722 	return true;
723 }
724 
725 static int load_late_locked(void)
726 {
727 	if (!setup_cpus())
728 		return -EBUSY;
729 
730 	switch (microcode_ops->request_microcode_fw(0, &microcode_fdev->dev)) {
731 	case UCODE_NEW:
732 		return load_late_stop_cpus(false);
733 	case UCODE_NEW_SAFE:
734 		return load_late_stop_cpus(true);
735 	case UCODE_NFOUND:
736 		return -ENOENT;
737 	case UCODE_OK:
738 		return 0;
739 	default:
740 		return -EBADFD;
741 	}
742 }
743 
744 static ssize_t reload_store(struct device *dev,
745 			    struct device_attribute *attr,
746 			    const char *buf, size_t size)
747 {
748 	unsigned long val;
749 	ssize_t ret;
750 
751 	ret = kstrtoul(buf, 0, &val);
752 	if (ret || val != 1)
753 		return -EINVAL;
754 
755 	cpus_read_lock();
756 	ret = load_late_locked();
757 	cpus_read_unlock();
758 
759 	return ret ? : size;
760 }
761 
762 static DEVICE_ATTR_WO(reload);
763 #endif
764 
765 static ssize_t version_show(struct device *dev,
766 			struct device_attribute *attr, char *buf)
767 {
768 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
769 
770 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
771 }
772 
773 static ssize_t processor_flags_show(struct device *dev,
774 			struct device_attribute *attr, char *buf)
775 {
776 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
777 
778 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
779 }
780 
static DEVICE_ATTR_RO(version);
static DEVICE_ATTR_RO(processor_flags);

/* Read-only attributes of the per-CPU "microcode" group */
static struct attribute *mc_default_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_processor_flags.attr,
	NULL
};

/* Created in mc_cpu_online() and removed in mc_cpu_down_prep() */
static const struct attribute_group mc_attr_group = {
	.attrs			= mc_default_attrs,
	.name			= "microcode",
};
794 
795 static void microcode_fini_cpu(int cpu)
796 {
797 	if (microcode_ops->microcode_fini_cpu)
798 		microcode_ops->microcode_fini_cpu(cpu);
799 }
800 
801 /**
802  * microcode_bsp_resume - Update boot CPU microcode during resume.
803  */
804 void microcode_bsp_resume(void)
805 {
806 	int cpu = smp_processor_id();
807 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
808 
809 	if (uci->mc)
810 		microcode_ops->apply_microcode(cpu);
811 	else
812 		reload_early_microcode(cpu);
813 }
814 
/* Registered in microcode_init(); reloads the microcode on resume */
static struct syscore_ops mc_syscore_ops = {
	.resume	= microcode_bsp_resume,
};
818 
819 static int mc_cpu_online(unsigned int cpu)
820 {
821 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
822 	struct device *dev = get_cpu_device(cpu);
823 
824 	memset(uci, 0, sizeof(*uci));
825 
826 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
827 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
828 	if (!cpu)
829 		boot_cpu_data.microcode = uci->cpu_sig.rev;
830 
831 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
832 		pr_err("Failed to create group for CPU%d\n", cpu);
833 	return 0;
834 }
835 
836 static int mc_cpu_down_prep(unsigned int cpu)
837 {
838 	struct device *dev = get_cpu_device(cpu);
839 
840 	microcode_fini_cpu(cpu);
841 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
842 	return 0;
843 }
844 
/*
 * Attributes of the "microcode" group on the CPU subsystem root device.
 * Contains only the terminator without CONFIG_MICROCODE_LATE_LOADING.
 */
static struct attribute *cpu_root_microcode_attrs[] = {
#ifdef CONFIG_MICROCODE_LATE_LOADING
	&dev_attr_reload.attr,
#endif
	NULL
};

/* Created in microcode_init() */
static const struct attribute_group cpu_root_microcode_group = {
	.name  = "microcode",
	.attrs = cpu_root_microcode_attrs,
};
856 
857 static int __init microcode_init(void)
858 {
859 	struct device *dev_root;
860 	struct cpuinfo_x86 *c = &boot_cpu_data;
861 	int error;
862 
863 	if (microcode_loader_disabled())
864 		return -EINVAL;
865 
866 	if (c->x86_vendor == X86_VENDOR_INTEL)
867 		microcode_ops = init_intel_microcode();
868 	else if (c->x86_vendor == X86_VENDOR_AMD)
869 		microcode_ops = init_amd_microcode();
870 	else
871 		pr_err("no support for this CPU vendor\n");
872 
873 	if (!microcode_ops)
874 		return -ENODEV;
875 
876 	pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
877 
878 	if (early_data.new_rev)
879 		pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
880 
881 	microcode_fdev = faux_device_create("microcode", NULL, NULL);
882 	if (!microcode_fdev)
883 		return -ENODEV;
884 
885 	dev_root = bus_get_dev_root(&cpu_subsys);
886 	if (dev_root) {
887 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
888 		put_device(dev_root);
889 		if (error) {
890 			pr_err("Error creating microcode group!\n");
891 			goto out_pdev;
892 		}
893 	}
894 
895 	register_syscore_ops(&mc_syscore_ops);
896 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
897 			  mc_cpu_online, mc_cpu_down_prep);
898 
899 	return 0;
900 
901  out_pdev:
902 	faux_device_destroy(microcode_fdev);
903 	return error;
904 
905 }
906 late_initcall(microcode_init);
907