xref: /linux/arch/x86/kernel/cpu/microcode/core.c (revision 97cc7dc16aaee163e15173009c063fc9cd42b5ff)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
6  *	      2006	Shaohua Li <shaohua.li@intel.com>
7  *	      2013-2016	Borislav Petkov <bp@alien8.de>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
12  *			   H Peter Anvin <hpa@zytor.com>
13  *		  (C) 2015 Borislav Petkov <bp@alien8.de>
14  *
15  * This driver allows upgrading microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/stop_machine.h>
21 #include <linux/device/faux.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
32 #include <linux/fs.h>
33 #include <linux/mm.h>
34 
35 #include <asm/apic.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/cpuid/api.h>
38 #include <asm/perf_event.h>
39 #include <asm/processor.h>
40 #include <asm/cmdline.h>
41 #include <asm/msr.h>
42 #include <asm/setup.h>
43 
44 #include "internal.h"
45 
/* Vendor loader callbacks; assigned in microcode_init() from init_intel_microcode()/init_amd_microcode(). */
46 static struct microcode_ops *microcode_ops;
/*
 * True when microcode loading is disabled. Set from the command line
 * ("dis_ucode_ldr"), when CPUID is unavailable, or by
 * microcode_loader_disabled() (hypervisor present / final AMD patch level).
 */
47 static bool dis_ucode_ldr;
48 
/*
 * Defaults to CONFIG_MICROCODE_LATE_FORCE_MINREV and is set to true by the
 * "microcode=force_minrev" command line option. Not consumed in this file.
 */
49 bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
50 
51 /*
52  * Those below should be behind CONFIG_MICROCODE_DBG ifdeffery but in
53  * order to not uglify the code with ifdeffery and use IS_ENABLED()
54  * instead, leave them in. When microcode debugging is not enabled,
55  * those are meaningless anyway.
56  */
57 /* base microcode revision for debugging */
/* Parsed as hex from "microcode=base_rev=<val>" when CONFIG_MICROCODE_DBG is enabled. */
58 u32 base_rev;
/* NOTE(review): not referenced in this file; presumably per-CPU debug revisions - confirm against users. */
59 u32 microcode_rev[NR_CPUS] = {};
60 
/* Set in load_ucode_bsp() from CPUID[1]:ECX bit 31 when CPUID is available. */
61 bool __ro_after_init x86_hypervisor_present;
62 
63 /*
64  * Synchronization.
65  *
66  * All non cpu-hotplug-callback call sites use:
67  *
68  * - cpus_read_lock/unlock() to synchronize with
69  *   the cpu-hotplug-callback call sites.
70  *
71  * We guarantee that only a single cpu is being
72  * updated at any particular moment of time.
73  */
/* Microcode state per CPU; this file indexes it by CPU number (ucode_cpu_info + cpu) and by dev->id. */
74 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
75 
76 /*
77  * Those patch levels cannot be updated to newer ones and thus should be final.
78  */
79 static u32 final_levels[] = {
80 	0x01000098,
81 	0x0100009f,
82 	0x010000af,
83 	0, /* T-101 terminator */
84 };
85 
86 struct early_load_data early_data;
87 
88 /*
89  * Check the current patch level on this CPU.
90  *
91  * Returns:
92  *  - true: if update should stop
93  *  - false: otherwise
94  */
95 static bool amd_check_current_patch_level(void)
96 {
97 	u32 lvl, dummy, i;
98 	u32 *levels;
99 
100 	if (x86_cpuid_vendor() != X86_VENDOR_AMD)
101 		return false;
102 
103 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
104 
105 	levels = final_levels;
106 
107 	for (i = 0; levels[i]; i++) {
108 		if (lvl == levels[i])
109 			return true;
110 	}
111 	return false;
112 }
113 
114 bool __init microcode_loader_disabled(void)
115 {
116 	if (dis_ucode_ldr)
117 		return true;
118 
119 	/*
120 	 * Disable when:
121 	 *
122 	 * 1) The CPU does not support CPUID, detected below in
123 	 *    load_ucode_bsp().
124 	 *
125 	 * 2) Bit 31 in CPUID[1]:ECX is set
126 	 *    The bit is reserved for hypervisor use. This is still not
127 	 *    completely accurate as XEN PV guests don't see that CPUID bit
128 	 *    set, but that's good enough as they don't land on the BSP
129 	 *    path anyway.
130 	 *
131 	 * 3) Certain AMD patch levels are not allowed to be
132 	 *    overwritten.
133 	 */
134 	if ((x86_hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
135 	    amd_check_current_patch_level())
136 		dis_ucode_ldr = true;
137 
138 	return dis_ucode_ldr;
139 }
140 
141 static void __init early_parse_cmdline(void)
142 {
143 	char cmd_buf[64] = {};
144 	char *s, *p = cmd_buf;
145 
146 	if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
147 		while ((s = strsep(&p, ","))) {
148 			if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
149 				if (strstr(s, "base_rev=")) {
150 					/* advance to the option arg */
151 					strsep(&s, "=");
152 					if (kstrtouint(s, 16, &base_rev)) { ; }
153 				}
154 			}
155 
156 			if (!strcmp("force_minrev", s))
157 				force_minrev = true;
158 
159 			if (!strcmp(s, "dis_ucode_ldr"))
160 				dis_ucode_ldr = true;
161 		}
162 	}
163 
164 	/* old, compat option */
165 	if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
166 		dis_ucode_ldr = true;
167 }
168 
169 void __init load_ucode_bsp(void)
170 {
171 	unsigned int cpuid_1_eax;
172 	bool intel = true;
173 
174 	early_parse_cmdline();
175 
176 	if (!cpuid_feature())
177 		dis_ucode_ldr = true;
178 	else
179 		x86_hypervisor_present = native_cpuid_ecx(1) & BIT(31);
180 
181 	if (microcode_loader_disabled())
182 		return;
183 
184 	cpuid_1_eax = native_cpuid_eax(1);
185 
186 	switch (x86_cpuid_vendor()) {
187 	case X86_VENDOR_INTEL:
188 		if (x86_family(cpuid_1_eax) < 6)
189 			return;
190 		break;
191 
192 	case X86_VENDOR_AMD:
193 		if (x86_family(cpuid_1_eax) < 0x10)
194 			return;
195 		intel = false;
196 		break;
197 
198 	default:
199 		return;
200 	}
201 
202 	if (intel)
203 		load_ucode_intel_bsp(&early_data);
204 	else
205 		load_ucode_amd_bsp(&early_data, cpuid_1_eax);
206 }
207 
208 void load_ucode_ap(void)
209 {
210 	unsigned int cpuid_1_eax;
211 
212 	/*
213 	 * Can't use microcode_loader_disabled() here - .init section
214 	 * hell. It doesn't have to either - the BSP variant must've
215 	 * parsed cmdline already anyway.
216 	 */
217 	if (dis_ucode_ldr)
218 		return;
219 
220 	cpuid_1_eax = native_cpuid_eax(1);
221 
222 	switch (x86_cpuid_vendor()) {
223 	case X86_VENDOR_INTEL:
224 		if (x86_family(cpuid_1_eax) >= 6)
225 			load_ucode_intel_ap();
226 		break;
227 	case X86_VENDOR_AMD:
228 		if (x86_family(cpuid_1_eax) >= 0x10)
229 			load_ucode_amd_ap(cpuid_1_eax);
230 		break;
231 	default:
232 		break;
233 	}
234 }
235 
236 struct cpio_data __init find_microcode_in_initrd(const char *path)
237 {
238 #ifdef CONFIG_BLK_DEV_INITRD
239 	unsigned long start = 0;
240 	size_t size;
241 
242 #ifdef CONFIG_X86_32
243 	size = boot_params.hdr.ramdisk_size;
244 	/* Early load on BSP has a temporary mapping. */
245 	if (size)
246 		start = initrd_start_early;
247 
248 #else /* CONFIG_X86_64 */
249 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
250 	size |= boot_params.hdr.ramdisk_size;
251 
252 	if (size) {
253 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
254 		start |= boot_params.hdr.ramdisk_image;
255 		start += PAGE_OFFSET;
256 	}
257 #endif
258 
259 	/*
260 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
261 	 * has the virtual address of the beginning of the initrd. It also
262 	 * possibly relocates the ramdisk. In either case, initrd_start contains
263 	 * the updated address so use that instead.
264 	 */
265 	if (initrd_start)
266 		start = initrd_start;
267 
268 	return find_cpio_data(path, (void *)start, size, NULL);
269 #else /* !CONFIG_BLK_DEV_INITRD */
270 	return (struct cpio_data){ NULL, 0, "" };
271 #endif
272 }
273 
274 static void reload_early_microcode(unsigned int cpu)
275 {
276 	int vendor, family;
277 
278 	vendor = x86_cpuid_vendor();
279 	family = x86_cpuid_family();
280 
281 	switch (vendor) {
282 	case X86_VENDOR_INTEL:
283 		if (family >= 6)
284 			reload_ucode_intel();
285 		break;
286 	case X86_VENDOR_AMD:
287 		if (family >= 0x10)
288 			reload_ucode_amd(cpu);
289 		break;
290 	default:
291 		break;
292 	}
293 }
294 
295 /* fake device for request_firmware */
/* Created in microcode_init(); its ->dev is handed to ->request_microcode_fw() on late load. */
296 static struct faux_device *microcode_fdev;
297 
298 #ifdef CONFIG_MICROCODE_LATE_LOADING
299 /*
300  * Late loading dance. Why the heavy-handed stomp_machine effort?
301  *
302  * - HT siblings must be idle and not execute other code while the other sibling
303  *   is loading microcode in order to avoid any negative interactions caused by
304  *   the loading.
305  *
306  * - In addition, microcode update on the cores must be serialized until this
307  *   requirement can be relaxed in the future. Right now, this is conservative
308  *   and good.
309  */
/* Per-CPU command which steers a CPU through the late-load rendezvous. */
310 enum sibling_ctrl {
311 	/* Spinwait with timeout */
312 	SCTRL_WAIT,
313 	/* Invoke the microcode_apply() callback */
314 	SCTRL_APPLY,
315 	/* Proceed without invoking the microcode_apply() callback */
316 	SCTRL_DONE,
317 };
318 
/* Per-CPU rendezvous state; (re)initialized by setup_cpus() before a late load. */
319 struct microcode_ctrl {
	/* Current command; wait_for_ctrl() spins while this is SCTRL_WAIT */
320 	enum sibling_ctrl	ctrl;
	/* Outcome for this CPU, tallied in load_late_stop_cpus() */
321 	enum ucode_state	result;
	/* First CPU in this CPU's sibling mask; that CPU runs load_primary() */
322 	unsigned int		ctrl_cpu;
	/* One-shot gate for the NMI handlers; they clear it on entry */
323 	bool			nmi_enabled;
324 };
325 
/* Enabled around stop_machine() in load_late_stop_cpus() when ->use_nmi is set. */
326 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
327 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
/*
 * late_cpus_in: set to num_online_cpus() before the rendezvous and
 * decremented by every arriving CPU in wait_for_cpus().
 * offline_in_nmi: count of soft-offlined CPUs which entered
 * microcode_offline_nmi_handler().
 */
328 static atomic_t late_cpus_in, offline_in_nmi;
/* cpu_relax() iterations per timeout step; loops_per_jiffy / (TICK_NSEC / 1000). */
329 static unsigned int loops_per_usec;
/* Present, booted-once CPUs which are currently not online; built by setup_cpus(). */
330 static cpumask_t cpu_offline_mask;
331 
332 static noinstr bool wait_for_cpus(atomic_t *cnt)
333 {
334 	unsigned int timeout, loops;
335 
336 	WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
337 
338 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
339 		if (!raw_atomic_read(cnt))
340 			return true;
341 
342 		for (loops = 0; loops < loops_per_usec; loops++)
343 			cpu_relax();
344 
345 		/* If invoked directly, tickle the NMI watchdog */
346 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
347 			instrumentation_begin();
348 			touch_nmi_watchdog();
349 			instrumentation_end();
350 		}
351 	}
352 	/* Prevent the late comers from making progress and let them time out */
353 	raw_atomic_inc(cnt);
354 	return false;
355 }
356 
357 static noinstr bool wait_for_ctrl(void)
358 {
359 	unsigned int timeout, loops;
360 
361 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
362 		if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
363 			return true;
364 
365 		for (loops = 0; loops < loops_per_usec; loops++)
366 			cpu_relax();
367 
368 		/* If invoked directly, tickle the NMI watchdog */
369 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
370 			instrumentation_begin();
371 			touch_nmi_watchdog();
372 			instrumentation_end();
373 		}
374 	}
375 	return false;
376 }
377 
378 /*
379  * Protected against instrumentation up to the point where the primary
380  * thread completed the update. See microcode_nmi_handler() for details.
381  */
382 static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
383 {
384 	/* Initial rendezvous to ensure that all CPUs have arrived */
385 	if (!wait_for_cpus(&late_cpus_in)) {
386 		raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
387 		return false;
388 	}
389 
390 	/*
391 	 * Wait for primary threads to complete. If one of them hangs due
392 	 * to the update, there is no way out. This is non-recoverable
393 	 * because the CPU might hold locks or resources and confuse the
394 	 * scheduler, watchdogs etc. There is no way to safely evacuate the
395 	 * machine.
396 	 */
397 	if (wait_for_ctrl())
398 		return true;
399 
400 	instrumentation_begin();
401 	panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
402 	instrumentation_end();
403 }
404 
405 /*
406  * Protected against instrumentation up to the point where the primary
407  * thread completed the update. See microcode_nmi_handler() for details.
408  */
409 static noinstr void load_secondary(unsigned int cpu)
410 {
411 	unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
412 	enum ucode_state ret;
413 
414 	if (!load_secondary_wait(ctrl_cpu)) {
415 		instrumentation_begin();
416 		pr_err_once("load: %d CPUs timed out\n",
417 			    atomic_read(&late_cpus_in) - 1);
418 		instrumentation_end();
419 		return;
420 	}
421 
422 	/* Primary thread completed. Allow to invoke instrumentable code */
423 	instrumentation_begin();
424 	/*
425 	 * If the primary succeeded then invoke the apply() callback,
426 	 * otherwise copy the state from the primary thread.
427 	 */
428 	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
429 		ret = microcode_ops->apply_microcode(cpu);
430 	else
431 		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
432 
433 	this_cpu_write(ucode_ctrl.result, ret);
434 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
435 	instrumentation_end();
436 }
437 
438 static void __load_primary(unsigned int cpu)
439 {
440 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
441 	enum sibling_ctrl ctrl;
442 	enum ucode_state ret;
443 	unsigned int sibling;
444 
445 	/* Initial rendezvous to ensure that all CPUs have arrived */
446 	if (!wait_for_cpus(&late_cpus_in)) {
447 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
448 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
449 		return;
450 	}
451 
452 	ret = microcode_ops->apply_microcode(cpu);
453 	this_cpu_write(ucode_ctrl.result, ret);
454 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
455 
456 	/*
457 	 * If the update was successful, let the siblings run the apply()
458 	 * callback. If not, tell them it's done. This also covers the
459 	 * case where the CPU has uniform loading at package or system
460 	 * scope implemented but does not advertise it.
461 	 */
462 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
463 		ctrl = SCTRL_APPLY;
464 	else
465 		ctrl = SCTRL_DONE;
466 
467 	for_each_cpu(sibling, secondaries) {
468 		if (sibling != cpu)
469 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
470 	}
471 }
472 
473 static bool kick_offline_cpus(unsigned int nr_offl)
474 {
475 	unsigned int cpu, timeout;
476 
477 	for_each_cpu(cpu, &cpu_offline_mask) {
478 		/* Enable the rendezvous handler and send NMI */
479 		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
480 		apic_send_nmi_to_offline_cpu(cpu);
481 	}
482 
483 	/* Wait for them to arrive */
484 	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
485 		if (atomic_read(&offline_in_nmi) == nr_offl)
486 			return true;
487 		udelay(1);
488 	}
489 	/* Let the others time out */
490 	return false;
491 }
492 
493 static void release_offline_cpus(void)
494 {
495 	unsigned int cpu;
496 
497 	for_each_cpu(cpu, &cpu_offline_mask)
498 		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
499 }
500 
501 static void load_primary(unsigned int cpu)
502 {
503 	unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
504 	bool proceed = true;
505 
506 	/* Kick soft-offlined SMT siblings if required */
507 	if (!cpu && nr_offl)
508 		proceed = kick_offline_cpus(nr_offl);
509 
510 	/* If the soft-offlined CPUs did not respond, abort */
511 	if (proceed)
512 		__load_primary(cpu);
513 
514 	/* Unconditionally release soft-offlined SMT siblings if required */
515 	if (!cpu && nr_offl)
516 		release_offline_cpus();
517 }
518 
519 /*
520  * Minimal stub rendezvous handler for soft-offlined CPUs which participate
521  * in the NMI rendezvous to protect against a concurrent NMI on affected
522  * CPUs.
523  */
524 void noinstr microcode_offline_nmi_handler(void)
525 {
526 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
527 		return;
528 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
529 	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
530 	raw_atomic_inc(&offline_in_nmi);
531 	wait_for_ctrl();
532 }
533 
534 static noinstr bool microcode_update_handler(void)
535 {
536 	unsigned int cpu = raw_smp_processor_id();
537 
538 	if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
539 		instrumentation_begin();
540 		load_primary(cpu);
541 		instrumentation_end();
542 	} else {
543 		load_secondary(cpu);
544 	}
545 
546 	instrumentation_begin();
547 	touch_nmi_watchdog();
548 	instrumentation_end();
549 
550 	return true;
551 }
552 
553 /*
554  * Protection against instrumentation is required for CPUs which are not
555  * safe against an NMI which is delivered to the secondary SMT sibling
556  * while the primary thread updates the microcode. Instrumentation can end
557  * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
558  * which is the opposite of what the NMI rendezvous is trying to achieve.
559  *
560  * The primary thread is safe versus instrumentation as the actual
561  * microcode update handles this correctly. It's only the sibling code
562  * path which must be NMI safe until the primary thread completed the
563  * update.
564  */
565 bool noinstr microcode_nmi_handler(void)
566 {
567 	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
568 		return false;
569 
570 	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
571 	return microcode_update_handler();
572 }
573 
574 static int load_cpus_stopped(void *unused)
575 {
576 	if (microcode_ops->use_nmi) {
577 		/* Enable the NMI handler and raise NMI */
578 		this_cpu_write(ucode_ctrl.nmi_enabled, true);
579 		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
580 	} else {
581 		/* Just invoke the handler directly */
582 		microcode_update_handler();
583 	}
584 	return 0;
585 }
586 
587 static int load_late_stop_cpus(bool is_safe)
588 {
589 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
590 	unsigned int nr_offl, offline = 0;
591 	int old_rev = boot_cpu_data.microcode;
592 	struct cpuinfo_x86 prev_info;
593 
594 	if (!is_safe) {
595 		pr_err("Late microcode loading without minimal revision check.\n");
596 		pr_err("You should switch to early loading, if possible.\n");
597 	}
598 
599 	/*
600 	 * Pre-load the microcode image into a staging device. This
601 	 * process is preemptible and does not require stopping CPUs.
602 	 * Successful staging simplifies the subsequent late-loading
603 	 * process, reducing rendezvous time.
604 	 *
605 	 * Even if the transfer fails, the update will proceed as usual.
606 	 */
607 	if (microcode_ops->use_staging)
608 		microcode_ops->stage_microcode();
609 
610 	atomic_set(&late_cpus_in, num_online_cpus());
611 	atomic_set(&offline_in_nmi, 0);
612 	loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
613 
614 	/*
615 	 * Take a snapshot before the microcode update in order to compare and
616 	 * check whether any bits changed after an update.
617 	 */
618 	store_cpu_caps(&prev_info);
619 
620 	if (microcode_ops->use_nmi)
621 		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
622 
623 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
624 
625 	if (microcode_ops->use_nmi)
626 		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
627 
628 	/* Analyze the results */
629 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
630 		switch (per_cpu(ucode_ctrl.result, cpu)) {
631 		case UCODE_UPDATED:	updated++; break;
632 		case UCODE_TIMEOUT:	timedout++; break;
633 		case UCODE_OK:		siblings++; break;
634 		case UCODE_OFFLINE:	offline++; break;
635 		default:		failed++; break;
636 		}
637 	}
638 
639 	if (microcode_ops->finalize_late_load)
640 		microcode_ops->finalize_late_load(!updated);
641 
642 	if (!updated) {
643 		/* Nothing changed. */
644 		if (!failed && !timedout)
645 			return 0;
646 
647 		nr_offl = cpumask_weight(&cpu_offline_mask);
648 		if (offline < nr_offl) {
649 			pr_warn("%u offline siblings did not respond.\n",
650 				nr_offl - atomic_read(&offline_in_nmi));
651 			return -EIO;
652 		}
653 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
654 		       failed, timedout);
655 		return -EIO;
656 	}
657 
658 	if (!is_safe || failed || timedout)
659 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
660 
661 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
662 	if (failed || timedout) {
663 		pr_err("load incomplete. %u CPUs timed out or failed\n",
664 		       num_online_cpus() - (updated + siblings));
665 	}
666 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
667 	microcode_check(&prev_info);
668 
669 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
670 }
671 
672 /*
673  * This function does two things:
674  *
675  * 1) Ensure that all required CPUs which are present and have been booted
676  *    once are online.
677  *
678  *    To pass this check, all primary threads must be online.
679  *
680  *    If the microcode load is not safe against NMI then all SMT threads
681  *    must be online as well because they still react to NMIs when they are
682  *    soft-offlined and parked in one of the play_dead() variants. So if a
683  *    NMI hits while the primary thread updates the microcode the resulting
684  *    behaviour is undefined. The default play_dead() implementation on
685  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
686  *    against a microcode update which affects MWAIT.
687  *
688  *    As soft-offlined CPUs still react on NMIs, the SMT sibling
689  *    restriction can be lifted when the vendor driver signals to use NMI
690  *    for rendezvous and the APIC provides a mechanism to send an NMI to a
691  *    soft-offlined CPU. The soft-offlined CPUs are then able to
692  *    participate in the rendezvous in a trivial stub handler.
693  *
694  * 2) Initialize the per CPU control structure and create a cpumask
695  *    which contains "offline"; secondary threads, so they can be handled
696  *    correctly by a control CPU.
697  */
698 static bool setup_cpus(void)
699 {
700 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
701 	bool allow_smt_offline;
702 	unsigned int cpu;
703 
704 	allow_smt_offline = microcode_ops->nmi_safe ||
705 		(microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
706 
707 	cpumask_clear(&cpu_offline_mask);
708 
709 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
710 		/*
711 		 * Offline CPUs sit in one of the play_dead() functions
712 		 * with interrupts disabled, but they still react on NMIs
713 		 * and execute arbitrary code. Also MWAIT being updated
714 		 * while the offline CPU sits there is not necessarily safe
715 		 * on all CPU variants.
716 		 *
717 		 * Mark them in the offline_cpus mask which will be handled
718 		 * by CPU0 later in the update process.
719 		 *
720 		 * Ensure that the primary thread is online so that it is
721 		 * guaranteed that all cores are updated.
722 		 */
723 		if (!cpu_online(cpu)) {
724 			if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
725 				pr_err("CPU %u not online, loading aborted\n", cpu);
726 				return false;
727 			}
728 			cpumask_set_cpu(cpu, &cpu_offline_mask);
729 			per_cpu(ucode_ctrl, cpu) = ctrl;
730 			continue;
731 		}
732 
733 		/*
734 		 * Initialize the per CPU state. This is core scope for now,
735 		 * but prepared to take package or system scope into account.
736 		 */
737 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
738 		per_cpu(ucode_ctrl, cpu) = ctrl;
739 	}
740 	return true;
741 }
742 
743 static int load_late_locked(void)
744 {
745 	if (!setup_cpus())
746 		return -EBUSY;
747 
748 	switch (microcode_ops->request_microcode_fw(0, &microcode_fdev->dev)) {
749 	case UCODE_NEW:
750 		return load_late_stop_cpus(false);
751 	case UCODE_NEW_SAFE:
752 		return load_late_stop_cpus(true);
753 	case UCODE_NFOUND:
754 		return -ENOENT;
755 	case UCODE_OK:
756 		return 0;
757 	default:
758 		return -EBADFD;
759 	}
760 }
761 
762 static ssize_t reload_store(struct device *dev,
763 			    struct device_attribute *attr,
764 			    const char *buf, size_t size)
765 {
766 	unsigned long val;
767 	ssize_t ret;
768 
769 	ret = kstrtoul(buf, 0, &val);
770 	if (ret || val != 1)
771 		return -EINVAL;
772 
773 	cpus_read_lock();
774 	ret = load_late_locked();
775 	cpus_read_unlock();
776 
777 	return ret ? : size;
778 }
779 
780 static DEVICE_ATTR_WO(reload);
781 #endif
782 
783 static ssize_t version_show(struct device *dev,
784 			struct device_attribute *attr, char *buf)
785 {
786 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
787 
788 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
789 }
790 
791 static ssize_t processor_flags_show(struct device *dev,
792 			struct device_attribute *attr, char *buf)
793 {
794 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
795 
796 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
797 }
798 
799 static DEVICE_ATTR_RO(version);
800 static DEVICE_ATTR_RO(processor_flags);
801 
802 static struct attribute *mc_default_attrs[] = {
803 	&dev_attr_version.attr,
804 	&dev_attr_processor_flags.attr,
805 	NULL
806 };
807 
808 static const struct attribute_group mc_attr_group = {
809 	.attrs			= mc_default_attrs,
810 	.name			= "microcode",
811 };
812 
813 static void microcode_fini_cpu(int cpu)
814 {
815 	if (microcode_ops->microcode_fini_cpu)
816 		microcode_ops->microcode_fini_cpu(cpu);
817 }
818 
819 /**
820  * microcode_bsp_resume - Update boot CPU microcode during resume.
821  */
822 void microcode_bsp_resume(void)
823 {
824 	int cpu = smp_processor_id();
825 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
826 
827 	if (uci->mc)
828 		microcode_ops->apply_microcode(cpu);
829 	else
830 		reload_early_microcode(cpu);
831 }
832 
833 static void microcode_bsp_syscore_resume(void *data)
834 {
835 	microcode_bsp_resume();
836 }
837 
838 static const struct syscore_ops mc_syscore_ops = {
839 	.resume	= microcode_bsp_syscore_resume,
840 };
841 
842 static struct syscore mc_syscore = {
843 	.ops = &mc_syscore_ops,
844 };
845 
846 static int mc_cpu_online(unsigned int cpu)
847 {
848 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
849 	struct device *dev = get_cpu_device(cpu);
850 
851 	memset(uci, 0, sizeof(*uci));
852 
853 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
854 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
855 	if (!cpu)
856 		boot_cpu_data.microcode = uci->cpu_sig.rev;
857 
858 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
859 		pr_err("Failed to create group for CPU%d\n", cpu);
860 	return 0;
861 }
862 
863 static int mc_cpu_down_prep(unsigned int cpu)
864 {
865 	struct device *dev = get_cpu_device(cpu);
866 
867 	microcode_fini_cpu(cpu);
868 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
869 	return 0;
870 }
871 
872 static struct attribute *cpu_root_microcode_attrs[] = {
873 #ifdef CONFIG_MICROCODE_LATE_LOADING
874 	&dev_attr_reload.attr,
875 #endif
876 	NULL
877 };
878 
879 static const struct attribute_group cpu_root_microcode_group = {
880 	.name  = "microcode",
881 	.attrs = cpu_root_microcode_attrs,
882 };
883 
884 static int __init microcode_init(void)
885 {
886 	struct device *dev_root;
887 	struct cpuinfo_x86 *c = &boot_cpu_data;
888 	int error;
889 
890 	if (microcode_loader_disabled())
891 		return -EINVAL;
892 
893 	if (c->x86_vendor == X86_VENDOR_INTEL)
894 		microcode_ops = init_intel_microcode();
895 	else if (c->x86_vendor == X86_VENDOR_AMD)
896 		microcode_ops = init_amd_microcode();
897 	else
898 		pr_err("no support for this CPU vendor\n");
899 
900 	if (!microcode_ops)
901 		return -ENODEV;
902 
903 	pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
904 
905 	if (early_data.new_rev)
906 		pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
907 
908 	microcode_fdev = faux_device_create("microcode", NULL, NULL);
909 	if (!microcode_fdev)
910 		return -ENODEV;
911 
912 	dev_root = bus_get_dev_root(&cpu_subsys);
913 	if (dev_root) {
914 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
915 		put_device(dev_root);
916 		if (error) {
917 			pr_err("Error creating microcode group!\n");
918 			goto out_pdev;
919 		}
920 	}
921 
922 	register_syscore(&mc_syscore);
923 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
924 			  mc_cpu_online, mc_cpu_down_prep);
925 
926 	return 0;
927 
928  out_pdev:
929 	faux_device_destroy(microcode_fdev);
930 	return error;
931 
932 }
933 late_initcall(microcode_init);
934