xref: /linux/arch/x86/kernel/nmi.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  Copyright (C) 1991, 1992  Linus Torvalds
4  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
5  *  Copyright (C) 2011	Don Zickus Red Hat, Inc.
6  *
7  *  Pentium III FXSR, SSE support
8  *	Gareth Hughes <gareth@valinux.com>, May 2000
9  */
10 
11 /*
12  * Handle hardware traps and faults.
13  */
14 #include <linux/spinlock.h>
15 #include <linux/kprobes.h>
16 #include <linux/kdebug.h>
17 #include <linux/sched/debug.h>
18 #include <linux/nmi.h>
19 #include <linux/debugfs.h>
20 #include <linux/delay.h>
21 #include <linux/hardirq.h>
22 #include <linux/ratelimit.h>
23 #include <linux/slab.h>
24 #include <linux/export.h>
25 #include <linux/atomic.h>
26 #include <linux/sched/clock.h>
27 #include <linux/kvm_types.h>
28 
29 #include <asm/cpu_entry_area.h>
30 #include <asm/traps.h>
31 #include <asm/mach_traps.h>
32 #include <asm/nmi.h>
33 #include <asm/x86_init.h>
34 #include <asm/reboot.h>
35 #include <asm/cache.h>
36 #include <asm/nospec-branch.h>
37 #include <asm/microcode.h>
38 #include <asm/sev.h>
39 #include <asm/fred.h>
40 
41 #define CREATE_TRACE_POINTS
42 #include <trace/events/nmi.h>
43 
44 /*
45  * An emergency handler can be set in any context including NMI
46  */
47 struct nmi_desc {
48 	raw_spinlock_t lock;
49 	nmi_handler_t emerg_handler;
50 	struct list_head head;
51 };
52 
53 #define NMI_DESC_INIT(type) { \
54 	.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[type].lock), \
55 	.head = LIST_HEAD_INIT(nmi_desc[type].head), \
56 }
57 
58 static struct nmi_desc nmi_desc[NMI_MAX] = {
59 	NMI_DESC_INIT(NMI_LOCAL),
60 	NMI_DESC_INIT(NMI_UNKNOWN),
61 	NMI_DESC_INIT(NMI_SERR),
62 	NMI_DESC_INIT(NMI_IO_CHECK),
63 };
64 
65 #define nmi_to_desc(type) (&nmi_desc[type])
66 
67 struct nmi_stats {
68 	unsigned int normal;
69 	unsigned int unknown;
70 	unsigned int external;
71 	unsigned int swallow;
72 	unsigned long recv_jiffies;
73 	unsigned long idt_seq;
74 	unsigned long idt_nmi_seq;
75 	unsigned long idt_ignored;
76 	atomic_long_t idt_calls;
77 	unsigned long idt_seq_snap;
78 	unsigned long idt_nmi_seq_snap;
79 	unsigned long idt_ignored_snap;
80 	long idt_calls_snap;
81 };
82 
83 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
84 
85 static int ignore_nmis __read_mostly;
86 
87 int unknown_nmi_panic;
88 int panic_on_unrecovered_nmi;
89 int panic_on_io_nmi;
90 
91 /*
92  * Prevent the NMI reason port (0x61) from being accessed simultaneously;
93  * it can only be used from within the NMI handler.
94  */
95 static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
96 
97 static int __init setup_unknown_nmi_panic(char *str)
98 {
99 	unknown_nmi_panic = 1;
100 	return 1;
101 }
102 __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
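
/*
 * Editor's note (a sketch of intended usage, not part of the original file):
 * booting with "unknown_nmi_panic" on the kernel command line sets
 * unknown_nmi_panic, so unknown_nmi_error() below panics instead of printing
 * "Dazed and confused" and continuing.
 */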
103 
104 static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
105 
106 static int __init nmi_warning_debugfs(void)
107 {
108 	debugfs_create_u64("nmi_longest_ns", 0644,
109 			arch_debugfs_dir, &nmi_longest_ns);
110 	return 0;
111 }
112 fs_initcall(nmi_warning_debugfs);
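
/*
 * Editor's note (assumed paths, for illustration only): arch_debugfs_dir is
 * the x86 directory under debugfs, so with debugfs mounted at
 * /sys/kernel/debug the threshold above should be visible and writable as
 * /sys/kernel/debug/x86/nmi_longest_ns, e.g. writing 2000000 raises the
 * warning threshold used by nmi_check_duration() below to 2 ms.
 */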
113 
114 static void nmi_check_duration(struct nmiaction *action, u64 duration)
115 {
116 	int remainder_ns, decimal_msecs;
117 
118 	if (duration < nmi_longest_ns || duration < action->max_duration)
119 		return;
120 
121 	action->max_duration = duration;
122 
123 	/* Convert duration from nsec to msec */
124 	remainder_ns = do_div(duration, NSEC_PER_MSEC);
125 	decimal_msecs = remainder_ns / NSEC_PER_USEC;
126 
127 	pr_info_ratelimited("INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
128 			    action->handler, duration, decimal_msecs);
129 }
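
/*
 * Worked example (editor's illustration, not part of the original file):
 * for a new maximum duration of 1234567 ns, do_div(duration, NSEC_PER_MSEC)
 * leaves duration == 1 and returns remainder_ns == 234567, and
 * remainder_ns / NSEC_PER_USEC == 234, so the message printed above reads
 * "... took too long to run: 1.234 msecs".
 */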
130 
131 static int nmi_handle(unsigned int type, struct pt_regs *regs)
132 {
133 	struct nmi_desc *desc = nmi_to_desc(type);
134 	nmi_handler_t ehandler;
135 	struct nmiaction *a;
136 	int handled = 0;
137 
138 	/*
139 	 * Call the emergency handler, if set
140 	 *
141 	 * When the emergency handler is crash_nmi_callback(), it returns
142 	 * on the crashing CPU so that it can complete the other necessary
143 	 * crashing actions ASAP. The other handlers in the linked list
144 	 * won't need to be run.
145 	 */
146 	ehandler = desc->emerg_handler;
147 	if (ehandler)
148 		return ehandler(type, regs);
149 
150 	rcu_read_lock();
151 
152 	/*
153 	 * NMIs are edge-triggered, which means if you have enough
154 	 * of them concurrently, you can lose some because only one
155 	 * can be latched at any given time.  Walk the whole list
156 	 * to handle those situations.
157 	 */
158 	list_for_each_entry_rcu(a, &desc->head, list) {
159 		int thishandled;
160 		u64 delta;
161 
162 		delta = sched_clock();
163 		thishandled = a->handler(type, regs);
164 		handled += thishandled;
165 		delta = sched_clock() - delta;
166 		trace_nmi_handler(a->handler, (int)delta, thishandled);
167 
168 		nmi_check_duration(a, delta);
169 	}
170 
171 	rcu_read_unlock();
172 
173 	/* return total number of NMI events handled */
174 	return handled;
175 }
176 NOKPROBE_SYMBOL(nmi_handle);
177 
178 int __register_nmi_handler(unsigned int type, struct nmiaction *action)
179 {
180 	struct nmi_desc *desc = nmi_to_desc(type);
181 	unsigned long flags;
182 
183 	if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
184 		return -EINVAL;
185 
186 	raw_spin_lock_irqsave(&desc->lock, flags);
187 
188 	/*
189 	 * Indicate if there are multiple registrations on the
190 	 * internal NMI handler call chains (SERR and IO_CHECK).
191 	 */
192 	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
193 	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
194 
195 	/*
196 	 * Some handlers need to be executed first, otherwise a fake
197 	 * event confuses other handlers (kdump uses this flag).
198 	 */
199 	if (action->flags & NMI_FLAG_FIRST)
200 		list_add_rcu(&action->list, &desc->head);
201 	else
202 		list_add_tail_rcu(&action->list, &desc->head);
203 
204 	raw_spin_unlock_irqrestore(&desc->lock, flags);
205 	return 0;
206 }
207 EXPORT_SYMBOL(__register_nmi_handler);
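
/*
 * Usage sketch (editor's illustration with hypothetical names, not part of
 * the original file): callers normally go through the register_nmi_handler()
 * macro from <asm/nmi.h>, which builds the struct nmiaction and calls
 * __register_nmi_handler():
 *
 *	static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!my_device_raised_nmi())	// hypothetical check
 *			return NMI_DONE;	// not ours, try the next handler
 *		my_device_ack_nmi();		// hypothetical acknowledge
 *		return NMI_HANDLED;		// one event handled
 *	}
 *
 *	register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "my_nmi");
 */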
208 
209 void unregister_nmi_handler(unsigned int type, const char *name)
210 {
211 	struct nmi_desc *desc = nmi_to_desc(type);
212 	struct nmiaction *n, *found = NULL;
213 	unsigned long flags;
214 
215 	raw_spin_lock_irqsave(&desc->lock, flags);
216 
217 	list_for_each_entry_rcu(n, &desc->head, list) {
218 		/*
219 		 * The name passed in to describe the NMI handler
220 		 * is used as the lookup key.
221 		 */
222 		if (!strcmp(n->name, name)) {
223 			WARN(in_nmi(),
224 				"Trying to free NMI (%s) from NMI context!\n", n->name);
225 			list_del_rcu(&n->list);
226 			found = n;
227 			break;
228 		}
229 	}
230 
231 	raw_spin_unlock_irqrestore(&desc->lock, flags);
232 	if (found) {
233 		synchronize_rcu();
234 		INIT_LIST_HEAD(&found->list);
235 	}
236 }
237 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
238 
239 /**
240  * set_emergency_nmi_handler - Set emergency handler
241  * @type:    NMI type
242  * @handler: the emergency handler to be stored
243  *
244  * Set an emergency NMI handler which, if set, preempts all the other
245  * handlers in the linked list. Passing in a NULL handler clears it.
246  * Concurrent calls to this function are not expected to happen; if they
247  * do, the system is screwed beyond repair anyway.
248  */
249 void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
250 {
251 	struct nmi_desc *desc = nmi_to_desc(type);
252 
253 	if (WARN_ON_ONCE(desc->emerg_handler == handler))
254 		return;
255 	desc->emerg_handler = handler;
256 
257 	/*
258 	 * Ensure the emergency handler is visible to other CPUs before
259 	 * this function returns.
260 	 */
261 	smp_wmb();
262 }
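
/*
 * Usage sketch (editor's illustration, not part of the original file): the
 * intended user is an emergency path such as the crash shutdown code, which
 * would install its callback ahead of every list-based handler, e.g.:
 *
 *	set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
 *
 * and could later clear it again by passing a NULL handler.
 */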
263 
264 static void
265 pci_serr_error(unsigned char reason, struct pt_regs *regs)
266 {
267 	/* check to see if anyone registered against these types of errors */
268 	if (nmi_handle(NMI_SERR, regs))
269 		return;
270 
271 	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
272 		 reason, smp_processor_id());
273 
274 	if (panic_on_unrecovered_nmi)
275 		nmi_panic(regs, "NMI: Not continuing");
276 
277 	pr_emerg("Dazed and confused, but trying to continue\n");
278 
279 	/* Clear and disable the PCI SERR error line. */
280 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
281 	outb(reason, NMI_REASON_PORT);
282 }
283 NOKPROBE_SYMBOL(pci_serr_error);
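
/*
 * Editor's note (constant values as understood from <asm/mach_traps.h>,
 * shown for illustration): the NMI reason port 0x61 reports SERR in bit 7
 * (NMI_REASON_SERR == 0x80) and IOCHK in bit 6 (NMI_REASON_IOCHK == 0x40);
 * the writable low bits NMI_REASON_CLEAR_SERR (0x04) and
 * NMI_REASON_CLEAR_IOCHK (0x08), masked by NMI_REASON_CLEAR_MASK (0x0f),
 * clear and disable the corresponding error line when set, and re-enable it
 * when written back as zero.
 */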
284 
285 static void
286 io_check_error(unsigned char reason, struct pt_regs *regs)
287 {
288 	unsigned long i;
289 
290 	/* check to see if anyone registered against these types of errors */
291 	if (nmi_handle(NMI_IO_CHECK, regs))
292 		return;
293 
294 	pr_emerg(
295 	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
296 		 reason, smp_processor_id());
297 	show_regs(regs);
298 
299 	if (panic_on_io_nmi) {
300 		nmi_panic(regs, "NMI IOCK error: Not continuing");
301 
302 		/*
303 		 * If we end up here, it means we have received an NMI while
304 		 * processing panic(). Simply return without delaying and
305 		 * re-enabling NMIs.
306 		 */
307 		return;
308 	}
309 
310 	/* Re-enable the IOCK line, then wait ~2 seconds (20000 * 100us) */
311 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
312 	outb(reason, NMI_REASON_PORT);
313 
314 	i = 20000;
315 	while (--i) {
316 		touch_nmi_watchdog();
317 		udelay(100);
318 	}
319 
320 	reason &= ~NMI_REASON_CLEAR_IOCHK;
321 	outb(reason, NMI_REASON_PORT);
322 }
323 NOKPROBE_SYMBOL(io_check_error);
324 
325 static void
326 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
327 {
328 	int handled;
329 
330 	/*
331 	 * As a last resort, let the "unknown" handlers make a
332 	 * best-effort attempt to figure out if they can claim
333 	 * responsibility for this Unknown NMI.
334 	 */
335 	handled = nmi_handle(NMI_UNKNOWN, regs);
336 	if (handled) {
337 		__this_cpu_add(nmi_stats.unknown, handled);
338 		return;
339 	}
340 
341 	__this_cpu_add(nmi_stats.unknown, 1);
342 
343 	pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
344 			     reason, smp_processor_id());
345 
346 	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
347 		nmi_panic(regs, "NMI: Not continuing");
348 
349 	pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
350 }
351 NOKPROBE_SYMBOL(unknown_nmi_error);
352 
353 static DEFINE_PER_CPU(bool, swallow_nmi);
354 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
355 
356 static noinstr void default_do_nmi(struct pt_regs *regs)
357 {
358 	unsigned char reason = 0;
359 	int handled;
360 	bool b2b = false;
361 
362 	/*
363 	 * Back-to-back NMIs are detected by comparing the RIP of the
364 	 * current NMI with that of the previous NMI. If it is the same,
365 	 * it is assumed that the CPU did not have a chance to jump back
366 	 * into a non-NMI context and execute code in between the two
367 	 * NMIs.
368 	 *
369 	 * They are interesting because even if there are more than two,
370 	 * only a maximum of two can be detected (anything over two is
371 	 * dropped due to NMI being edge-triggered). If this is the
372 	 * second half of the back-to-back NMI, assume we dropped things
373 	 * and process more handlers. Otherwise, reset the 'swallow' NMI
374 	 * behavior.
375 	 */
376 	if (regs->ip == __this_cpu_read(last_nmi_rip))
377 		b2b = true;
378 	else
379 		__this_cpu_write(swallow_nmi, false);
380 
381 	__this_cpu_write(last_nmi_rip, regs->ip);
382 
383 	instrumentation_begin();
384 
385 	if (microcode_nmi_handler_enabled() && microcode_nmi_handler())
386 		goto out;
387 
388 	/*
389 	 * CPU-specific NMI must be processed before non-CPU-specific
390 	 * NMI, otherwise we may lose it, because the CPU-specific
391 	 * NMI can not be detected/processed on other CPUs.
392 	 */
393 	handled = nmi_handle(NMI_LOCAL, regs);
394 	__this_cpu_add(nmi_stats.normal, handled);
395 	if (handled) {
396 		/*
397 		 * There are cases when an NMI handler handles multiple
398 		 * events in the current NMI.  One of these events may
399 		 * already be latched as the next NMI.  Because that event
400 		 * has already been handled, the next NMI would show up as
401 		 * an unknown NMI.  Instead, flag this so that a potential
402 		 * follow-up NMI can be swallowed.
403 		 */
404 		if (handled > 1)
405 			__this_cpu_write(swallow_nmi, true);
406 		goto out;
407 	}
408 
409 	/*
410 	 * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
411 	 *
412 	 * Another CPU may be processing panic routines while holding
413 	 * nmi_reason_lock. Check if a CPU issued the IPI for crash dumping
414 	 * and, if so, call its callback directly.  If no CPU is preparing a
415 	 * crash dump, we simply loop here.
416 	 */
417 	while (!raw_spin_trylock(&nmi_reason_lock)) {
418 		run_crash_ipi_callback(regs);
419 		cpu_relax();
420 	}
421 
422 	reason = x86_platform.get_nmi_reason();
423 
424 	if (reason & NMI_REASON_MASK) {
425 		if (reason & NMI_REASON_SERR)
426 			pci_serr_error(reason, regs);
427 		else if (reason & NMI_REASON_IOCHK)
428 			io_check_error(reason, regs);
429 
430 		/*
431 		 * Reassert NMI in case it became active
432 		 * meanwhile as it's edge-triggered:
433 		 */
434 		if (IS_ENABLED(CONFIG_X86_32))
435 			reassert_nmi();
436 
437 		__this_cpu_add(nmi_stats.external, 1);
438 		raw_spin_unlock(&nmi_reason_lock);
439 		goto out;
440 	}
441 	raw_spin_unlock(&nmi_reason_lock);
442 
443 	/*
444 	 * Only one NMI can be latched at a time.  To handle
445 	 * this we may process multiple nmi handlers at once to
446 	 * cover the case where an NMI is dropped.  The downside
447 	 * to this approach is we may process an NMI prematurely,
448 	 * while its real NMI is sitting latched.  This will cause
449 	 * an unknown NMI on the next run of the NMI processing.
450 	 *
451 	 * We tried to flag that condition above, by setting the
452 	 * swallow_nmi flag when we process more than one event.
453 	 * This condition is also only present on the second half
454 	 * of a back-to-back NMI, so we flag that condition too.
455 	 *
456 	 * If both are true, we assume we already processed this
457 	 * NMI previously and we swallow it.  Otherwise we reset
458 	 * the logic.
459 	 *
460 	 * There are scenarios where we may accidentally swallow
461 	 * a 'real' unknown NMI.  For example, while processing
462 	 * a perf NMI another perf NMI comes in along with a
463 	 * 'real' unknown NMI.  These two NMIs get combined into
464 	 * one (as described above).  When the next NMI gets
465 	 * processed, it will be flagged by perf as handled, but
466 	 * no one will know that there was a 'real' unknown NMI sent
467 	 * also.  As a result it gets swallowed.  Or if the first
468 	 * perf NMI returns two events handled then the second
469 	 * NMI will get eaten by the logic below, again losing a
470 	 * 'real' unknown NMI.  But this is the best we can do
471 	 * for now.
472 	 */
473 	if (b2b && __this_cpu_read(swallow_nmi))
474 		__this_cpu_add(nmi_stats.swallow, 1);
475 	else
476 		unknown_nmi_error(reason, regs);
477 
478 out:
479 	instrumentation_end();
480 }
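
/*
 * Summary of the swallow decision above (editor's sketch, not part of the
 * original file):
 *
 *	b2b	swallow_nmi	result
 *	 no	    -		unknown_nmi_error()
 *	 yes	   false	unknown_nmi_error()
 *	 yes	   true		counted in nmi_stats.swallow and dropped
 */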
481 
482 /*
483  * An NMI can page fault or hit a breakpoint, which will cause it to lose
484  * its NMI context with the CPU when the breakpoint or page fault does an IRET.
485  *
486  * As a result, NMIs can nest if NMIs get unmasked due to an IRET during
487  * NMI processing.  On x86_64, the asm glue protects us from nested NMIs
488  * if the outer NMI came from kernel mode, but we can still nest if the
489  * outer NMI came from user mode.
490  *
491  * To handle these nested NMIs, we have three states:
492  *
493  *  1) not running
494  *  2) executing
495  *  3) latched
496  *
497  * When no NMI is in progress, it is in the "not running" state.
498  * When an NMI comes in, it goes into the "executing" state.
499  * Normally, if another NMI is triggered, it does not interrupt
500  * the running NMI and the HW will simply latch it so that when
501  * the first NMI finishes, it will restart the second NMI.
502  * (Note, the latch is binary, thus multiple NMIs triggering,
503  *  when one is running, are ignored. Only one NMI is restarted.)
504  *
505  * If an NMI executes an iret, another NMI can preempt it. We do not
506  * want to allow this new NMI to run, but we want to execute it when the
507  * first one finishes.  We set the state to "latched", and the exit of
508  * the first NMI will perform a dec_return; if the result is zero
509  * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
510  * dec_return would have set the state to NMI_EXECUTING (what we want it
511  * to be when we are running). In this case, we simply jump back to
512  * rerun the NMI handler again, and restart the 'latched' NMI.
513  *
514  * No trap (breakpoint or page fault) should be hit before nmi_restart,
515  * thus there is no race between the first check of state for NOT_RUNNING
516  * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
517  * at this point.
518  *
519  * In case the NMI takes a page fault, we need to save off the CR2
520  * because the NMI could have preempted another page fault and would
521  * corrupt the CR2 that is about to be read. As nested NMIs must be restarted
522  * and they can not take breakpoints or page faults, the update of the
523  * CR2 must be done before converting the nmi state back to NOT_RUNNING.
524  * Otherwise, there would be a race of another nested NMI coming in
525  * after setting state to NOT_RUNNING but before updating the nmi_cr2.
526  */
527 enum nmi_states {
528 	NMI_NOT_RUNNING = 0,
529 	NMI_EXECUTING,
530 	NMI_LATCHED,
531 };
532 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
533 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
534 static DEFINE_PER_CPU(unsigned long, nmi_dr7);
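
/*
 * State transitions used by exc_nmi() below (editor's sketch, not part of
 * the original file):
 *
 *	NMI_NOT_RUNNING --new NMI--> NMI_EXECUTING
 *	NMI_EXECUTING --nested NMI after an IRET--> NMI_LATCHED
 *
 * On exit, this_cpu_dec_return(nmi_state) moves NMI_EXECUTING (1) to 0 and
 * returns, or moves NMI_LATCHED (2) back to NMI_EXECUTING (1) and jumps to
 * nmi_restart to run the latched NMI.
 */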
535 
536 DEFINE_IDTENTRY_RAW(exc_nmi)
537 {
538 	irqentry_state_t irq_state;
539 	struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);
540 
541 	/*
542 	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
543 	 * cause nested NMIs, but those can be handled safely.
544 	 */
545 	sev_es_nmi_complete();
546 	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
547 		raw_atomic_long_inc(&nsp->idt_calls);
548 
549 	if (arch_cpu_is_offline(smp_processor_id())) {
550 		if (microcode_nmi_handler_enabled())
551 			microcode_offline_nmi_handler();
552 		return;
553 	}
554 
555 	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
556 		this_cpu_write(nmi_state, NMI_LATCHED);
557 		return;
558 	}
559 	this_cpu_write(nmi_state, NMI_EXECUTING);
560 	this_cpu_write(nmi_cr2, read_cr2());
561 
562 nmi_restart:
563 	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
564 		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
565 		WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
566 		WRITE_ONCE(nsp->recv_jiffies, jiffies);
567 	}
568 
569 	/*
570 	 * Needs to happen before DR7 is accessed, because the hypervisor can
571 	 * intercept DR7 reads/writes, turning those into #VC exceptions.
572 	 */
573 	sev_es_ist_enter(regs);
574 
575 	this_cpu_write(nmi_dr7, local_db_save());
576 
577 	irq_state = irqentry_nmi_enter(regs);
578 
579 	inc_irq_stat(__nmi_count);
580 
581 	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
582 		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
583 	} else if (!ignore_nmis) {
584 		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
585 			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
586 			WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
587 		}
588 		default_do_nmi(regs);
589 		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
590 			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
591 			WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
592 		}
593 	}
594 
595 	irqentry_nmi_exit(regs, irq_state);
596 
597 	local_db_restore(this_cpu_read(nmi_dr7));
598 
599 	sev_es_ist_exit();
600 
601 	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
602 		write_cr2(this_cpu_read(nmi_cr2));
603 	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
604 		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
605 		WARN_ON_ONCE(nsp->idt_seq & 0x1);
606 		WRITE_ONCE(nsp->recv_jiffies, jiffies);
607 	}
608 	if (this_cpu_dec_return(nmi_state))
609 		goto nmi_restart;
610 }
611 
612 #if IS_ENABLED(CONFIG_KVM_INTEL)
613 DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
614 {
615 	exc_nmi(regs);
616 }
617 EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx);
618 #endif
619 
620 #ifdef CONFIG_NMI_CHECK_CPU
621 
622 static char *nmi_check_stall_msg[] = {
623 /*									*/
624 /* +--------- nmi_seq & 0x1: CPU is currently in NMI handler.		*/
625 /* | +------ cpu_is_offline(cpu)					*/
626 /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
627 /* | | |	NMI handler has been invoked.				*/
628 /* | | |								*/
629 /* V V V								*/
630 /* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
631 /* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
632 /* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
633 /* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
634 /* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
635 /* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
636 /* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
637 /* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
638 };
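
/*
 * Worked example (editor's illustration, not part of the original file):
 * nmi_backtrace_stall_check() below computes the message index as
 *
 *	idx = ((nmi_seq & 0x1) << 2) |
 *	      (cpu_is_offline(cpu) << 1) |
 *	      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
 *
 * so an online CPU that is currently inside its NMI handler (nmi_seq odd)
 * and whose handler has been invoked since the snapshot yields idx == 5,
 * i.e. "CPU is in exc_nmi() handler which is legitimately ignoring NMIs".
 */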
639 
640 void nmi_backtrace_stall_snap(const struct cpumask *btp)
641 {
642 	int cpu;
643 	struct nmi_stats *nsp;
644 
645 	for_each_cpu(cpu, btp) {
646 		nsp = per_cpu_ptr(&nmi_stats, cpu);
647 		nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
648 		nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
649 		nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
650 		nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
651 	}
652 }
653 
654 void nmi_backtrace_stall_check(const struct cpumask *btp)
655 {
656 	int cpu;
657 	int idx;
658 	unsigned long nmi_seq;
659 	unsigned long j = jiffies;
660 	char *modp;
661 	char *msgp;
662 	char *msghp;
663 	struct nmi_stats *nsp;
664 
665 	for_each_cpu(cpu, btp) {
666 		nsp = per_cpu_ptr(&nmi_stats, cpu);
667 		modp = "";
668 		msghp = "";
669 		nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
670 		if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
671 			msgp = "CPU entered NMI handler function, but has not exited";
672 		} else if (nsp->idt_nmi_seq_snap == nmi_seq ||
673 			   nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
674 			idx = ((nmi_seq & 0x1) << 2) |
675 			      (cpu_is_offline(cpu) << 1) |
676 			      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
677 			msgp = nmi_check_stall_msg[idx];
678 			if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
679 				modp = ", but OK because ignore_nmis was set";
680 			if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
681 				msghp = " (CPU exited one NMI handler function)";
682 			else if (nmi_seq & 0x1)
683 				msghp = " (CPU currently in NMI handler function)";
684 			else
685 				msghp = " (CPU was never in an NMI handler function)";
686 		} else {
687 			msgp = "CPU is handling NMIs";
688 		}
689 		pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
690 		pr_alert("%s: last activity: %lu jiffies ago.\n",
691 			 __func__, j - READ_ONCE(nsp->recv_jiffies));
692 	}
693 }
694 
695 #endif
696 
697 #ifdef CONFIG_X86_FRED
698 /*
699  * With FRED, CR2/DR6 are pushed into the #PF/#DB stack frame during
700  * FRED event delivery, i.e., there is no problem with transient state.
701  * And NMI unblocking only happens when the stack frame indicates
702  * that it should happen.
703  *
704  * Thus, the NMI entry stub for FRED is really straightforward and
705  * as simple as most exception handlers. As such, #DB is allowed
706  * during NMI handling.
707  */
708 DEFINE_FREDENTRY_NMI(exc_nmi)
709 {
710 	irqentry_state_t irq_state;
711 
712 	if (arch_cpu_is_offline(smp_processor_id())) {
713 		if (microcode_nmi_handler_enabled())
714 			microcode_offline_nmi_handler();
715 		return;
716 	}
717 
718 	/*
719 	 * Save CR2 for eventual restore to cover the case where the NMI
720 	 * hits the VMENTER/VMEXIT region where the guest CR2 is live. This
721 	 * prevents guest state corruption in case the NMI handler
722 	 * takes a page fault.
723 	 */
724 	this_cpu_write(nmi_cr2, read_cr2());
725 
726 	irq_state = irqentry_nmi_enter(regs);
727 
728 	inc_irq_stat(__nmi_count);
729 	default_do_nmi(regs);
730 
731 	irqentry_nmi_exit(regs, irq_state);
732 
733 	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
734 		write_cr2(this_cpu_read(nmi_cr2));
735 }
736 #endif
737 
738 void stop_nmi(void)
739 {
740 	ignore_nmis++;
741 }
742 
743 void restart_nmi(void)
744 {
745 	ignore_nmis--;
746 }
747 
748 /* reset the back-to-back NMI logic */
749 void local_touch_nmi(void)
750 {
751 	__this_cpu_write(last_nmi_rip, 0);
752 }
753