/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <linux/mca.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},

};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis;

int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * this lock may only be taken from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time.  Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list)
		handled += a->handler(type, regs);

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}
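/*
 * Note on the locking scheme (descriptive only): nmi_handle() above
 * walks the handler list under rcu_read_lock(), so the NMI path itself
 * takes no locks.  Registration and unregistration below serialize
 * against each other with desc->lock and modify the list with the
 * list_*_rcu() helpers, so an NMI arriving mid-update sees either the
 * old or the new list, never a half-updated one.  unregister_nmi_handler()
 * additionally calls synchronize_rcu() so the caller may safely free
 * the nmiaction afterwards.
 */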
int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (!action->handler)
		return -EINVAL;

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Most handlers of type NMI_UNKNOWN never return because
	 * they just assume the NMI is theirs.  Just a sanity check
	 * to manage expectations.
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses some handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in to describe the nmi handler
		 * is used as the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
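/*
 * For illustration only: a minimal sketch of how a client might hook
 * the NMI_LOCAL chain, assuming the register_nmi_handler() convenience
 * macro from <asm/nmi.h>.  The device-specific helpers are hypothetical;
 * the real contract is only that a handler returns the number of events
 * it handled (0 if the NMI was not ours):
 *
 *	static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!my_device_raised_nmi())	// hypothetical check
 *			return 0;		// not ours, try the next handler
 *		my_device_ack_nmi();		// hypothetical ack
 *		return 1;			// one event handled
 *	}
 *
 *	err = register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "my_nmi");
 */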
static __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs, false))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines, the PCI SERR line is used to report memory
	 * errors.  EDAC makes use of it.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}

static __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs, false))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_registers(regs);

	if (panic_on_io_nmi)
		panic("NMI IOCK error: Not continuing");

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}

static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' here, as back-to-back NMIs are dealt with one
	 * level up.  Of course this makes having multiple 'unknown'
	 * handlers useless, as only the first one is ever run (unless
	 * it can actually determine if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs, false);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

#ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
		mca_handle_nmi();
		return;
	}
#endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static __kprobes void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * A CPU-specific NMI must be processed before any non-CPU-specific
	 * NMI, otherwise we may lose it, because a CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is
	 * dropped due to NMIs being edge-triggered).  If this is the
	 * second half of a back-to-back NMI, assume we dropped things
	 * and process more handlers.  Otherwise, reset the 'swallow'
	 * NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs, b2b);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI.  One of these events may
		 * be queued to fire in the next NMI.  Because the event
		 * is already handled, the next NMI will result in an
		 * unknown NMI.  Instead, let's flag this for a potential
		 * NMI to swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time.  To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped.  The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched.  This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above by setting the
	 * swallow_nmi flag when we processed more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it.  Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI.  For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI.  These two NMIs get combined into
	 * one (as described above).  When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * as well.  As a result it gets swallowed.  Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI.  But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}
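/*
 * A concrete walk-through of the swallow logic above (illustrative
 * only, using perf as the example source of local NMIs):
 *
 *   NMI #1 arrives: perf handles two events (handled == 2), so
 *     swallow_nmi is set and last_nmi_rip records the interrupted ip.
 *   NMI #2 was latched while #1 ran and fires at the same ip, so
 *     b2b is true.  No handler claims it (its event was already
 *     consumed by NMI #1), so instead of reporting an unknown NMI we
 *     bump nmi_stats.swallow and drop it on the floor.
 */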
/*
 * NMIs can hit breakpoints, which will cause the NMI to lose its
 * NMI context with the CPU when the breakpoint does an iret.
 */
#ifdef CONFIG_X86_32
/*
 * For i386, NMIs use the same stack as the kernel, and we can
 * add a workaround to the iret problem in C.  Simply have 3 states
 * the NMI can be in:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored.  Only one NMI is restarted.)
 *
 * If an NMI hits a breakpoint that executes an iret, another
 * NMI can preempt it.  We do not want to allow this new NMI
 * to run, but we want to execute it when the first one finishes.
 * We set the state to "latched", and the first NMI will perform
 * a cmpxchg on the state; if it fails to reset the state to
 * "not running", it will restart the next NMI.
 */
enum nmi_states {
	NMI_NOT_RUNNING,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);

#define nmi_nesting_preprocess(regs)					\
	do {								\
		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
			return;						\
		}							\
	nmi_restart:							\
		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
	} while (0)

#define nmi_nesting_postprocess()					\
	do {								\
		if (cmpxchg(&__get_cpu_var(nmi_state),			\
		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
			goto nmi_restart;				\
	} while (0)
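/*
 * Illustrative state trace for the three-state machine above (a sketch
 * of the intended transitions, not from the original source):
 *
 *   NMI #1 enters do_nmi():  NOT_RUNNING -> EXECUTING
 *   NMI #1 hits a breakpoint; the breakpoint's iret re-enables NMIs.
 *   NMI #2 enters do_nmi():  state != NOT_RUNNING, so preprocess sets
 *     EXECUTING -> LATCHED and returns immediately.
 *   NMI #1 finishes: cmpxchg(EXECUTING -> NOT_RUNNING) fails because
 *     the state is LATCHED, so postprocess jumps back to nmi_restart
 *     and runs the handlers again on NMI #2's behalf.
 */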
#else /* x86_64 */
/*
 * On x86_64, things are a bit more difficult.  This has the same problem
 * where an NMI hitting a breakpoint that calls iret will remove the
 * NMI context, allowing a nested NMI to enter.  What makes this more
 * difficult is that both NMIs and breakpoints have their own stack.
 * When a new NMI or breakpoint is executed, the stack is set to a fixed
 * point.  If an NMI is nested, it will have its stack set at that same
 * fixed address that the first NMI had, and will start corrupting the
 * stack.  This is handled in entry_64.S, but the same problem exists
 * with the breakpoint stack.
 *
 * If a breakpoint is being processed and the debug stack is in use,
 * and an NMI comes in and also hits a breakpoint, the stack pointer
 * will be set to the same fixed address as the breakpoint that was
 * interrupted, causing that stack to be corrupted.  To handle this case,
 * check if the stack that was interrupted is the debug stack, and if
 * so, change the IDT so that new breakpoints will use the current stack
 * and not switch to the fixed address.  On return of the NMI, switch back
 * to the original IDT.
 */
static DEFINE_PER_CPU(int, update_debug_stack);

static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	/*
	 * If we interrupted a breakpoint, it is possible that
	 * the nmi handler will have breakpoints too.  We need to
	 * change the IDT such that breakpoints that happen here
	 * continue to use the NMI stack.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		__get_cpu_var(update_debug_stack) = 1;
	}
}

static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(__get_cpu_var(update_debug_stack)))
		debug_stack_reset();
}
#endif

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_nesting_preprocess(regs);

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

	/* On i386, may loop back to preprocess */
	nmi_nesting_postprocess();
}

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
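/*
 * Usage sketch for stop_nmi()/restart_nmi() (illustrative; the caller
 * and critical section below are hypothetical).  NMIs still reach
 * do_nmi() and are counted while stopped, but default_do_nmi() is
 * skipped until the matching restart_nmi():
 *
 *	stop_nmi();
 *	reprogram_fragile_state();	// hypothetical critical section
 *	restart_nmi();
 */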