/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011	Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <linux/mca.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

#define NMI_MAX_NAMELEN	16
struct nmiaction {
	struct list_head list;
	nmi_handler_t handler;
	unsigned int flags;
	char *name;
};

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},

};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis;

int unknown_nmi_panic;
/*
 * Prevent NMI reason port (0x61) being accessed simultaneously, can
 * only be used in NMI handler.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time.  Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list)
		handled += a->handler(type, regs);

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}

static int __setup_nmi(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * most handlers of type NMI_UNKNOWN never return because
	 * they just assume the NMI is theirs.  Just a sanity check
	 * to manage expectations
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * some handlers need to be executed first otherwise a fake
	 * event confuses some handlers (kdump uses this flag)
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}

static struct nmiaction *__free_nmi(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * the name passed in to describe the nmi handler
		 * is used as the lookup key
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
	return (n);
}

int register_nmi_handler(unsigned int type, nmi_handler_t handler,
			unsigned long nmiflags, const char *devname)
{
	struct nmiaction *action;
	int retval = -ENOMEM;

	if (!handler)
		return -EINVAL;

	action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
	if (!action)
		goto fail_action;

	action->handler = handler;
	action->flags = nmiflags;
	action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
	if (!action->name)
		goto fail_action_name;

	retval = __setup_nmi(type, action);

	if (retval)
		goto fail_setup_nmi;

	return retval;

fail_setup_nmi:
	kfree(action->name);
fail_action_name:
	kfree(action);
fail_action:

	return retval;
}
EXPORT_SYMBOL_GPL(register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmiaction *a;

	a = __free_nmi(type, name);
	if (a) {
		kfree(a->name);
		kfree(a);
	}
}

EXPORT_SYMBOL_GPL(unregister_nmi_handler);
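
/*
 * Usage sketch for the registration API above.  The block below is
 * compiled out and purely illustrative: my_nmi_handler(),
 * my_device_raised_nmi(), my_device_ack_nmi() and the "my_dev" name
 * are hypothetical placeholders, not part of this file.
 */
#if 0
static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	/* Claim the NMI only if our (hypothetical) device raised it. */
	if (!my_device_raised_nmi())
		return 0;	/* not ours; let the other handlers run */

	my_device_ack_nmi();
	return 1;		/* events handled; nmi_handle() sums these */
}

static int __init my_dev_nmi_init(void)
{
	/* Append to the CPU-specific NMI list; NMI_FLAG_FIRST would prepend. */
	return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_dev");
}

static void my_dev_nmi_teardown(void)
{
	/* The devname string passed at registration is the lookup key. */
	unregister_nmi_handler(NMI_LOCAL, "my_dev");
}
#endif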
static notrace __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs, false))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines, PCI SERR line is used to report memory
	 * errors. EDAC makes use of it.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs, false))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_registers(regs);

	if (panic_on_io_nmi)
		panic("NMI IOCK error: Not continuing");

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs, false);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

#ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
		mca_handle_nmi();
		return;
	}
#endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI cannot be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is dropped
	 * due to NMIs being edge-triggered).  If this is the second half
	 * of a back-to-back NMI, assume we dropped things and process
	 * more handlers.  Otherwise, reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs, b2b);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI.  One of these events may
		 * be queued for the next NMI.  Because the event is
		 * already handled, the next NMI will result in an unknown
		 * NMI.  Instead, let's flag this for a potential NMI to
		 * swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time.  To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped.  The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched.  This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it.  Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI.  For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI.  These two NMIs get combined into
	 * one (as described above).  When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * also.  As a result it gets swallowed.  Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI.  But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}

/*
 * An NMI can hit a breakpoint, which will cause it to lose its
 * NMI context with the CPU when the breakpoint handler does an iret.
 */
#ifdef CONFIG_X86_32
/*
 * For i386, NMIs use the same stack as the kernel, and we can
 * add a workaround to the iret problem in C.  Simply have 3 states
 * the NMI can be in:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored.  Only one NMI is restarted.)
 *
 * If an NMI hits a breakpoint that executes an iret, another
 * NMI can preempt it.  We do not want to allow this new NMI
 * to run, but we want to execute it when the first one finishes.
 * We set the state to "latched", and the first NMI will perform
 * a cmpxchg on the state; if it doesn't successfully reset the
 * state to "not running", it will restart the next NMI.
 * (A compiled-out sketch of this state machine appears at the
 * end of this file.)
 */
enum nmi_states {
	NMI_NOT_RUNNING,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);

#define nmi_nesting_preprocess(regs)					\
	do {								\
		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
			return;						\
		}							\
	nmi_restart:							\
		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
	} while (0)

#define nmi_nesting_postprocess()					\
	do {								\
		if (cmpxchg(&__get_cpu_var(nmi_state),			\
		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
			goto nmi_restart;				\
	} while (0)
#else /* x86_64 */
/*
 * In x86_64 things are a bit more difficult.  This has the same problem
 * where an NMI hitting a breakpoint that calls iret will remove the
 * NMI context, allowing a nested NMI to enter.  What makes this more
 * difficult is that both NMIs and breakpoints have their own stack.
 * When a new NMI or breakpoint is executed, the stack is set to a fixed
 * point.  If an NMI is nested, it will have its stack set at that same
 * fixed address that the first NMI had, and will start corrupting the
 * stack.  This is handled in entry_64.S, but the same problem exists with
 * the breakpoint stack.
 *
 * If a breakpoint is being processed and the debug stack is in use, and
 * an NMI comes in and also hits a breakpoint, the stack pointer
 * will be set to the same fixed address as the breakpoint that was
 * interrupted, causing that stack to be corrupted.  To handle this case,
 * check if the stack that was interrupted is the debug stack, and if
 * so, change the IDT so that new breakpoints will use the current stack
 * and not switch to the fixed address.  On return of the NMI, switch back
 * to the original IDT.
 */
static DEFINE_PER_CPU(int, update_debug_stack);

static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	/*
	 * If we interrupted a breakpoint, it is possible that
	 * the nmi handler will have breakpoints too.  We need to
	 * change the IDT such that breakpoints that happen here
	 * continue to use the NMI stack.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		__get_cpu_var(update_debug_stack) = 1;
	}
}

static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(__get_cpu_var(update_debug_stack)))
		debug_stack_reset();
}
#endif

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_nesting_preprocess(regs);

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

	/* On i386, may loop back to preprocess */
	nmi_nesting_postprocess();
}

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
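
/*
 * Compiled-out sketch of the 32-bit nesting logic above, restated as a
 * plain function for readability.  This is an illustration only:
 * handle_one_nmi() is a hypothetical stand-in for the
 * nmi_enter()/default_do_nmi()/nmi_exit() sequence in do_nmi(), and
 * nothing here is built as part of this file.
 */
#if 0
static void nmi_state_machine_sketch(struct pt_regs *regs)
{
	/* A nested NMI only records that one more NMI is pending. */
	if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {
		__get_cpu_var(nmi_state) = NMI_LATCHED;
		return;
	}

	do {
		__get_cpu_var(nmi_state) = NMI_EXECUTING;

		handle_one_nmi(regs);

		/*
		 * If a nested NMI moved the state to NMI_LATCHED while we
		 * were executing, the cmpxchg fails and we go around again.
		 */
	} while (cmpxchg(&__get_cpu_var(nmi_state),
			 NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING);
}
#endif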