/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <linux/mca.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

#define NMI_MAX_NAMELEN	16
struct nmiaction {
	struct list_head list;
	nmi_handler_t handler;
	unsigned int flags;
	char *name;
};

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},

};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis;

int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously.
 * This lock may only be taken from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time.  Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list)
		handled += a->handler(type, regs);

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}

static int __setup_nmi(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Most handlers of type NMI_UNKNOWN never return because
	 * they just assume the NMI is theirs.  This is just a sanity
	 * check to manage expectations.
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses some handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}

static struct nmiaction *__free_nmi(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * the name passed in to describe the nmi handler
		 * is used as the lookup key
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
	return (n);
}

int register_nmi_handler(unsigned int type, nmi_handler_t handler,
			unsigned long nmiflags, const char *devname)
{
	struct nmiaction *action;
	int retval = -ENOMEM;

	if (!handler)
		return -EINVAL;

	action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
	if (!action)
		goto fail_action;

	action->handler = handler;
	action->flags = nmiflags;
	action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
	if (!action->name)
		goto fail_action_name;

	retval = __setup_nmi(type, action);

	if (retval)
		goto fail_setup_nmi;

	return retval;

fail_setup_nmi:
	kfree(action->name);
fail_action_name:
	kfree(action);
fail_action:

	return retval;
}
EXPORT_SYMBOL_GPL(register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmiaction *a;

	a = __free_nmi(type, name);
	if (a) {
		kfree(a->name);
		kfree(a);
	}
}

EXPORT_SYMBOL_GPL(unregister_nmi_handler);

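/*
 * Usage sketch (hypothetical, not part of this file): a driver that can
 * raise CPU-local NMIs would hook into the NMI_LOCAL chain roughly like
 * this.  The handler returns the number of events it handled (0 if the
 * NMI was not ours), which nmi_handle() above sums across the chain.
 * foo_nmi_pending() and foo_clear_nmi() are made-up helpers used only
 * for illustration.
 *
 *	static int foo_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!foo_nmi_pending())
 *			return 0;	// not ours, let other handlers look
 *		foo_clear_nmi();
 *		return 1;		// one event handled
 *	}
 *
 *	// at probe time (NMI_FLAG_FIRST instead of 0 runs it first)
 *	err = register_nmi_handler(NMI_LOCAL, foo_nmi_handler, 0, "foo");
 *
 *	// at remove time; the name string is the lookup key
 *	unregister_nmi_handler(NMI_LOCAL, "foo");
 */
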
static notrace __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines, PCI SERR line is used to report memory
	 * errors. EDAC makes use of it.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_registers(regs);

	if (panic_on_io_nmi)
		panic("NMI IOCK error: Not continuing");

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI)
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs, false);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

#ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
		mca_handle_nmi();
		return;
	}
#endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

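/*
 * The two per-CPU variables above drive the back-to-back NMI detection
 * in default_do_nmi() below: last_nmi_rip remembers which instruction
 * the previous NMI interrupted, and swallow_nmi is set when a handler
 * reported more than one event, so that a later "unknown" NMI hitting
 * the same RIP can be silently swallowed instead of being reported.
 */
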
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is dropped
	 * due to NMIs being edge-triggered).  If this is the second half
	 * of the back-to-back NMI, assume we dropped things and process
	 * more handlers.  Otherwise reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs, b2b);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI.  One of these events may
		 * be queued for the next NMI.  Because the event is
		 * already handled, the next NMI will result in an unknown
		 * NMI.  Instead, let's flag this for a potential NMI to
		 * swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time.  To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped.  The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched.  This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it.  Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI.  For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI.  These two NMIs get combined into
	 * one (as described above).  When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * also.  As a result it gets swallowed.  Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI.  But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}

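/*
 * Worked example of the swallow logic above (hypothetical sequence):
 *
 *	NMI #1 interrupts RIP X; the perf handler finds and handles two
 *	events -> handled == 2, swallow_nmi = true, last_nmi_rip = X.
 *	NMI #2 was latched while #1 ran and fires at the same RIP X, but
 *	its event was already consumed -> b2b == true, no handler claims
 *	it, and the reason port shows nothing.
 *	Result: b2b && swallow_nmi, so it is counted in nmi_stats.swallow
 *	instead of being reported as an unknown NMI.
 */
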
/*
 * NMIs can hit breakpoints which will cause the CPU to lose its
 * NMI context when the breakpoint handler does an iret.
 */
#ifdef CONFIG_X86_32
/*
 * For i386, NMIs use the same stack as the kernel, and we can
 * add a workaround to the iret problem in C. Simply have 3 states
 * the NMI can be in.
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored. Only one NMI is restarted.)
 *
 * If an NMI hits a breakpoint that executes an iret, another
 * NMI can preempt it. We do not want to allow this new NMI
 * to run, but we want to execute it when the first one finishes.
 * We set the state to "latched", and the first NMI will perform
 * a cmpxchg on the state, and if it doesn't successfully
 * reset the state to "not running" it will restart the next
 * NMI.
 */
enum nmi_states {
	NMI_NOT_RUNNING,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);

#define nmi_nesting_preprocess(regs)					\
	do {								\
		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
			return;						\
		}							\
	nmi_restart:							\
		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
	} while (0)

#define nmi_nesting_postprocess()					\
	do {								\
		if (cmpxchg(&__get_cpu_var(nmi_state),			\
		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
			goto nmi_restart;				\
	} while (0)
#else /* x86_64 */
/*
 * In x86_64 things are a bit more difficult. This has the same problem
 * where an NMI hitting a breakpoint that calls iret will remove the
 * NMI context, allowing a nested NMI to enter. What makes this more
 * difficult is that both NMIs and breakpoints have their own stack.
 * When a new NMI or breakpoint is executed, the stack is set to a fixed
 * point. If an NMI is nested, it will have its stack set at that same
 * fixed address that the first NMI had, and will start corrupting the
 * stack. This is handled in entry_64.S, but the same problem exists with
 * the breakpoint stack.
 *
 * If a breakpoint is being processed on the debug stack and an NMI
 * comes in and also hits a breakpoint, the stack pointer will be set
 * to the same fixed address as the interrupted breakpoint, corrupting
 * that stack. To handle this case, check if the stack that was
 * interrupted is the debug stack, and if so, change the IDT so that
 * new breakpoints will use the current stack and not switch to the
 * fixed address. On return of the NMI, switch back to the original IDT.
 */
static DEFINE_PER_CPU(int, update_debug_stack);

static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	/*
	 * If we interrupted a breakpoint, it is possible that
	 * the nmi handler will have breakpoints too. We need to
	 * change the IDT such that breakpoints that happen here
	 * continue to use the NMI stack.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		__get_cpu_var(update_debug_stack) = 1;
	}
}

static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(__get_cpu_var(update_debug_stack)))
		debug_stack_reset();
}
#endif

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_nesting_preprocess(regs);

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

	/* On i386, may loop back to preprocess */
	nmi_nesting_postprocess();
}

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
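
/*
 * Illustration (not kernel code): the i386 nesting protection above is a
 * three-state latch.  A stand-alone sketch of the same pattern, assuming
 * a single CPU and using GCC's __sync_val_compare_and_swap() in place of
 * the kernel's cmpxchg(); handle_one_nmi() is a made-up stand-in for the
 * real NMI work:
 *
 *	enum { NOT_RUNNING, EXECUTING, LATCHED };
 *	static int state = NOT_RUNNING;
 *
 *	void nmi_like_entry(void)
 *	{
 *		if (state != NOT_RUNNING) {
 *			// nested entry: remember it and return
 *			state = LATCHED;
 *			return;
 *		}
 *	restart:
 *		state = EXECUTING;
 *		handle_one_nmi();
 *		// if a nested entry set LATCHED meanwhile, run again
 *		if (__sync_val_compare_and_swap(&state, EXECUTING,
 *						NOT_RUNNING) != EXECUTING)
 *			goto restart;
 *	}
 */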