/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1991,1992 Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)		# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
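 *
 * (For orientation only -- a rough sketch of compiler output, not code from
 *  this file: a function built with -fstack-protector on 32-bit typically
 *  checks its canary on exit with something like
 *
 *	movl	<canary slot in the frame>, %eax
 *	xorl	%gs:20, %eax
 *	jne	<path that calls __stack_chk_fail>
 *
 *  which is why SET_KERNEL_GS below must leave %gs pointing at a segment
 *  whose offset 20 holds the kernel's canary.)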
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-ops */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-ops */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
  .if \pop <> 0
	add	$\pop, %esp
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

/*
 * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
 * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
 * is just clearing the MSB, which makes it an invalid stack address and is also
 * a signal to the unwinder that it's a pt_regs pointer in disguise.
 *
 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
 * original %ebp.
 */
.macro ENCODE_FRAME_POINTER
#ifdef CONFIG_FRAME_POINTER
	mov	%esp, %ebp
	andl	$0x7fffffff, %ebp
#endif
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

	/* restore callee-saved registers */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)

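/*
 * (For orientation: the frame built by __switch_to_asm corresponds to the
 *  kernel's struct inactive_task_frame.  Roughly, on 32-bit it looks like
 *  the sketch below -- see asm/switch_to.h for the authoritative layout:
 *
 *	struct inactive_task_frame {
 *		unsigned long si;
 *		unsigned long di;
 *		unsigned long bx;
 *		unsigned long bp;
 *		unsigned long ret_addr;	// left by the call into __switch_to_asm
 *	};
 *
 *  i.e. the four pushes above, lowest address first, topped by the return
 *  address that the caller's call instruction left on the stack.)
 */
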
/*
 * The unwinder expects the last frame on the stack to always be at the same
 * offset from the end of the page, which allows it to validate the stack.
 * Calling schedule_tail() directly would break that convention because it's an
 * asmlinkage function so its argument has to be pushed on the stack.  This
 * wrapper creates a proper "end of stack" frame header before the call.
 */
ENTRY(schedule_tail_wrapper)
	FRAME_BEGIN

	pushl	%eax
	call	schedule_tail
	popl	%eax

	FRAME_END
	ret
ENDPROC(schedule_tail_wrapper)
/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	call	schedule_tail_wrapper

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	call	*%ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from a child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
.Lneed_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	.Lneed_resched
END(resume_kernel)
#endif

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	.Lsysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
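 *
 * (For illustration only -- a sketch, not taken from this file: the vDSO's
 *  SYSENTER sequence looks roughly like
 *
 *	pushl	%ecx
 *	pushl	%edx
 *	pushl	%ebp
 *	movl	%esp, %ebp
 *	sysenter
 *
 *  so the real user stack pointer reaches this entry in %ebp; see the vDSO
 *  sources for the authoritative sequence.)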
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	user stack
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
.Lsysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * any of them were set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Restore all flags except IF.  (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btr	$X86_EFLAGS_IF_BIT, (%esp)
	popfl

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX

.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit process to perform system
 * calls.  Instances of INT $0x80 can be found inline in various programs and
 * libraries.  It is also used by the vDSO's __kernel_vsyscall
 * fallback for hardware that doesn't support a faster entry method.
 * Restarted 32-bit system calls also fall back to INT $0x80
 * regardless of what instruction was originally used to do the system
 * call.  (64-bit programs can use INT $0x80 as well, but they can
 * only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
.Lrestore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE	"jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	.Lldt_ss			# returning to user-space with LDT SS
#endif
.Lrestore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
.Lirq_return:
	INTERRUPT_RETURN

.section .fixup, "ax"
ENTRY(iret_exc)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	common_exception
.previous
	_ASM_EXTABLE(.Lirq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
.Lldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack.  The CPU will not
 * restore the high word of ESP for us on executing iret...  This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy.  We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that makes up for the difference.
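 *
 * (Worked example with made-up numbers: suppose the kernel %esp here is
 *  0xc0123f64 and the saved userspace ESP is 0x5678abcd.  The code below
 *  builds %eax = 0x56783f64 (user high word, kernel low word) and computes
 *  %edx = 0xc0123f64 - 0x56783f64 = 0x699a0000, so 0x699a is written into
 *  bits 16..31 of the ESPFIX segment base.  After the "lss (%esp), %esp"
 *  the linear stack address is base + ESP = 0x699a0000 + 0x56783f64 =
 *  0xc0123f64, i.e. still the kernel stack, while the high word of ESP now
 *  already holds the user's 0x5678, which the final 16-bit iret will not
 *  touch.)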
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	.Lrestore_nocheck
#endif
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack.  This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

/*
 * The CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	ASM_CLAC;			\
	pushl	$~(nr);			\
	SAVE_ALL;			\
	ENCODE_FRAME_POINTER;		\
	TRACE_IRQS_OFF			\
	movl	%esp, %eax;		\
	call	fn;			\
	jmp	ret_from_intr;		\
ENDPROC(name)

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\

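/*
 * (A worked example of the vector encoding above, using a made-up vector:
 *  irq_entries_start pushes ~vector + 0x80 so that the immediate always
 *  fits in a signed byte and each stub stays within its 8-byte slot.  For
 *  vector 0x31 that is ~0x31 + 0x80 = 0x4e.  common_interrupt then adds
 *  -0x80, giving 0x4e - 0x80 = -0x32 = ~0x31, so orig_eax ends up holding
 *  ~vector in the [-256, -1] range and the C handler can recover the
 *  vector by complementing it again.)
 */
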
/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	common_exception
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl	$do_general_protection",	\
		    "pushl	$do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	common_exception
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	common_exception
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	common_exception
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	common_exception
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	common_exception
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	common_exception
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	common_exception
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	common_exception
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	common_exception
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	common_exception
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	common_exception
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	common_exception
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	common_exception
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events.  This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error.
 * We cannot use the normal Linux return path in this case because if we use
 * the IRET hypercall to pop the stack frame we end up in an infinite loop of
 * failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
	jmp	common_exception
END(page_fault)

common_exception:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs				/* pt_regs->fs */
	pushl	%es				/* pt_regs->es */
	pushl	%ds				/* pt_regs->ds */
	pushl	%eax				/* pt_regs->ax */
	pushl	%ebp				/* pt_regs->bp */
	pushl	%edi				/* pt_regs->di */
	pushl	%esi				/* pt_regs->si */
	pushl	%edx				/* pt_regs->dx */
	pushl	%ecx				/* pt_regs->cx */
	pushl	%ebx				/* pt_regs->bx */
	ENCODE_FRAME_POINTER
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	call	*%edi
	jmp	ret_from_exception
END(common_exception)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Ldebug_from_sysenter_stack

	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebx, %esp
	jmp	ret_from_exception
END(debug)

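/*
 * (A note on the SYSENTER-stack check above, which the NMI path below
 *  repeats: with %ecx = (end of SYSENTER_stack) - %esp, the unsigned
 *  cmpl/jb pair is taken only when that difference is below
 *  SIZEOF_SYSENTER_stack, i.e. only when %esp lies inside the SYSENTER
 *  stack.  Any %esp above the end wraps to a huge unsigned value, and any
 *  %esp at or below the start yields a difference >= the size, so both
 *  cases fall through to the normal path.)
 */
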
/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	.Lnmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	.Lrestore_all_notrace

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
	 */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebx, %esp
	jmp	.Lrestore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
	/*
	 * Build the SS:ESP far pointer used by the lss below to switch
	 * back to the espfix stack.
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	ENCODE_FRAME_POINTER
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	.Lirq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	common_exception
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	common_exception
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)