/*
 *  Copyright (C) 1991,1992  Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS; the kernel uses it only for the stack
 * canary, which gcc requires to be at %gs:20.  Read the comment at the
 * top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
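
/*
 * Illustration (an assumption about gcc's output, not taken from this
 * file): with -fstack-protector, gcc on i386 emits canary accesses
 * roughly of the form
 *
 *	movl	%gs:20, %eax		# prologue: load the canary
 *	...
 *	xorl	%gs:20, %eax		# epilogue: compare against it
 *
 * calling __stack_chk_fail() on a mismatch, which is why, whenever the
 * kernel owns %gs, its base must point at a per-cpu area that keeps the
 * canary at offset 20.
 */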
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately, push/pop can't be no-ops */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-ops */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
  .if \pop <> 0
	add	$\pop, %esp
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

	/* restore callee-saved registers */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)
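
/*
 * Sketch (an assumption about switch_to.h, not copied from it): after the
 * four pushes above, the saved frame on the new task's stack looks
 * roughly like
 *
 *	struct inactive_task_frame {
 *		unsigned long si;
 *		unsigned long di;
 *		unsigned long bx;
 *		unsigned long bp;
 *		unsigned long ret_addr;
 *	};
 *
 * with %esi at the lowest address and the return address (into
 * __switch_to's caller, or ret_from_fork for a newly forked task) on top.
 */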

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	pushl	%eax
	call	schedule_tail
	popl	%eax

	testl	%ebx, %ebx
	jnz	1f			/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	call	*%ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from a child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	need_resched
END(resume_kernel)
#endif

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	sysenter_past_esp
#endif
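
/*
 * Illustration (not from this file): in this era the vDSO's
 * __kernel_vsyscall does roughly
 *
 *	pushl	%ecx
 *	pushl	%edx
 *	pushl	%ebp
 *	movl	%esp, %ebp
 *	sysenter
 *
 * before falling back to int $0x80, which is why %ebp carries the user
 * stack pointer below and why arg6 lives at 0(%ebp).
 */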

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	user stack
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Restore all flags except IF.  (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btr	$X86_EFLAGS_IF_BIT, (%esp)
	popfl

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX

.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
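
/*
 * Background (architectural, not from this file): SYSEXIT resumes
 * userspace with EIP taken from %edx and ESP from %ecx (CS and SS are
 * derived from the SYSENTER CS MSR), which is why the opportunistic exit
 * above loads pt_regs->ip and pt_regs->sp into exactly those two
 * registers before the final sti; sysexit pair.
 */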

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform
 * system calls.  Instances of INT $0x80 can be found inline in various
 * programs and libraries.  It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a faster
 * entry method.  Restarted 32-bit system calls also fall back to
 * INT $0x80 regardless of what instruction was originally used to do
 * the system call.  (64-bit programs can use INT $0x80 as well, but
 * they can only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	ldt_ss				# returning to user-space with LDT SS
#endif
restore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup, "ax"
ENTRY(iret_exc)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	error_code
.previous
	_ASM_EXTABLE(irq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16-bit stack.  The CPU will not
 * restore the high word of ESP for us on executing iret...  This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy.  We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that makes up for the difference.
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	restore_nocheck
#endif
ENDPROC(entry_INT80_32)
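
/*
 * Background (x86 descriptor layout, not from this file): the two GDT
 * pokes above and the reads in FIXUP_ESPFIX_STACK below rely on how a
 * segment descriptor scatters its 32-bit base:
 *
 *	bytes 2..3: base[15:0]
 *	byte  4:    base[23:16]
 *	byte  7:    base[31:24]
 *
 * hence the otherwise magic-looking "+ 4" and "+ 7" offsets into the
 * espfix descriptor.
 */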

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack.
 *
 * We can't call C functions using the ESPFIX stack.  This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)
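
/*
 * Worked example (added for clarity, not from the original): for the
 * first external vector, 0x20, the stub pushes $(~0x20 + 0x80) = $0x5f,
 * which fits in a signed byte and keeps every stub small.  The
 * addl $-0x80 in common_interrupt turns that back into ~0x20 =
 * 0xffffffdf in the orig_eax slot, and the C IRQ code recovers the
 * vector number by complementing it once more.
 */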

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	ASM_CLAC;			\
	pushl	$~(nr);			\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl	%esp, %eax;		\
	call	fn;			\
	jmp	ret_from_intr;		\
ENDPROC(name)


#ifdef CONFIG_TRACING
# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
# define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\
	TRACE_BUILD_INTERRUPT(name, nr)

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	error_code
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl $do_general_protection",	\
		    "pushl $do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	error_code
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	error_code
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	error_code
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	error_code
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	error_code
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	error_code
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	error_code
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	error_code
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	error_code
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	error_code
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	error_code
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	error_code
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	error_code
END(spurious_interrupt_bug)
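
/*
 * Note (added for clarity): the stubs above share one convention.
 * Exceptions for which the CPU pushes no hardware error code (divide
 * error, overflow, #UD, ...) push a fake one ($0, or $-1 to mark "not a
 * syscall") so the frame always has the same shape; each stub then
 * pushes its C handler's address and jumps to error_code below, which
 * moves that address into %edi and calls it with a pt_regs pointer in
 * %eax and the error code in %edx.
 */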

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events.  This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error.  We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)
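
/*
 * Note (added for clarity): with CONFIG_DYNAMIC_FTRACE the mcount calls
 * that gcc -pg emits at function entry are rewritten to NOPs at boot and
 * only patched to call ftrace_caller (or ftrace_regs_caller) while
 * tracing is active, so the mcount symbol itself only has to return.
 */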

ENTRY(ftrace_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	$0				/* Pass NULL as regs pointer */
	movl	4*4(%esp), %eax
	movl	0x4(%ebp), %edx
	movl	function_trace_op, %ecx
	subl	$MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call	ftrace_stub

	addl	$4, %esp			/* skip NULL pointer */
	popl	%edx
	popl	%ecx
	popl	%eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp	ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
	pushf					/* push flags before compare (in cs location) */

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is.  We move the return ip into the
	 * ip location, and move flags into the return ip location.
	 */
	pushl	4(%esp)				/* save return ip into ip slot */

	pushl	$0				/* Load 0 into orig_ax */
	pushl	%gs
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx

	movl	13*4(%esp), %eax		/* Get the saved flags */
	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
						/* clobbering return ip */
	movl	$__KERNEL_CS, 13*4(%esp)

	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
	pushl	%esp				/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call	ftrace_stub

	addl	$4, %esp			/* Skip pt_regs */
	movl	14*4(%esp), %eax		/* Move flags back into cs */
	movl	%eax, 13*4(%esp)		/* Needed to keep addl from modifying flags */
	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */

	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
	popl	%ds
	popl	%es
	popl	%fs
	popl	%gs
	addl	$8, %esp			/* Skip orig_ax and ip */
	popf					/* Pop flags at end (no addl to corrupt flags) */
	jmp	ftrace_ret

	popf
	jmp	ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
	cmpl	$__PAGE_OFFSET, %esp
	jb	ftrace_stub			/* Paging not enabled yet? */

	cmpl	$ftrace_stub, ftrace_trace_function
	jnz	trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl	$ftrace_stub, ftrace_graph_return
	jnz	ftrace_graph_caller

	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
	jnz	ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	movl	0x4(%ebp), %edx
	subl	$MCOUNT_INSN_SIZE, %eax

	call	*ftrace_trace_function

	popl	%edx
	popl	%ecx
	popl	%eax
	jmp	ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	lea	0x4(%ebp), %edx
	movl	(%ebp), %ecx
	subl	$MCOUNT_INSN_SIZE, %eax
	call	prepare_ftrace_return
	popl	%edx
	popl	%ecx
	popl	%eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl	%eax
	pushl	%edx
	movl	%ebp, %eax
	call	ftrace_return_to_handler
	movl	%eax, %ecx
	popl	%edx
	popl	%eax
	jmp	*%ecx
#endif
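
/*
 * Note (added for clarity): prepare_ftrace_return hooks the traced
 * function's return by stashing the real return address and substituting
 * return_to_handler; when the function returns there,
 * ftrace_return_to_handler records the exit, hands the original return
 * address back in %eax, and the jmp *%ecx above resumes the caller.
 */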

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
	ASM_CLAC
	pushl	$trace_do_page_fault
	jmp	error_code
END(trace_page_fault)
#endif

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	call	*%edi
	jmp	ret_from_exception
END(page_fault)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx			/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Ldebug_from_sysenter_stack

	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebp, %esp
	jmp	ret_from_exception
END(debug)
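
/*
 * Note (added for clarity): the "are we on the SYSENTER stack?" test
 * above, repeated in the NMI handler below, loads %ecx with the address
 * just past the end of the per-cpu SYSENTER stack; the unsigned compare
 * (end - esp) < SIZEOF_SYSENTER_stack is true exactly when %esp lies
 * within the SIZEOF_SYSENTER_stack bytes ending there.
 */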

/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	nmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx			/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	restore_all_notrace

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
	 */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebp, %esp
	jmp	restore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/*
	 * create the pointer to lss back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	irq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	error_code
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	error_code
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)