/*
 * Copyright (C) 1991,1992  Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout in 'syscall_exit':
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE		0x40000000

#ifndef CONFIG_AUDITSYSCALL
# define sysenter_audit		syscall_trace_entry
# define sysexit_audit		syscall_exit_work
#endif

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization. The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc. Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
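 *
 * With CONFIG_X86_32_LAZY_GS the user %gs is not saved or reloaded on
 * entry/exit at all; PUSH_GS/POP_GS below only reserve and skip the
 * pt_regs slot, so the stack layout listed at the top of this file is
 * the same either way.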
 */
#ifdef CONFIG_X86_32_LAZY_GS

	/* unfortunately, push/pop can't be a no-op */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

	/* all the rest are no-ops */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
	.if \pop <> 0
	add	$\pop, %esp
	.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

ENTRY(ret_from_fork)
	pushl	%eax
	call	schedule_tail
	GET_THREAD_INFO(%ebp)
	popl	%eax
	pushl	$0x0202				# Reset kernel eflags
	popfl
	jmp	syscall_exit
END(ret_from_fork)

ENTRY(ret_from_kernel_thread)
	pushl	%eax
	call	schedule_tail
	GET_THREAD_INFO(%ebp)
	popl	%eax
	pushl	$0x0202				# Reset kernel eflags
	popfl
	movl	PT_EBP(%esp), %eax
	call	*PT_EBX(%esp)
	movl	$0, PT_EAX(%esp)
	jmp	syscall_exit
ENDPROC(ret_from_kernel_thread)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from child spawned by kernel_thread().
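	 * In that case the saved CS holds a kernel segment, so the RPL
	 * test below routes us to resume_kernel.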
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
						# setting need_resched or sigpending
						# between sampling and the iret
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	andl	$_TIF_WORK_MASK, %ecx		# is there any work to be done on
						# int/exception return?
	jne	work_pending
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	need_resched
END(resume_kernel)
#endif

/*
 * SYSENTER_RETURN points to after the SYSENTER instruction
 * in the vsyscall page. See vsyscall-sysenter.S, which defines
 * the symbol.
 */

	# SYSENTER call handler stub
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace that until
	 * we have set up enough kernel state to call TRACE_IRQS_OFF -
	 * and we immediately enable interrupts at that point anyway.
	 */
	pushl	$__USER_DS
	pushl	%ebp
	pushfl
	orl	$X86_EFLAGS_IF, (%esp)
	pushl	$__USER_CS
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
	 * is relative to thread_info, which is at the bottom of the
	 * kernel stack page. 4*4 means the 4 words pushed above;
	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
	 * and THREAD_SIZE takes us to the bottom.
	 */
	pushl	((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)

	pushl	%eax
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
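 *
 * The user's %ebp points at that sixth argument. It is an untrusted
 * pointer, so it is range-checked against __PAGE_OFFSET and read under
 * STAC/CLAC with an exception fixup that lands in syscall_fault.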
 */
	cmpl	$__PAGE_OFFSET-3, %ebp
	jae	syscall_fault
	ASM_STAC
1:	movl	(%ebp), %ebp
	ASM_CLAC
	movl	%ebp, PT_EBP(%esp)
	_ASM_EXTABLE(1b, syscall_fault)

	GET_THREAD_INFO(%ebp)

	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
	jnz	sysenter_audit
sysenter_do_call:
	cmpl	$(NR_syscalls), %eax
	jae	sysenter_badsys
	call	*sys_call_table(, %eax, 4)
sysenter_after_call:
	movl	%eax, PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	testl	$_TIF_ALLWORK_MASK, %ecx
	jnz	sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl	PT_EIP(%esp), %edx
	movl	PT_OLDESP(%esp), %ecx
	xorl	%ebp, %ebp
	TRACE_IRQS_ON
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	ENABLE_INTERRUPTS_SYSEXIT

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
	jnz	syscall_trace_entry
	/* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
	movl	PT_EBX(%esp), %edx		/* ebx/a0: 2nd arg to audit */
	/* movl PT_ECX(%esp), %ecx already set, a1: 3rd arg to audit */
	pushl	PT_ESI(%esp)			/* a3: 5th arg */
	pushl	PT_EDX+4(%esp)			/* a2: 4th arg */
	call	__audit_syscall_entry
	popl	%ecx				/* get that remapped edx off the stack */
	popl	%ecx				/* get that remapped esi off the stack */
	movl	PT_EAX(%esp), %eax		/* reload syscall number */
	jmp	sysenter_do_call

sysexit_audit:
	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jnz	syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl	%eax, %edx			/* second arg, syscall return value */
	cmpl	$-MAX_ERRNO, %eax		/* is it an error? */
	setbe	%al				/* 1 if so, 0 if not */
	movzbl	%al, %eax			/* zero-extend that */
	call	__audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jnz	syscall_exit_work
	movl	PT_EAX(%esp), %eax		/* reload syscall return value */
	jmp	sysenter_exit
#endif

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX
ENDPROC(entry_SYSENTER_32)

	# system call handler stub
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax				# save orig_eax
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
						# system call tracing in operation / emulation
	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
	jnz	syscall_trace_entry
	cmpl	$(NR_syscalls), %eax
	jae	syscall_badsys
syscall_call:
	call	*sys_call_table(, %eax, 4)
syscall_after_call:
	movl	%eax, PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
						# setting need_resched or sigpending
						# between sampling and the iret
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	testl	$_TIF_ALLWORK_MASK, %ecx	# current->work
	jnz	syscall_exit_work

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
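	 * That is harmless here: a kernel CS has RPL 0, so the compare
	 * against USER_RPL below fails no matter what the stale SS byte
	 * contains.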
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	ldt_ss				# returning to user-space with LDT SS
#endif
restore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup, "ax"
ENTRY(iret_exc)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	error_code
.previous
	_ASM_EXTABLE(irq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active. Rather than try to fix up the high bits of
	 * ESP, bypass this code entirely. This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16 bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl	$0, pv_info+PARAVIRT_enabled
	jne	restore_nocheck
#endif

/*
 * Set up and switch to the ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that makes up for the difference.
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	restore_nocheck
#endif
ENDPROC(entry_INT80_32)

	# perform work that needs to be done immediately before resumption
	ALIGN
work_pending:
	testb	$_TIF_NEED_RESCHED, %cl
	jz	work_notifysig
work_resched:
	call	schedule
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)		# make sure we don't miss an interrupt
						# setting need_resched or sigpending
						# between sampling and the iret
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	andl	$_TIF_WORK_MASK, %ecx		# is there any work to be done other
						# than syscall tracing?
	jz	restore_all
	testb	$_TIF_NEED_RESCHED, %cl
	jnz	work_resched

work_notifysig:					# deal with pending signals and
						# notify-resume requests
#ifdef CONFIG_VM86
	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
	movl	%esp, %eax
	jnz	work_notifysig_v86		# returning to kernel-space or
						# vm86-space
1:
#else
	movl	%esp, %eax
#endif
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	movb	PT_CS(%esp), %bl
	andb	$SEGMENT_RPL_MASK, %bl
	cmpb	$USER_RPL, %bl
	jb	resume_kernel
	xorl	%edx, %edx
	call	do_notify_resume
	jmp	resume_userspace

#ifdef CONFIG_VM86
	ALIGN
work_notifysig_v86:
	pushl	%ecx				# save ti_flags for do_notify_resume
	call	save_v86_state			# %eax contains pt_regs pointer
	popl	%ecx
	movl	%eax, %esp
	jmp	1b
#endif
END(work_pending)

	# perform syscall entry tracing
	ALIGN
syscall_trace_entry:
	movl	$-ENOSYS, PT_EAX(%esp)
	movl	%esp, %eax
	call	syscall_trace_enter
	/* What it returned is what we'll actually use. */
	cmpl	$(NR_syscalls), %eax
	jnae	syscall_call
	jmp	syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testl	$_TIF_WORK_SYSCALL_EXIT, %ecx
	jz	work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)		# could let syscall_trace_leave() call
						# schedule() instead
	movl	%esp, %eax
	call	syscall_trace_leave
	jmp	resume_userspace
END(syscall_exit_work)

syscall_fault:
	ASM_CLAC
	GET_THREAD_INFO(%ebp)
	movl	$-EFAULT, PT_EAX(%esp)
	jmp	resume_userspace
END(syscall_fault)

syscall_badsys:
	movl	$-ENOSYS, %eax
	jmp	syscall_after_call
END(syscall_badsys)

sysenter_badsys:
	movl	$-ENOSYS, %eax
	jmp	sysenter_after_call
END(sysenter_badsys)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fix up the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
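 *
 * Each stub pushes (~vector + 0x80), which always fits in a signed byte,
 * so the push + jmp pair fits in its 8-byte slot; common_interrupt adds
 * -0x80 again, leaving ~vector in the orig_eax slot for do_IRQ.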
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	ASM_CLAC;			\
	pushl	$~(nr);			\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl	%esp, %eax;		\
	call	fn;			\
	jmp	ret_from_intr;		\
ENDPROC(name)


#ifdef CONFIG_TRACING
# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
# define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\
	TRACE_BUILD_INTERRUPT(name, nr)

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	error_code
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl $do_general_protection",	\
		    "pushl $do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	error_code
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	error_code
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	error_code
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	error_code
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	error_code
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	error_code
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	error_code
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	error_code
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	error_code
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	error_code
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	error_code
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	error_code
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
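	# the CPU supplies no error code for this trap, so push a dummy one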
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	error_code
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	sysenter_past_esp

ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events. This simulates
	 * the iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
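 *
 * The fixups at the bottom zero EAX and the offending selector slot in
 * the Xen-provided frame, so the retried load picks up a null selector
 * and succeeds; that is how a category-1 fault ends up with EAX == 0.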
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

ENTRY(ftrace_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	$0				/* Pass NULL as regs pointer */
	movl	4*4(%esp), %eax
	movl	0x4(%ebp), %edx
	movl	function_trace_op, %ecx
	subl	$MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call	ftrace_stub

	addl	$4, %esp			/* skip NULL pointer */
	popl	%edx
	popl	%ecx
	popl	%eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp	ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
	pushf					/* push flags before compare (in cs location) */

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is. We move the return ip into the
	 * ip location, and move flags into the return ip location.
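	 * After that shuffling, the frame built below matches struct pt_regs
	 * closely enough for register-access ftrace callbacks, and the
	 * epilogue swaps flags and the return address back before popping.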
	 */
	pushl	4(%esp)				/* save return ip into ip slot */

	pushl	$0				/* Load 0 into orig_ax */
	pushl	%gs
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx

	movl	13*4(%esp), %eax		/* Get the saved flags */
	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
						/* clobbering return ip */
	movl	$__KERNEL_CS, 13*4(%esp)

	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
	movl	function_trace_op, %ecx		/* Save ftrace_ops in 3rd parameter */
	pushl	%esp				/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call	ftrace_stub

	addl	$4, %esp			/* Skip pt_regs */
	movl	14*4(%esp), %eax		/* Move flags back into cs */
	movl	%eax, 13*4(%esp)		/* Needed to keep addl from modifying flags */
	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */

	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
	popl	%ds
	popl	%es
	popl	%fs
	popl	%gs
	addl	$8, %esp			/* Skip orig_ax and ip */
	popf					/* Pop flags at end (no addl to corrupt flags) */
	jmp	ftrace_ret

	popf
	jmp	ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
	cmpl	$__PAGE_OFFSET, %esp
	jb	ftrace_stub			/* Paging not enabled yet? */

	cmpl	$ftrace_stub, ftrace_trace_function
	jnz	trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl	$ftrace_stub, ftrace_graph_return
	jnz	ftrace_graph_caller

	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
	jnz	ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	movl	0x4(%ebp), %edx
	subl	$MCOUNT_INSN_SIZE, %eax

	call	*ftrace_trace_function

	popl	%edx
	popl	%ecx
	popl	%eax
	jmp	ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	lea	0x4(%ebp), %edx
	movl	(%ebp), %ecx
	subl	$MCOUNT_INSN_SIZE, %eax
	call	prepare_ftrace_return
	popl	%edx
	popl	%ecx
	popl	%eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl	%eax
	pushl	%edx
	movl	%ebp, %eax
	call	ftrace_return_to_handler
	movl	%eax, %ecx
	popl	%edx
	popl	%eax
	jmp	*%ecx
#endif

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
	ASM_CLAC
	pushl	$trace_do_page_fault
	jmp	error_code
END(trace_page_fault)
#endif

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
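	# stash the user %gs value read above into pt_regs and switch to the
	# kernel's %gs (both steps are no-ops with CONFIG_X86_32_LAZY_GS)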
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	call	*%edi
	jmp	ret_from_exception
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack. Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip to
 * point past the instruction that would have loaded the stack for
 * SYSENTER.
 */
.macro FIX_STACK offset ok label
	cmpw	$__KERNEL_CS, 4(%esp)
	jne	\ok
\label:
	movl	TSS_sysenter_sp0 + \offset(%esp), %esp
	pushfl
	pushl	$__KERNEL_CS
	pushl	$sysenter_past_esp
.endm

ENTRY(debug)
	ASM_CLAC
	cmpl	$entry_SYSENTER_32, (%esp)
	jne	debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl	$-1				# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer
	call	do_debug
	jmp	ret_from_exception
END(debug)

/*
 * NMI is doubly nasty. It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack. So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	nmi_espfix_stack
#endif
	cmpl	$entry_SYSENTER_32, (%esp)
	je	nmi_stack_fixup
	pushl	%eax
	movl	%esp, %eax
	/*
	 * Do not access memory above the end of our stack page,
	 * it might not exist.
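	 * If %esp is within the last 20 bytes of the stack region, the
	 * words probed by nmi_debug_stack_check could lie past its end,
	 * so treat the stack as correct and skip that probe.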
	 */
	andl	$(THREAD_SIZE-1), %eax
	cmpl	$(THREAD_SIZE-20), %eax
	popl	%eax
	jae	nmi_stack_correct
	cmpl	$entry_SYSENTER_32, 12(%esp)
	je	nmi_debug_stack_check
nmi_stack_correct:
	pushl	%eax
	SAVE_ALL
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_nmi
	jmp	restore_all_notrace

nmi_stack_fixup:
	FIX_STACK 12, nmi_stack_correct, 1
	jmp	nmi_stack_correct

nmi_debug_stack_check:
	cmpw	$__KERNEL_CS, 16(%esp)
	jne	nmi_stack_correct
	cmpl	$debug, (%esp)
	jb	nmi_stack_correct
	cmpl	$debug_esp_fix_insn, (%esp)
	ja	nmi_stack_correct
	FIX_STACK 24, nmi_stack_correct, 1
	jmp	nmi_stack_correct

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/*
	 * Create the far pointer that lss uses to switch back to the
	 * espfix stack.
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	irq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	error_code
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	error_code
END(async_page_fault)
#endif