xref: /linux/arch/x86/entry/entry_32.S (revision bcb63314e2c23f1ed622418b65f9409512659c73)
1/*
2 *  Copyright (C) 1991,1992  Linus Torvalds
3 *
4 * entry_32.S contains the system-call and low-level fault and trap handling routines.
5 *
6 * Stack layout while running C code:
7 *	ptrace needs to have all registers on the stack.
8 *	If the order here is changed, it needs to be
9 *	updated in fork.c:copy_process(), signal.c:do_signal(),
10 *	ptrace.c and ptrace.h
11 *
12 *	 0(%esp) - %ebx
13 *	 4(%esp) - %ecx
14 *	 8(%esp) - %edx
15 *	 C(%esp) - %esi
16 *	10(%esp) - %edi
17 *	14(%esp) - %ebp
18 *	18(%esp) - %eax
19 *	1C(%esp) - %ds
20 *	20(%esp) - %es
21 *	24(%esp) - %fs
22 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
23 *	2C(%esp) - orig_eax
24 *	30(%esp) - %eip
25 *	34(%esp) - %cs
26 *	38(%esp) - %eflags
27 *	3C(%esp) - %oldesp
28 *	40(%esp) - %oldss
29 */
30
31#include <linux/linkage.h>
32#include <linux/err.h>
33#include <asm/thread_info.h>
34#include <asm/irqflags.h>
35#include <asm/errno.h>
36#include <asm/segment.h>
37#include <asm/smp.h>
38#include <asm/page_types.h>
39#include <asm/percpu.h>
40#include <asm/processor-flags.h>
41#include <asm/ftrace.h>
42#include <asm/irq_vectors.h>
43#include <asm/cpufeatures.h>
44#include <asm/alternative-asm.h>
45#include <asm/asm.h>
46#include <asm/smap.h>
47#include <asm/export.h>
48#include <asm/frame.h>
49
50	.section .entry.text, "ax"
51
52/*
53 * We use macros for low-level operations which need to be overridden
54 * for paravirtualization.  The following will never clobber any registers:
55 *   INTERRUPT_RETURN (aka. "iret")
56 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
57 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
58 *
59 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
60 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
61 * Allowing a register to be clobbered can shrink the paravirt replacement
62 * enough to patch inline, increasing performance.
63 */
64
/*
 * preempt_stop(clobbers): with CONFIG_PREEMPT, disable interrupts and run
 * TRACE_IRQS_OFF; otherwise it expands to nothing.  Without CONFIG_PREEMPT
 * there is no separate resume_kernel code in this file, so the name is
 * simply aliased to restore_all.
 */
65#ifdef CONFIG_PREEMPT
66# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
67#else
68# define preempt_stop(clobbers)
69# define resume_kernel		restore_all
70#endif
71
/*
 * TRACE_IRQS_IRET: used just before returning through the saved frame.
 * If the saved EFLAGS image has IF set, run TRACE_IRQS_ON; if IF is clear,
 * skip it.  Expands to nothing without CONFIG_TRACE_IRQFLAGS.
 * Local label 1 is used.
 */
72.macro TRACE_IRQS_IRET
73#ifdef CONFIG_TRACE_IRQFLAGS
74	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)     # interrupts off?
75	jz	1f
76	TRACE_IRQS_ON
771:
78#endif
79.endm
80
81/*
82 * User gs save/restore
83 *
84 * %gs is used for userland TLS and kernel only uses it for stack
85 * canary which is required to be at %gs:20 by gcc.  Read the comment
86 * at the top of stackprotector.h for more info.
87 *
88 * Local labels 98 and 99 are used.
89 */
90#ifdef CONFIG_X86_32_LAZY_GS
91
92 /* unfortunately push/pop can't be no-op */
93.macro PUSH_GS
94	pushl	$0			/* fill the pt_regs gs slot with a dummy 0 */
95.endm
96.macro POP_GS pop=0
97	addl	$(4 + \pop), %esp	/* drop the gs slot plus \pop extra bytes */
98.endm
99.macro POP_GS_EX
100.endm
101
102 /* all the rest are no-op */
103.macro PTGS_TO_GS
104.endm
105.macro PTGS_TO_GS_EX
106.endm
107.macro GS_TO_REG reg
108.endm
109.macro REG_TO_PTGS reg
110.endm
111.macro SET_KERNEL_GS reg
112.endm
113
114#else	/* CONFIG_X86_32_LAZY_GS */
115
/* PUSH_GS: save the live %gs in the pt_regs gs slot. */
116.macro PUSH_GS
117	pushl	%gs
118.endm
119
/*
 * POP_GS: reload %gs from the top of the stack (label 98), then skip
 * \pop further bytes if \pop is non-zero.
 */
120.macro POP_GS pop=0
12198:	popl	%gs
122  .if \pop <> 0
123	add	$\pop, %esp
124  .endif
125.endm
/*
 * POP_GS_EX: fixup for POP_GS.  The exception table entry sends a fault at
 * 98 to 99, which zeroes the stack slot and retries the pop.
 */
126.macro POP_GS_EX
127.pushsection .fixup, "ax"
12899:	movl	$0, (%esp)
129	jmp	98b
130.popsection
131	_ASM_EXTABLE(98b, 99b)
132.endm
133
/* PTGS_TO_GS: load %gs from the pt_regs gs slot without popping (label 98). */
134.macro PTGS_TO_GS
13598:	mov	PT_GS(%esp), %gs
136.endm
/* PTGS_TO_GS_EX: fixup for PTGS_TO_GS; zero the saved gs and retry at 98. */
137.macro PTGS_TO_GS_EX
138.pushsection .fixup, "ax"
13999:	movl	$0, PT_GS(%esp)
140	jmp	98b
141.popsection
142	_ASM_EXTABLE(98b, 99b)
143.endm
144
/* GS_TO_REG: copy the live %gs into \reg. */
145.macro GS_TO_REG reg
146	movl	%gs, \reg
147.endm
/* REG_TO_PTGS: store \reg into the pt_regs gs slot. */
148.macro REG_TO_PTGS reg
149	movl	\reg, PT_GS(%esp)
150.endm
/* SET_KERNEL_GS: load %gs with __KERNEL_STACK_CANARY; clobbers \reg. */
151.macro SET_KERNEL_GS reg
152	movl	$(__KERNEL_STACK_CANARY), \reg
153	movl	\reg, %gs
154.endm
155
156#endif /* CONFIG_X86_32_LAZY_GS */
157
/*
 * SAVE_ALL: push the lower part of pt_regs (gs slot, fs, es, ds, ax, bp, di,
 * si, dx, cx, bx) on top of whatever the caller has already pushed
 * (orig_eax and the hardware or hand-built frame), then load the kernel
 * segment values: ds = es = __USER_DS, fs = __KERNEL_PERCPU, gs through
 * SET_KERNEL_GS.
 *
 * pt_regs_ax: operand pushed into the pt_regs ax slot (default: live %eax).
 * Clears DF.  %edx is used as scratch after its original value was saved.
 */
158.macro SAVE_ALL pt_regs_ax=%eax
159	cld
160	PUSH_GS
161	pushl	%fs
162	pushl	%es
163	pushl	%ds
164	pushl	\pt_regs_ax
165	pushl	%ebp
166	pushl	%edi
167	pushl	%esi
168	pushl	%edx
169	pushl	%ecx
170	pushl	%ebx
171	movl	$(__USER_DS), %edx
172	movl	%edx, %ds
173	movl	%edx, %es
174	movl	$(__KERNEL_PERCPU), %edx
175	movl	%edx, %fs
176	SET_KERNEL_GS %edx
177.endm
178
179/*
180 * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
181 * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
182 * is just setting the LSB, which makes it an invalid stack address and is also
183 * a signal to the unwinder that it's a pt_regs pointer in disguise.
184 *
185 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
186 * original ebp.
187 */
188.macro ENCODE_FRAME_POINTER
189#ifdef CONFIG_FRAME_POINTER
190	mov %esp, %ebp			/* ebp = current stack pointer (pt_regs after SAVE_ALL) */
191	orl $0x1, %ebp			/* set the LSB to mark it as an encoded pt_regs pointer */
192#endif
193.endm
194
/*
 * RESTORE_INT_REGS: pop the seven general-purpose registers in pt_regs
 * order (bx, cx, dx, si, di, bp, ax), the reverse of the SAVE_ALL pushes.
 */
195.macro RESTORE_INT_REGS
196	popl	%ebx
197	popl	%ecx
198	popl	%edx
199	popl	%esi
200	popl	%edi
201	popl	%ebp
202	popl	%eax
203.endm
204
/*
 * RESTORE_REGS: undo SAVE_ALL.  Pops the general registers, then ds, es, fs
 * and (through POP_GS) gs; \pop extra bytes are skipped afterwards.
 *
 * Each segment pop has an exception table entry: if the pop at 1/2/3
 * faults, the fixup at 4/5/6 zeroes the stack slot and retries the pop.
 * POP_GS_EX supplies the matching fixup for the gs pop.
 */
205.macro RESTORE_REGS pop=0
206	RESTORE_INT_REGS
2071:	popl	%ds
2082:	popl	%es
2093:	popl	%fs
210	POP_GS \pop
211.pushsection .fixup, "ax"
2124:	movl	$0, (%esp)
213	jmp	1b
2145:	movl	$0, (%esp)
215	jmp	2b
2166:	movl	$0, (%esp)
217	jmp	3b
218.popsection
219	_ASM_EXTABLE(1b, 4b)
220	_ASM_EXTABLE(2b, 5b)
221	_ASM_EXTABLE(3b, 6b)
222	POP_GS_EX
223.endm
224
225/*
226 * %eax: prev task
227 * %edx: next task
228 */
229ENTRY(__switch_to_asm)
230	/*
231	 * Save callee-saved registers
232	 * This must match the order in struct inactive_task_frame
233	 */
234	pushl	%ebp
235	pushl	%ebx
236	pushl	%edi
237	pushl	%esi
238
239	/* switch stack */
240	movl	%esp, TASK_threadsp(%eax)	/* remember prev's stack pointer */
241	movl	TASK_threadsp(%edx), %esp	/* continue on next's saved stack */
242
243#ifdef CONFIG_CC_STACKPROTECTOR
	/* install next's stack canary in the per-cpu slot; %ebx is scratch here */
244	movl	TASK_stack_canary(%edx), %ebx
245	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
246#endif
247
248	/* restore callee-saved registers */
249	popl	%esi
250	popl	%edi
251	popl	%ebx
252	popl	%ebp
253
	/* tail-jump: %eax (prev) and %edx (next) are still intact for __switch_to */
254	jmp	__switch_to
255END(__switch_to_asm)
256
257/*
258 * The unwinder expects the last frame on the stack to always be at the same
259 * offset from the end of the page, which allows it to validate the stack.
260 * Calling schedule_tail() directly would break that convention because it's an
261 * asmlinkage function so its argument has to be pushed on the stack.  This
262 * wrapper creates a proper "end of stack" frame header before the call.
263 */
264ENTRY(schedule_tail_wrapper)
265	FRAME_BEGIN
266
267	pushl	%eax			/* pass %eax to schedule_tail on the stack */
268	call	schedule_tail
269	popl	%eax			/* remove the stack argument again */
270
271	FRAME_END
272	ret
273ENDPROC(schedule_tail_wrapper)
274/*
275 * A newly forked process directly context switches into this address.
276 *
277 * eax: prev task we switched from
278 * ebx: kernel thread func (NULL for user thread)
279 * edi: kernel thread arg
280 */
281ENTRY(ret_from_fork)
282	call	schedule_tail_wrapper
283
284	testl	%ebx, %ebx		/* non-zero ebx: kernel thread function to run */
285	jnz	1f		/* kernel threads are uncommon */
286
2872:
288	/* When we fork, we trace the syscall return in the child, too. */
289	movl    %esp, %eax		/* argument: the stack pointer (pt_regs) */
290	call    syscall_return_slowpath
291	jmp     restore_all
292
293	/* kernel thread */
2941:	movl	%edi, %eax		/* thread argument goes in eax */
295	call	*%ebx
296	/*
297	 * A kernel thread is allowed to return here after successfully
298	 * calling do_execve().  Exit to userspace to complete the execve()
299	 * syscall.
300	 */
301	movl	$0, PT_EAX(%esp)	/* saved ax = 0 for the return to user mode */
302	jmp	2b
303END(ret_from_fork)
304
305/*
306 * Return to user mode is not as complex as all this looks,
307 * but we want the default path for a system call return to
308 * go as quickly as possible which is why some of this is
309 * less clear than it otherwise should be.
310 */
311
312	# userspace resumption stub bypassing syscall exit tracing
313	ALIGN
/*
 * ret_from_exception: exit path for exception handlers; runs preempt_stop()
 * and falls into ret_from_intr.
 * ret_from_intr: decide from the saved CS (and the saved EFLAGS.VM bit when
 * CONFIG_VM86 is set) whether the interrupted context was kernel code
 * (go to resume_kernel) or user/v8086 code (fall into resume_userspace).
 */
314ret_from_exception:
315	preempt_stop(CLBR_ANY)
316ret_from_intr:
317#ifdef CONFIG_VM86
318	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
319	movb	PT_CS(%esp), %al
320	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
321#else
322	/*
323	 * We can be coming here from child spawned by kernel_thread().
324	 */
325	movl	PT_CS(%esp), %eax
326	andl	$SEGMENT_RPL_MASK, %eax
327#endif
328	cmpl	$USER_RPL, %eax
329	jb	resume_kernel			# not returning to v8086 or userspace
330
/*
 * resume_userspace: with interrupts disabled, call prepare_exit_to_usermode
 * with the pt_regs pointer in eax, then restore registers via restore_all.
 */
331ENTRY(resume_userspace)
332	DISABLE_INTERRUPTS(CLBR_ANY)
333	TRACE_IRQS_OFF
334	movl	%esp, %eax
335	call	prepare_exit_to_usermode
336	jmp	restore_all
337END(ret_from_exception)
338
339#ifdef CONFIG_PREEMPT
/*
 * resume_kernel: return to interrupted kernel code when CONFIG_PREEMPT is
 * set.  With interrupts disabled: if the per-cpu __preempt_count is
 * non-zero, or the interrupted context had interrupts off, go straight to
 * restore_all.  Otherwise call preempt_schedule_irq and re-check.
 */
340ENTRY(resume_kernel)
341	DISABLE_INTERRUPTS(CLBR_ANY)
342.Lneed_resched:
343	cmpl	$0, PER_CPU_VAR(__preempt_count)
344	jnz	restore_all
345	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
346	jz	restore_all
347	call	preempt_schedule_irq
348	jmp	.Lneed_resched
349END(resume_kernel)
350#endif
351
352GLOBAL(__begin_SYSENTER_singlestep_region)
353/*
354 * All code from here through __end_SYSENTER_singlestep_region is subject
355 * to being single-stepped if a user program sets TF and executes SYSENTER.
356 * There is absolutely nothing that we can do to prevent this from happening
357 * (thanks Intel!).  To keep our handling of this situation as simple as
358 * possible, we handle TF just like AC and NT, except that our #DB handler
359 * will ignore all of the single-step traps generated in this range.
360 */
361
362#ifdef CONFIG_XEN
363/*
364 * Xen doesn't set %esp to be precisely what the normal SYSENTER
365 * entry point expects, so fix it up before using the normal path.
366 */
367ENTRY(xen_sysenter_target)
368	addl	$5*4, %esp			/* remove xen-provided frame */
369	jmp	.Lsysenter_past_esp		/* join entry_SYSENTER_32 after its %esp load */
370#endif
371
372/*
373 * 32-bit SYSENTER entry.
374 *
375 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
376 * if X86_FEATURE_SEP is available.  This is the preferred system call
377 * entry on 32-bit systems.
378 *
379 * The SYSENTER instruction, in principle, should *only* occur in the
380 * vDSO.  In practice, a small number of Android devices were shipped
381 * with a copy of Bionic that inlined a SYSENTER instruction.  This
382 * never happened in any of Google's Bionic versions -- it only happened
383 * in a narrow range of Intel-provided versions.
384 *
385 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
386 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
387 * SYSENTER does not save anything on the stack,
388 * and does not save old EIP (!!!), ESP, or EFLAGS.
389 *
390 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
391 * user and/or vm86 state), we explicitly disable the SYSENTER
392 * instruction in vm86 mode by reprogramming the MSRs.
393 *
394 * Arguments:
395 * eax  system call number
396 * ebx  arg1
397 * ecx  arg2
398 * edx  arg3
399 * esi  arg4
400 * edi  arg5
401 * ebp  user stack
402 * 0(%ebp) arg6
403 */
404ENTRY(entry_SYSENTER_32)
405	movl	TSS_sysenter_sp0(%esp), %esp	/* load %esp from the TSS_sysenter_sp0 slot */
406.Lsysenter_past_esp:
	/* Build by hand the frame that SYSENTER did not save. */
407	pushl	$__USER_DS		/* pt_regs->ss */
408	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
409	pushfl				/* pt_regs->flags (except IF = 0) */
410	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
411	pushl	$__USER_CS		/* pt_regs->cs */
412	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
413	pushl	%eax			/* pt_regs->orig_ax */
414	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
415
416	/*
417	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
418	 * and TF ourselves.  To save a few cycles, we can check whether
419	 * either was set instead of doing an unconditional popfl.
420	 * This needs to happen before enabling interrupts so that
421	 * we don't get preempted with NT set.
422	 *
423	 * If TF is set, we will single-step all the way to here -- do_debug
424	 * will ignore all the traps.  (Yes, this is slow, but so is
425	 * single-stepping in general.  This allows us to avoid having
426	 * a more complicated code to handle the case where a user program
427	 * forces us to single-step through the SYSENTER entry code.)
428	 *
429	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
430	 * out-of-line as an optimization: NT is unlikely to be set in the
431	 * majority of the cases and instead of polluting the I$ unnecessarily,
432	 * we're keeping that code behind a branch which will predict as
433	 * not-taken and therefore its instructions won't be fetched.
434	 */
435	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
436	jnz	.Lsysenter_fix_flags
437.Lsysenter_flags_fixed:
438
439	/*
440	 * User mode is traced as though IRQs are on, and SYSENTER
441	 * turned them off.
442	 */
443	TRACE_IRQS_OFF
444
445	movl	%esp, %eax		/* argument: pt_regs pointer */
446	call	do_fast_syscall_32
	/* A zero return value in %eax takes the IRET exit path instead of SYSEXIT. */
447	/* XEN PV guests always use IRET path */
448	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
449		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
450
451/* Opportunistic SYSEXIT */
452	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
453	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
454	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
4551:	mov	PT_FS(%esp), %fs
456	PTGS_TO_GS
457	popl	%ebx			/* pt_regs->bx */
458	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
459	popl	%esi			/* pt_regs->si */
460	popl	%edi			/* pt_regs->di */
461	popl	%ebp			/* pt_regs->bp */
462	popl	%eax			/* pt_regs->ax */
463
464	/*
465	 * Restore all flags except IF. (We restore IF separately because
466	 * STI gives a one-instruction window in which we won't be interrupted,
467	 * whereas POPF does not.)
468	 */
	/* %esp is at pt_regs->ds after the pops above, hence the PT_DS-relative step. */
469	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
470	btr	$X86_EFLAGS_IF_BIT, (%esp)
471	popfl
472
473	/*
474	 * Return back to the vDSO, which will pop ecx and edx.
475	 * Don't bother with DS and ES (they already contain __USER_DS).
476	 */
477	sti
478	sysexit
479
	/* Fixup for the %fs load at 1: zero the saved fs and retry. */
480.pushsection .fixup, "ax"
4812:	movl	$0, PT_FS(%esp)
482	jmp	1b
483.popsection
484	_ASM_EXTABLE(1b, 2b)
485	PTGS_TO_GS_EX
486
	/* Out-of-line slow path: reload EFLAGS with only X86_EFLAGS_FIXED set. */
487.Lsysenter_fix_flags:
488	pushl	$X86_EFLAGS_FIXED
489	popfl
490	jmp	.Lsysenter_flags_fixed
491GLOBAL(__end_SYSENTER_singlestep_region)
492ENDPROC(entry_SYSENTER_32)
493
494/*
495 * 32-bit legacy system call entry.
496 *
497 * 32-bit x86 Linux system calls traditionally used the INT $0x80
498 * instruction.  INT $0x80 lands here.
499 *
500 * This entry point can be used by any 32-bit program to perform system calls.
501 * Instances of INT $0x80 can be found inline in various programs and
502 * libraries.  It is also used by the vDSO's __kernel_vsyscall
503 * fallback for hardware that doesn't support a faster entry method.
504 * Restarted 32-bit system calls also fall back to INT $0x80
505 * regardless of what instruction was originally used to do the system
506 * call.  (64-bit programs can use INT $0x80 as well, but they can
507 * only run on 64-bit kernels and therefore land in
508 * entry_INT80_compat.)
509 *
510 * This is considered a slow path.  It is not used by most libc
511 * implementations on modern hardware except during process startup.
512 *
513 * Arguments:
514 * eax  system call number
515 * ebx  arg1
516 * ecx  arg2
517 * edx  arg3
518 * esi  arg4
519 * edi  arg5
520 * ebp  arg6
521 */
522ENTRY(entry_INT80_32)
523	ASM_CLAC
524	pushl	%eax			/* pt_regs->orig_ax */
525	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
526
527	/*
528	 * User mode is traced as though IRQs are on, and the interrupt gate
529	 * turned them off.
530	 */
531	TRACE_IRQS_OFF
532
533	movl	%esp, %eax		/* argument: pt_regs pointer */
534	call	do_int80_syscall_32
535.Lsyscall_32_done:
536
	/*
	 * restore_all: common register-restore-and-IRET exit.  Besides the
	 * system call paths it is reached from ret_from_fork,
	 * resume_userspace and resume_kernel; the NMI paths enter at
	 * .Lrestore_all_notrace and thereby skip TRACE_IRQS_IRET.
	 */
537restore_all:
538	TRACE_IRQS_IRET
539.Lrestore_all_notrace:
540#ifdef CONFIG_X86_ESPFIX32
541	ALTERNATIVE	"jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
542
543	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
544	/*
545	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
546	 * are returning to the kernel.
547	 * See comments in process.c:copy_thread() for details.
548	 */
549	movb	PT_OLDSS(%esp), %ah
550	movb	PT_CS(%esp), %al
551	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
552	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
553	je .Lldt_ss				# returning to user-space with LDT SS
554#endif
555.Lrestore_nocheck:
556	RESTORE_REGS 4				# skip orig_eax/error_code
557.Lirq_return:
558	INTERRUPT_RETURN
559
	/*
	 * iret_exc: target of the exception table entry for .Lirq_return.
	 * It pushes a zero error code and do_iret_error and joins
	 * common_exception.
	 */
560.section .fixup, "ax"
561ENTRY(iret_exc	)
562	pushl	$0				# no error code
563	pushl	$do_iret_error
564	jmp	common_exception
565.previous
566	_ASM_EXTABLE(.Lirq_return, iret_exc)
567
568#ifdef CONFIG_X86_ESPFIX32
569.Lldt_ss:
570/*
571 * Setup and switch to ESPFIX stack
572 *
573 * We're returning to userspace with a 16 bit stack. The CPU will not
574 * restore the high word of ESP for us on executing iret... This is an
575 * "official" bug of all the x86-compatible CPUs, which we can work
576 * around to make dosemu and wine happy. We do this by preloading the
577 * high word of ESP with the high word of the userspace ESP while
578 * compensating for the offset by changing to the ESPFIX segment with
579 * a base address that matches for the difference.
580 */
581#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
582	mov	%esp, %edx			/* load kernel esp */
583	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
584	mov	%dx, %ax			/* eax: new kernel esp */
585	sub	%eax, %edx			/* offset (low word is 0) */
586	shr	$16, %edx
587	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
588	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	/* build the ss:esp pair on the stack for the lss below */
589	pushl	$__ESPFIX_SS
590	pushl	%eax				/* new kernel esp */
591	/*
592	 * Disable interrupts, but do not irqtrace this section: we
593	 * will soon execute iret and the tracer was already set to
594	 * the irqstate after the IRET:
595	 */
596	DISABLE_INTERRUPTS(CLBR_EAX)
597	lss	(%esp), %esp			/* switch to espfix segment */
598	jmp	.Lrestore_nocheck
599#endif
600ENDPROC(entry_INT80_32)
601
602.macro FIXUP_ESPFIX_STACK
603/*
604 * Switch back from the ESPFIX stack to the normal zerobased stack
605 *
606 * We can't call C functions using the ESPFIX stack. This code reads
607 * the high word of the segment base from the GDT and switches to the
608 * normal stack and adjusts ESP with the matching offset.
609 *
 * Clobbers %eax, which holds the adjusted stack pointer afterwards.
 */
610#ifdef CONFIG_X86_ESPFIX32
611	/* fixup the stack */
612	mov	GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
613	mov	GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
614	shl	$16, %eax
615	addl	%esp, %eax			/* the adjusted stack pointer */
	/* build the ss:esp pair on the stack for the lss below */
616	pushl	$__KERNEL_DS
617	pushl	%eax
618	lss	(%esp), %esp			/* switch to the normal stack segment */
619#endif
620.endm
/*
 * UNWIND_ESPFIX_STACK: if %ss is __ESPFIX_SS, load __KERNEL_DS into ds/es
 * and switch back to the normal stack through FIXUP_ESPFIX_STACK; otherwise
 * do nothing.  Clobbers %eax.  Local label 27 is used.
 */
621.macro UNWIND_ESPFIX_STACK
622#ifdef CONFIG_X86_ESPFIX32
623	movl	%ss, %eax
624	/* see if on espfix stack */
625	cmpw	$__ESPFIX_SS, %ax
626	jne	27f
627	movl	$__KERNEL_DS, %eax
628	movl	%eax, %ds
629	movl	%eax, %es
630	/* switch to normal stack */
631	FIXUP_ESPFIX_STACK
63227:
633#endif
634.endm
635
636/*
637 * Build the entry stubs with some assembler magic.
638 * We pack 1 stub into every 8-byte block.
639 */
640	.align 8
/*
 * One stub per vector from FIRST_EXTERNAL_VECTOR up to (not including)
 * FIRST_SYSTEM_VECTOR.  Each stub pushes (~vector + 0x80) and jumps to
 * common_interrupt, which subtracts the 0x80 bias again.
 */
641ENTRY(irq_entries_start)
642    vector=FIRST_EXTERNAL_VECTOR
643    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
644	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
645    vector=vector+1
646	jmp	common_interrupt
647	.align	8
648    .endr
649END(irq_entries_start)
650
651/*
652 * the CPU automatically disables interrupts when executing an IRQ vector,
653 * so IRQ-flags tracing has to follow that:
654 */
655	.p2align CONFIG_X86_L1_CACHE_SHIFT
/*
 * common_interrupt: shared body of the irq_entries_start stubs.  On entry
 * the top of the stack holds the biased vector value pushed by the stub.
 * Calls do_IRQ with the pt_regs pointer in eax, then exits through
 * ret_from_intr.
 */
656common_interrupt:
657	ASM_CLAC
658	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
659	SAVE_ALL
660	ENCODE_FRAME_POINTER
661	TRACE_IRQS_OFF
662	movl	%esp, %eax
663	call	do_IRQ
664	jmp	ret_from_intr
665ENDPROC(common_interrupt)
666
/*
 * BUILD_INTERRUPT3(name, nr, fn): emit the entry point `name` for vector
 * `nr`.  It pushes ~nr into the orig_eax slot, saves the registers, calls
 * `fn` with the pt_regs pointer in eax and exits through ret_from_intr.
 *
 * The preprocessor joins the whole body into one line, so every statement
 * needs its own ';' separator.  TRACE_IRQS_OFF gets one here as well, so the
 * macro does not depend on that definition supplying a terminator itself;
 * an extra ';' is only an empty statement to the assembler.
 */
667#define BUILD_INTERRUPT3(name, nr, fn)	\
668ENTRY(name)				\
669	ASM_CLAC;			\
670	pushl	$~(nr);			\
671	SAVE_ALL;			\
672	ENCODE_FRAME_POINTER;		\
673	TRACE_IRQS_OFF;			\
674	movl	%esp, %eax;		\
675	call	fn;			\
676	jmp	ret_from_intr;		\
677ENDPROC(name)
678
679
/*
 * BUILD_INTERRUPT(name, nr): emit entry point `name` calling smp_<name>.
 * With CONFIG_TRACING it also emits trace_<name> calling smp_trace_<name>;
 * without it TRACE_BUILD_INTERRUPT expands to nothing.
 */
680#ifdef CONFIG_TRACING
681# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
682#else
683# define TRACE_BUILD_INTERRUPT(name, nr)
684#endif
685
686#define BUILD_INTERRUPT(name, nr)		\
687	BUILD_INTERRUPT3(name, nr, smp_##name);	\
688	TRACE_BUILD_INTERRUPT(name, nr)
689
690/* The include is where all of the SMP etc. interrupts come from */
691#include <asm/entry_arch.h>
692
/* Entry stub: push a zero error code and do_coprocessor_error, join common_exception. */
693ENTRY(coprocessor_error)
694	ASM_CLAC
695	pushl	$0				/* dummy error code (orig_eax slot) */
696	pushl	$do_coprocessor_error		/* handler address (gs slot) */
697	jmp	common_exception
698END(coprocessor_error)
699
/*
 * Entry stub: push a zero error code and a handler, join common_exception.
 * With CONFIG_X86_INVD_BUG the handler is chosen by an ALTERNATIVE:
 * do_general_protection is the first (default) sequence and
 * do_simd_coprocessor_error the replacement tied to X86_FEATURE_XMM.
 * Otherwise do_simd_coprocessor_error is always used.
 */
700ENTRY(simd_coprocessor_error)
701	ASM_CLAC
702	pushl	$0
703#ifdef CONFIG_X86_INVD_BUG
704	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
705	ALTERNATIVE "pushl	$do_general_protection",	\
706		    "pushl	$do_simd_coprocessor_error",	\
707		    X86_FEATURE_XMM
708#else
709	pushl	$do_simd_coprocessor_error
710#endif
711	jmp	common_exception
712END(simd_coprocessor_error)
713
/* Entry stub: push -1 in the error code slot and do_device_not_available, join common_exception. */
714ENTRY(device_not_available)
715	ASM_CLAC
716	pushl	$-1				# mark this as an int
717	pushl	$do_device_not_available
718	jmp	common_exception
719END(device_not_available)
720
721#ifdef CONFIG_PARAVIRT
/*
 * native_iret: a bare iret.  Its exception table entry sends a fault on the
 * instruction to iret_exc.
 */
722ENTRY(native_iret)
723	iret
724	_ASM_EXTABLE(native_iret, iret_exc)
725END(native_iret)
726#endif
727
/* Entry stub: push a zero error code and do_overflow, join common_exception. */
728ENTRY(overflow)
729	ASM_CLAC
730	pushl	$0
731	pushl	$do_overflow
732	jmp	common_exception
733END(overflow)
734
/* Entry stub: push a zero error code and do_bounds, join common_exception. */
735ENTRY(bounds)
736	ASM_CLAC
737	pushl	$0
738	pushl	$do_bounds
739	jmp	common_exception
740END(bounds)
741
/* Entry stub: push a zero error code and do_invalid_op, join common_exception. */
742ENTRY(invalid_op)
743	ASM_CLAC
744	pushl	$0
745	pushl	$do_invalid_op
746	jmp	common_exception
747END(invalid_op)
748
/* Entry stub: push a zero error code and do_coprocessor_segment_overrun, join common_exception. */
749ENTRY(coprocessor_segment_overrun)
750	ASM_CLAC
751	pushl	$0
752	pushl	$do_coprocessor_segment_overrun
753	jmp	common_exception
754END(coprocessor_segment_overrun)
755
/*
 * Entry stub: push do_invalid_TSS and join common_exception.  No dummy error
 * code is pushed; common_exception reads the error code from the slot that
 * is already on the stack below the handler address.
 */
756ENTRY(invalid_TSS)
757	ASM_CLAC
758	pushl	$do_invalid_TSS
759	jmp	common_exception
760END(invalid_TSS)
761
/* Entry stub: push do_segment_not_present and join common_exception (no dummy error code is pushed). */
762ENTRY(segment_not_present)
763	ASM_CLAC
764	pushl	$do_segment_not_present
765	jmp	common_exception
766END(segment_not_present)
767
/* Entry stub: push do_stack_segment and join common_exception (no dummy error code is pushed). */
768ENTRY(stack_segment)
769	ASM_CLAC
770	pushl	$do_stack_segment
771	jmp	common_exception
772END(stack_segment)
773
/* Entry stub: push do_alignment_check and join common_exception (no dummy error code is pushed). */
774ENTRY(alignment_check)
775	ASM_CLAC
776	pushl	$do_alignment_check
777	jmp	common_exception
778END(alignment_check)
779
/* Entry stub: push a zero error code and do_divide_error, join common_exception. */
780ENTRY(divide_error)
781	ASM_CLAC
782	pushl	$0				# no error code
783	pushl	$do_divide_error
784	jmp	common_exception
785END(divide_error)
786
787#ifdef CONFIG_X86_MCE
/*
 * Entry stub: push a zero error code and the handler, join common_exception.
 * The handler push has no '$': it pushes the pointer currently stored in
 * machine_check_vector, so the handler is looked up at run time.
 */
788ENTRY(machine_check)
789	ASM_CLAC
790	pushl	$0
791	pushl	machine_check_vector
792	jmp	common_exception
793END(machine_check)
794#endif
795
/* Entry stub: push a zero error code and do_spurious_interrupt_bug, join common_exception. */
796ENTRY(spurious_interrupt_bug)
797	ASM_CLAC
798	pushl	$0
799	pushl	$do_spurious_interrupt_bug
800	jmp	common_exception
801END(spurious_interrupt_bug)
802
803#ifdef CONFIG_XEN
/*
 * xen_hypervisor_callback: save registers, then check whether the
 * interrupted EIP lies in [xen_iret_start_crit, xen_iret_end_crit).  If so,
 * jump to xen_iret_crit_fixup; otherwise fall into xen_do_upcall.
 */
804ENTRY(xen_hypervisor_callback)
805	pushl	$-1				/* orig_ax = -1 => not a system call */
806	SAVE_ALL
807	ENCODE_FRAME_POINTER
808	TRACE_IRQS_OFF
809
810	/*
811	 * Check to see if we got the event in the critical
812	 * region in xen_iret_direct, after we've reenabled
813	 * events and checked for pending events.  This simulates
814	 * iret instruction's behaviour where it delivers a
815	 * pending interrupt when enabling interrupts:
816	 */
817	movl	PT_EIP(%esp), %eax
818	cmpl	$xen_iret_start_crit, %eax
819	jb	1f				/* below the region: normal upcall */
820	cmpl	$xen_iret_end_crit, %eax
821	jae	1f				/* at or past its end: normal upcall */
822
823	jmp	xen_iret_crit_fixup
824
/* xen_do_upcall: call xen_evtchn_do_upcall with the pt_regs pointer in eax. */
825ENTRY(xen_do_upcall)
8261:	mov	%esp, %eax
827	call	xen_evtchn_do_upcall
828#ifndef CONFIG_PREEMPT
829	call	xen_maybe_preempt_hcall
830#endif
831	jmp	ret_from_intr
832ENDPROC(xen_hypervisor_callback)
833
834/*
835 * Hypervisor uses this for application faults while it executes.
836 * We get here for two reasons:
837 *  1. Fault while reloading DS, ES, FS or GS
838 *  2. Fault while executing IRET
839 * Category 1 we fix up by reattempting the load, and zeroing the segment
840 * register if the load fails.
841 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
842 * normal Linux return path in this case because if we use the IRET hypercall
843 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
844 * We distinguish between categories by maintaining a status value in EAX.
845 */
846ENTRY(xen_failsafe_callback)
847	pushl	%eax
848	movl	$1, %eax			/* status: stays non-zero unless a fixup below runs */
	/* Reload the four segment registers from the values above the saved eax. */
8491:	mov	4(%esp), %ds
8502:	mov	8(%esp), %es
8513:	mov	12(%esp), %fs
8524:	mov	16(%esp), %gs
853	/* EAX == 0 => Category 1 (Bad segment)
854	   EAX != 0 => Category 2 (Bad IRET) */
855	testl	%eax, %eax
856	popl	%eax				/* popl and lea leave the flags from testl intact */
857	lea	16(%esp), %esp			/* drop the four segment values */
858	jz	5f
859	jmp	iret_exc
8605:	pushl	$-1				/* orig_ax = -1 => not a system call */
861	SAVE_ALL
862	ENCODE_FRAME_POINTER
863	jmp	ret_from_exception
864
	/*
	 * Fixups for the loads at 1-4: clear eax (marks category 1), zero the
	 * offending saved selector and retry the load.
	 */
865.section .fixup, "ax"
8666:	xorl	%eax, %eax
867	movl	%eax, 4(%esp)
868	jmp	1b
8697:	xorl	%eax, %eax
870	movl	%eax, 8(%esp)
871	jmp	2b
8728:	xorl	%eax, %eax
873	movl	%eax, 12(%esp)
874	jmp	3b
8759:	xorl	%eax, %eax
876	movl	%eax, 16(%esp)
877	jmp	4b
878.previous
879	_ASM_EXTABLE(1b, 6b)
880	_ASM_EXTABLE(2b, 7b)
881	_ASM_EXTABLE(3b, 8b)
882	_ASM_EXTABLE(4b, 9b)
883ENDPROC(xen_failsafe_callback)
884
/* Entry point xen_hvm_callback_vector for HYPERVISOR_CALLBACK_VECTOR; calls xen_evtchn_do_upcall. */
885BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
886		xen_evtchn_do_upcall)
887
888#endif /* CONFIG_XEN */
889
890#if IS_ENABLED(CONFIG_HYPERV)
891
/* Entry point hyperv_callback_vector for HYPERVISOR_CALLBACK_VECTOR; calls hyperv_vector_handler. */
892BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
893	hyperv_vector_handler)
894
895#endif /* CONFIG_HYPERV */
896
897#ifdef CONFIG_FUNCTION_TRACER
898#ifdef CONFIG_DYNAMIC_FTRACE
899
/* With CONFIG_DYNAMIC_FTRACE, mcount itself does nothing and returns immediately. */
900ENTRY(mcount)
901	ret
902END(mcount)
903
/*
 * ftrace_caller: save eax/ecx/edx, set up the tracer arguments and call
 * through the ftrace_call site, then restore the registers.
 *   eax = word above the four pushes (the return address into the traced
 *         function) minus MCOUNT_INSN_SIZE
 *   edx = 4(%ebp)
 *   ecx = function_trace_op
 *   stack argument = 0 (NULL regs pointer)
 */
904ENTRY(ftrace_caller)
905	pushl	%eax
906	pushl	%ecx
907	pushl	%edx
908	pushl	$0				/* Pass NULL as regs pointer */
909	movl	4*4(%esp), %eax
910	movl	0x4(%ebp), %edx
911	movl	function_trace_op, %ecx
912	subl	$MCOUNT_INSN_SIZE, %eax
913
	/* exported call site; presumably retargeted at run time -- TODO confirm */
914.globl ftrace_call
915ftrace_call:
916	call	ftrace_stub
917
918	addl	$4, %esp			/* skip NULL pointer */
919	popl	%edx
920	popl	%ecx
921	popl	%eax
922.Lftrace_ret:
923#ifdef CONFIG_FUNCTION_GRAPH_TRACER
924.globl ftrace_graph_call
925ftrace_graph_call:
926	jmp	ftrace_stub
927#endif
928
929/* This is weak to keep gas from relaxing the jumps */
930WEAK(ftrace_stub)
931	ret
932END(ftrace_caller)
933
/*
 * ftrace_regs_caller: like ftrace_caller, but it builds a register frame on
 * the stack and passes its address as the stack argument.  The flags and
 * return-ip slots are shuffled as described below, and shuffled back before
 * returning through .Lftrace_ret.
 */
934ENTRY(ftrace_regs_caller)
935	pushf	/* push flags before compare (in cs location) */
936
937	/*
938	 * i386 does not save SS and ESP when coming from kernel.
939	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
940	 * Unfortunately, that means eflags must be at the same location
941	 * as the current return ip is. We move the return ip into the
942	 * ip location, and move flags into the return ip location.
943	 */
944	pushl	4(%esp)				/* save return ip into ip slot */
945
946	pushl	$0				/* Load 0 into orig_ax */
947	pushl	%gs
948	pushl	%fs
949	pushl	%es
950	pushl	%ds
951	pushl	%eax
952	pushl	%ebp
953	pushl	%edi
954	pushl	%esi
955	pushl	%edx
956	pushl	%ecx
957	pushl	%ebx
958
959	movl	13*4(%esp), %eax		/* Get the saved flags */
960	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
961						/* clobbering return ip */
962	movl	$__KERNEL_CS, 13*4(%esp)
963
964	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
965	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
966	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
967	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
968	pushl	%esp				/* Save pt_regs as 4th parameter */
969
970GLOBAL(ftrace_regs_call)
971	call	ftrace_stub
972
973	addl	$4, %esp			/* Skip pt_regs */
974	movl	14*4(%esp), %eax		/* Move flags back into cs */
975	movl	%eax, 13*4(%esp)		/* Needed to keep addl	from modifying flags */
976	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
977	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */
978
979	popl	%ebx
980	popl	%ecx
981	popl	%edx
982	popl	%esi
983	popl	%edi
984	popl	%ebp
985	popl	%eax
986	popl	%ds
987	popl	%es
988	popl	%fs
989	popl	%gs
990	addl	$8, %esp			/* Skip orig_ax and ip */
991	popf					/* Pop flags at end (no addl to corrupt flags) */
992	jmp	.Lftrace_ret
993
	/*
	 * NOTE(review): the two instructions below follow an unconditional
	 * jmp and carry no label, so nothing in this file can reach them.
	 */
994	popf
995	jmp	ftrace_stub
996#else /* ! CONFIG_DYNAMIC_FTRACE */
997
/*
 * mcount (without CONFIG_DYNAMIC_FTRACE): return at once if %esp is below
 * __PAGE_OFFSET or no tracer is installed; otherwise call the installed
 * ftrace_trace_function, or hand over to ftrace_graph_caller when a graph
 * tracer hook is installed.
 */
998ENTRY(mcount)
999	cmpl	$__PAGE_OFFSET, %esp
1000	jb	ftrace_stub			/* Paging not enabled yet? */
1001
1002	cmpl	$ftrace_stub, ftrace_trace_function
1003	jnz	.Ltrace
1004#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1005	cmpl	$ftrace_stub, ftrace_graph_return
1006	jnz	ftrace_graph_caller
1007
1008	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
1009	jnz	ftrace_graph_caller
1010#endif
1011.globl ftrace_stub
1012ftrace_stub:
1013	ret
1014
1015	/* taken from glibc */
1016.Ltrace:
1017	pushl	%eax
1018	pushl	%ecx
1019	pushl	%edx
1020	movl	0xc(%esp), %eax			/* word above the three pushes: return address */
1021	movl	0x4(%ebp), %edx
1022	subl	$MCOUNT_INSN_SIZE, %eax
1023
1024	call	*ftrace_trace_function
1025
1026	popl	%edx
1027	popl	%ecx
1028	popl	%eax
1029	jmp	ftrace_stub
1030END(mcount)
1031#endif /* CONFIG_DYNAMIC_FTRACE */
1032EXPORT_SYMBOL(mcount)
1033#endif /* CONFIG_FUNCTION_TRACER */
1034
1035#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: save eax/ecx/edx and call prepare_ftrace_return with
 *   eax = word above the three pushes (return address) - MCOUNT_INSN_SIZE
 *   edx = address of the stack slot at 4(%ebp)
 *   ecx = the word at (%ebp)
 * then restore the registers and return.
 */
1036ENTRY(ftrace_graph_caller)
1037	pushl	%eax
1038	pushl	%ecx
1039	pushl	%edx
1040	movl	0xc(%esp), %eax
1041	lea	0x4(%ebp), %edx
1042	movl	(%ebp), %ecx
1043	subl	$MCOUNT_INSN_SIZE, %eax
1044	call	prepare_ftrace_return
1045	popl	%edx
1046	popl	%ecx
1047	popl	%eax
1048	ret
1049END(ftrace_graph_caller)
1050
/*
 * return_to_handler: preserve eax/edx around a call to
 * ftrace_return_to_handler (argument: %ebp in eax), then jump to the address
 * that call returned.  %ecx is clobbered.
 */
1051.globl return_to_handler
1052return_to_handler:
1053	pushl	%eax
1054	pushl	%edx
1055	movl	%ebp, %eax
1056	call	ftrace_return_to_handler
1057	movl	%eax, %ecx			/* returned address; freed eax for the pop below */
1058	popl	%edx
1059	popl	%eax
1060	jmp	*%ecx
1061#endif
1061#endif
1062
1063#ifdef CONFIG_TRACING
/* Entry stub: push trace_do_page_fault and join common_exception (no dummy error code is pushed). */
1064ENTRY(trace_page_fault)
1065	ASM_CLAC
1066	pushl	$trace_do_page_fault
1067	jmp	common_exception
1068END(trace_page_fault)
1069#endif
1070
/* Entry stub: push do_page_fault and join common_exception (no dummy error code is pushed). */
1071ENTRY(page_fault)
1072	ASM_CLAC
1073	pushl	$do_page_fault
	/* NOTE(review): this ALIGN pads between the push and the jmp; its purpose is not evident from this file. */
1074	ALIGN
1075	jmp common_exception
1076END(page_fault)
1077
/*
 * common_exception: shared body of the exception entry stubs.
 *
 * On entry the top of the stack holds the handler address (in what becomes
 * the pt_regs gs slot) and below it the error code (in the orig_eax slot).
 * The remaining registers are saved and kernel segments loaded.  The
 * handler is then called with eax = pt_regs pointer and edx = error code,
 * and the exit goes through ret_from_exception.
 */
1078common_exception:
1079	/* the function address is in %gs's slot on the stack */
1080	pushl	%fs
1081	pushl	%es
1082	pushl	%ds
1083	pushl	%eax
1084	pushl	%ebp
1085	pushl	%edi
1086	pushl	%esi
1087	pushl	%edx
1088	pushl	%ecx
1089	pushl	%ebx
1090	ENCODE_FRAME_POINTER
1091	cld
1092	movl	$(__KERNEL_PERCPU), %ecx
1093	movl	%ecx, %fs
1094	UNWIND_ESPFIX_STACK
1095	GS_TO_REG %ecx
1096	movl	PT_GS(%esp), %edi		# get the function address
1097	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
1098	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
1099	REG_TO_PTGS %ecx			/* now put the real gs value in its slot */
1100	SET_KERNEL_GS %ecx
1101	movl	$(__USER_DS), %ecx
1102	movl	%ecx, %ds
1103	movl	%ecx, %es
1104	TRACE_IRQS_OFF
1105	movl	%esp, %eax			# pt_regs pointer
1106	call	*%edi
1107	jmp	ret_from_exception
1108END(common_exception)
1109
1110ENTRY(debug)
1111	/*
1112	 * #DB can happen at the first instruction of
1113	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
1114	 * happens, then we will be running on a very small stack.  We
1115	 * need to detect this condition and switch to the thread
1116	 * stack before calling any C code at all.
1117	 *
1118	 * If you edit this code, keep in mind that NMIs can happen in here.
1119	 */
1120	ASM_CLAC
1121	pushl	$-1				# mark this as an int
1122	SAVE_ALL
1123	ENCODE_FRAME_POINTER
1124	xorl	%edx, %edx			# error code 0
1125	movl	%esp, %eax			# pt_regs pointer
1126
1127	/* Are we currently on the SYSENTER stack? */
1128	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1129	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
1130	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	/* unsigned compare: a distance below the stack size means esp is inside it */
1131	jb	.Ldebug_from_sysenter_stack
1132
1133	TRACE_IRQS_OFF
1134	call	do_debug
1135	jmp	ret_from_exception
1136
1137.Ldebug_from_sysenter_stack:
1138	/* We're on the SYSENTER stack.  Switch off. */
1139	movl	%esp, %ebx			/* keep the old esp in ebx across the call */
1140	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1141	TRACE_IRQS_OFF
1142	call	do_debug
1143	movl	%ebx, %esp			/* back to the stack that holds pt_regs */
1144	jmp	ret_from_exception
1145END(debug)
1146
1147/*
1148 * NMI is doubly nasty.  It can happen on the first instruction of
1149 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
1150 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
1151 * switched stacks.  We handle both conditions by simply checking whether we
1152 * interrupted kernel code running on the SYSENTER stack.
1153 */
1154ENTRY(nmi)
1155	ASM_CLAC
1156#ifdef CONFIG_X86_ESPFIX32
	/* Check for the espfix stack segment without losing eax. */
1157	pushl	%eax
1158	movl	%ss, %eax
1159	cmpw	$__ESPFIX_SS, %ax
1160	popl	%eax				/* popl leaves the cmpw flags intact */
1161	je	.Lnmi_espfix_stack
1162#endif
1163
1164	pushl	%eax				# pt_regs->orig_ax
1165	SAVE_ALL
1166	ENCODE_FRAME_POINTER
1167	xorl	%edx, %edx			# zero error code
1168	movl	%esp, %eax			# pt_regs pointer
1169
1170	/* Are we currently on the SYSENTER stack? */
1171	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1172	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
1173	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	/* unsigned compare: a distance below the stack size means esp is inside it */
1174	jb	.Lnmi_from_sysenter_stack
1175
1176	/* Not on SYSENTER stack. */
1177	call	do_nmi
1178	jmp	.Lrestore_all_notrace
1179
1180.Lnmi_from_sysenter_stack:
1181	/*
1182	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
1183	 * is using the thread stack right now, so it's safe for us to use it.
1184	 */
1185	movl	%esp, %ebx			/* keep the old esp in ebx across the call */
1186	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1187	call	do_nmi
1188	movl	%ebx, %esp			/* back to the stack that holds pt_regs */
1189	jmp	.Lrestore_all_notrace
1190
1191#ifdef CONFIG_X86_ESPFIX32
1192.Lnmi_espfix_stack:
1193	/*
1194	 * create the pointer to lss back
1195	 */
1196	pushl	%ss
1197	pushl	%esp
1198	addl	$4, (%esp)			/* stored esp now points at the ss slot above it */
1199	/* copy the iret frame of 12 bytes */
1200	.rept 3
1201	pushl	16(%esp)
1202	.endr
1203	pushl	%eax
1204	SAVE_ALL
1205	ENCODE_FRAME_POINTER
1206	FIXUP_ESPFIX_STACK			# %eax == %esp
1207	xorl	%edx, %edx			# zero error code
1208	call	do_nmi
1209	RESTORE_REGS
	/* skip the 4-byte orig_ax slot and the 12-byte iret copy to reach the saved esp:ss pair */
1210	lss	12+4(%esp), %esp		# back to espfix stack
1211	jmp	.Lirq_return
1212#endif
1213END(nmi)
1214
/*
 * int3: save registers and call do_int3 with eax = pt_regs pointer and
 * edx = 0, then exit through ret_from_exception.
 */
1215ENTRY(int3)
1216	ASM_CLAC
1217	pushl	$-1				# mark this as an int
1218	SAVE_ALL
1219	ENCODE_FRAME_POINTER
1220	TRACE_IRQS_OFF
1221	xorl	%edx, %edx			# zero error code
1222	movl	%esp, %eax			# pt_regs pointer
1223	call	do_int3
1224	jmp	ret_from_exception
1225END(int3)
1226
/*
 * Entry stub: push do_general_protection and join common_exception.  No
 * dummy error code is pushed; common_exception reads the error code from
 * the slot that is already on the stack below the handler address.
 *
 * ASM_CLAC is issued first, as in every other exception stub in this file
 * that joins common_exception.
 */
1227ENTRY(general_protection)
	ASM_CLAC
1228	pushl	$do_general_protection
1229	jmp	common_exception
1230END(general_protection)
1231
1232#ifdef CONFIG_KVM_GUEST
/* Entry stub: push do_async_page_fault and join common_exception (no dummy error code is pushed). */
1233ENTRY(async_page_fault)
1234	ASM_CLAC
1235	pushl	$do_async_page_fault
1236	jmp	common_exception
1237END(async_page_fault)
1238#endif
1239
/*
 * rewind_stack_do_exit: reset %esp to just below a pt_regs-sized area at the
 * top of the current kernel stack and call do_exit.  %eax is not modified
 * here, so the value the caller left in it is still there at the call.
 */
1240ENTRY(rewind_stack_do_exit)
1241	/* Prevent any naive code from trying to unwind to our caller. */
1242	xorl	%ebp, %ebp
1243
1244	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
1245	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
1246
1247	call	do_exit
	/* do_exit is presumably not expected to return; spin here if it does */
12481:	jmp 1b
1249END(rewind_stack_do_exit)
1250