/*
 * Compatibility mode system call entry point for x86-64.
 *
 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
 */
#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <linux/linkage.h>
#include <linux/err.h>

	.section .entry.text, "ax"

#ifdef CONFIG_PARAVIRT
/*
 * Native implementation of the "return to 32-bit user mode via SYSRET"
 * operation: switch back to the user GS base, then SYSRETL.
 */
ENTRY(native_usergs_sysret32)
	swapgs
	sysretl
ENDPROC(native_usergs_sysret32)
#endif

/*
 * 32-bit SYSENTER instruction entry.
 *
 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
 * IF and VM in rflags are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old rip (!!!) and rflags.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 *
 * This is purely a fast path. For anything complicated we use the int 0x80
 * path below. We set up a complete hardware stack frame to share code
 * with the int 0x80 path.
 */
ENTRY(entry_SYSENTER_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%rbp			/* pt_regs->sp (stashed in bp) */

	/*
	 * Push flags.  This is nasty.  First, interrupts are currently
	 * off, but we need pt_regs->flags to have IF set.  Second, even
	 * if TF was set when SYSENTER started, it's clear by now.  We fix
	 * that later using TIF_SINGLESTEP.
	 */
	pushfq				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
	ASM_CLAC			/* Clear AC after saving FLAGS */

	pushq	$__USER32_CS		/* pt_regs->cs */
	xorq	%r8,%r8			/* r8 = 0: source for every zeroed slot below */
	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	pushq	%r8			/* pt_regs->r8  = 0 */
	pushq	%r8			/* pt_regs->r9  = 0 */
	pushq	%r8			/* pt_regs->r10 = 0 */
	pushq	%r8			/* pt_regs->r11 = 0 */
	pushq	%rbx			/* pt_regs->rbx */
	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
	pushq	%r8			/* pt_regs->r12 = 0 */
	pushq	%r8			/* pt_regs->r13 = 0 */
	pushq	%r8			/* pt_regs->r14 = 0 */
	pushq	%r8			/* pt_regs->r15 = 0 */
	cld

	/*
	 * Sysenter doesn't filter flags, so we need to clear NT
	 * ourselves.  To save a few cycles, we can check whether
	 * NT was set instead of doing an unconditional popfq.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * NB.: sysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
	jnz	sysenter_fix_flags
sysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi		/* arg1 = pointer to the pt_regs just built */
	call	do_fast_syscall_32
	/*
	 * A zero return value sends us down the IRET path
	 * (.Lsyscall_32_done); nonzero continues to the opportunistic
	 * SYSRET path below.
	 */
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
	jmp	sysret32_from_system_call

sysenter_fix_flags:
	/* Out-of-line slow path: reload RFLAGS with just the fixed bit set. */
	pushq	$X86_EFLAGS_FIXED
	popfq
	jmp	sysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)

/*
 * 32-bit SYSCALL instruction entry.
 *
 * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
 * then loads new ss, cs, and rip from previously programmed MSRs.
 * rflags gets masked by a value from another MSR (so CLD and CLAC
 * are not needed). SYSCALL does not save anything on the stack
 * and does not change rsp.
 *
 * Note: rflags saving+masking-with-MSR happens only in Long mode
 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
 *
 * Arguments:
 * eax  system call number
 * ecx  return address
 * ebx  arg1
 * ebp  arg2	(note: not saved in the stack frame, should not be touched)
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * esp  user stack
 * 0(%esp) arg6
 */
ENTRY(entry_SYSCALL_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK

	/* Stash user ESP and switch to the kernel stack. */
	movl	%esp, %r8d
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/* Zero-extending 32-bit regs, do not remove */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%r8			/* pt_regs->sp */
	pushq	%r11			/* pt_regs->flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	%rcx			/* pt_regs->ip */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
	pushq	$-ENOSYS		/* pt_regs->ax */
	xorq	%r8,%r8			/* user ESP already saved; reuse r8 as zero */
	pushq	%r8			/* pt_regs->r8  = 0 */
	pushq	%r8			/* pt_regs->r9  = 0 */
	pushq	%r8			/* pt_regs->r10 = 0 */
	pushq	%r8			/* pt_regs->r11 = 0 */
	pushq	%rbx			/* pt_regs->rbx */
	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
	pushq	%r8			/* pt_regs->r12 = 0 */
	pushq	%r8			/* pt_regs->r13 = 0 */
	pushq	%r8			/* pt_regs->r14 = 0 */
	pushq	%r8			/* pt_regs->r15 = 0 */

	/*
	 * User mode is traced as though IRQs are on, and SYSCALL
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi		/* arg1 = pointer to the pt_regs just built */
	call	do_fast_syscall_32
	/*
	 * A zero return value sends us down the IRET path
	 * (.Lsyscall_32_done); nonzero falls through to the opportunistic
	 * SYSRET path.
	 */
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

	/* Opportunistic SYSRET */
sysret32_from_system_call:
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
	movq	RIP(%rsp), %rcx		/* pt_regs->ip (in rcx) */
	addq	$RAX, %rsp		/* Skip r8-r15 */
	popq	%rax			/* pt_regs->rax */
	popq	%rdx			/* Skip pt_regs->cx */
	popq	%rdx			/* pt_regs->dx */
	popq	%rsi			/* pt_regs->si */
	popq	%rdi			/* pt_regs->di */

	/*
	 * USERGS_SYSRET32 does:
	 *  GSBASE = user's GS base
	 *  EIP = ECX
	 *  RFLAGS = R11
	 *  CS = __USER32_CS
	 *  SS = __USER_DS
	 *
	 * ECX will not match pt_regs->cx, but we're returning to a vDSO
	 * trampoline that will fix up RCX, so this is okay.
	 *
	 * R12-R15 are callee-saved, so they contain whatever was in them
	 * when the system call started, which is already known to user
	 * code.  We zero R8-R10 to avoid info leaks.
	 */
	xorq	%r8, %r8
	xorq	%r9, %r9
	xorq	%r10, %r10
	/* rsp now points at orig_ax; load the saved user stack pointer. */
	movq	RSP-ORIG_RAX(%rsp), %rsp
	USERGS_SYSRET32
END(entry_SYSCALL_compat)

/*
 * Emulated IA32 system calls via int 0x80.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6	(note: not saved in the stack frame, should not be touched)
 *
 * Notes:
 * Uses the same stack frame as the x86-64 version.
 * All registers except eax must be saved (but ptrace may violate that).
 * Arguments are zero extended. For system calls that want sign extension and
 * take long arguments a wrapper is needed. Most calls can just be called
 * directly.
 * Assumes it is only called from user space and entered with interrupts off.
 */

ENTRY(entry_INT80_compat)
	/*
	 * Interrupts are off on entry.
	 */
	PARAVIRT_ADJUST_EXCEPTION_FRAME

	/*
	 * Unlike the SYSENTER path (explicit ASM_CLAC) and the SYSCALL path
	 * (rflags masked by MSR), nothing else on this path clears
	 * EFLAGS.AC.  Entering through the int 0x80 gate leaves a user-set
	 * AC intact, which would leave SMAP protection disabled for the
	 * whole system call.  Clear it early to minimize exposure.
	 */
	ASM_CLAC
	SWAPGS

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack (iret frame is already on stack) */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	xorq	%r8,%r8
	pushq	%r8			/* pt_regs->r8  = 0 */
	pushq	%r8			/* pt_regs->r9  = 0 */
	pushq	%r8			/* pt_regs->r10 = 0 */
	pushq	%r8			/* pt_regs->r11 = 0 */
	pushq	%rbx			/* pt_regs->rbx */
	pushq	%rbp			/* pt_regs->rbp */
	pushq	%r12			/* pt_regs->r12 */
	pushq	%r13			/* pt_regs->r13 */
	pushq	%r14			/* pt_regs->r14 */
	pushq	%r15			/* pt_regs->r15 */
	cld

	/*
	 * User mode is traced as though IRQs are on, and the interrupt
	 * gate turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi		/* arg1 = pointer to the pt_regs just built */
	call	do_syscall_32_irqs_off
.Lsyscall_32_done:
	/*
	 * Shared IRET exit: the SYSENTER and SYSCALL entries above also
	 * jump here when they cannot (or must not) use SYSRET.
	 */

	/* Go back to user mode. */
	TRACE_IRQS_ON
	SWAPGS
	jmp	restore_regs_and_iret
END(entry_INT80_compat)

	ALIGN
GLOBAL(stub32_clone)
	/*
	 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
	 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
	 *
	 * The native 64-bit kernel's sys_clone() implements the latter,
	 * so we need to swap arguments here before calling it:
	 */
	xchg	%r8, %rcx
	jmp	sys_clone