xref: /titanic_51/usr/src/uts/i86pc/ml/syscall_asm_amd64.s (revision 76b27f93c5149222cfc6babb8a5b4f1a06a4ead5)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/asm_linkage.h>
29#include <sys/asm_misc.h>
30#include <sys/regset.h>
31#include <sys/privregs.h>
32#include <sys/psw.h>
33#include <sys/machbrand.h>
34
35#if defined(__lint)
36
37#include <sys/types.h>
38#include <sys/thread.h>
39#include <sys/systm.h>
40
41#else	/* __lint */
42
43#include <sys/segments.h>
44#include <sys/pcb.h>
45#include <sys/trap.h>
46#include <sys/ftrace.h>
47#include <sys/traptrace.h>
48#include <sys/clock.h>
49#include <sys/model.h>
50#include <sys/panic.h>
51
52#if defined(__xpv)
53#include <sys/hypervisor.h>
54#endif
55
56#include "assym.h"
57
58#endif	/* __lint */
59
60/*
61 * We implement five flavours of system call entry points
62 *
63 * -	syscall/sysretq		(amd64 generic)
64 * -	syscall/sysretl		(i386 plus SYSC bit)
65 * -	sysenter/sysexit	(i386 plus SEP bit)
66 * -	int/iret		(i386 generic)
67 * -	lcall/iret		(i386 generic)
68 *
69 * The current libc included in Solaris uses int/iret as the base unoptimized
70 * kernel entry method. Older libc implementations and legacy binaries may use
71 * the lcall call gate, so it must continue to be supported.
72 *
73 * System calls that use an lcall call gate are processed in trap() via a
74 * segment-not-present trap, i.e. lcalls are extremely slow(!).
75 *
76 * The basic pattern used in the 32-bit SYSC handler at this point in time is
77 * to have the bare minimum of assembler, and get to the C handlers as
78 * quickly as possible.
79 *
80 * The 64-bit handler is much closer to the sparcv9 handler; that's
81 * because of passing arguments in registers.  The 32-bit world still
82 * passes arguments on the stack -- that makes that handler substantially
83 * more complex.
84 *
85 * The two handlers share a few code fragments which are broken
86 * out into preprocessor macros below.
87 *
88 * XX64	come back and speed all this up later.  The 32-bit stuff looks
89 * especially easy to speed up the argument copying part ..
90 *
91 *
92 * Notes about segment register usage (c.f. the 32-bit kernel)
93 *
94 * In the 32-bit kernel, segment registers are dutifully saved and
95 * restored on all mode transitions because the kernel uses them directly.
96 * When the processor is running in 64-bit mode, segment registers are
97 * largely ignored.
98 *
99 * %cs and %ss
100 *	controlled by the hardware mechanisms that make mode transitions
101 *
102 * The remaining segment registers have to either be pointing at a valid
103 * descriptor i.e. with the 'present' bit set, or they can be NULL descriptors
104 *
105 * %ds and %es
106 *	always ignored
107 *
108 * %fs and %gs
109 *	fsbase and gsbase are used to control the place they really point at.
110 *	The kernel only depends on %gs, and controls its own gsbase via swapgs
111 *
112 * Note that loading segment registers is still costly because the GDT
113 * lookup still happens (this is because the hardware can't know that we're
114 * not setting up these segment registers for a 32-bit program).  Thus we
115 * avoid doing this in the syscall path, and defer them to lwp context switch
116 * handlers, so the register values remain virtualized to the lwp.
117 */
118
119#if defined(SYSCALLTRACE)
120#define	ORL_SYSCALLTRACE(r32)		\
121	orl	syscalltrace(%rip), r32	/* r32 |= the syscalltrace variable */
122#else
123#define	ORL_SYSCALLTRACE(r32)	/* SYSCALLTRACE not defined: expands to nothing */
124#endif
125
126/*
127 * In the 32-bit kernel, we do absolutely nothing before getting into the
128 * brand callback checks.  In 64-bit land, we do swapgs and then come here.
129 * We assume that the %rsp- and %r15-stashing fields in the CPU structure
130 * are still unused.
131 *
132 * When the callback is invoked, we will be on the user's %gs and
133 * the stack will look like this:
134 *
135 * stack:  --------------------------------------
136 *         | callback pointer			|
137 *    |    | user stack pointer			|
138 *    |    | lwp pointer			|
139 *    v    | userland return address		|
140 *         | callback wrapper return addr	|
141 *         --------------------------------------
142 *
143 */
144#define	BRAND_CALLBACK(callback_id)					    \
145	movq	%rsp, %gs:CPU_RTMP_RSP	/* save the stack pointer	*/ ;\
146	movq	%r15, %gs:CPU_RTMP_R15	/* save %r15			*/ ;\
147	movq	%gs:CPU_THREAD, %r15	/* load the thread pointer	*/ ;\
148	movq	T_STACK(%r15), %rsp	/* switch to the kernel stack	*/ ;\
149	subq	$16, %rsp		/* save space for two pointers	*/ ;\
150	pushq	%r14			/* save %r14			*/ ;\
151	movq	%gs:CPU_RTMP_RSP, %r14	/* user %rsp saved above	*/ ;\
152	movq	%r14, 8(%rsp)		/* stash the user stack pointer	*/ ;\
153	popq	%r14			/* restore %r14			*/ ;\
154	movq	T_LWP(%r15), %r15	/* load the lwp pointer		*/ ;\
155	pushq	%r15			/* push the lwp pointer		*/ ;\
156	movq	LWP_PROCP(%r15), %r15	/* load the proc pointer	*/ ;\
157	movq	P_BRAND(%r15), %r15	/* load the brand pointer	*/ ;\
158	movq	B_MACHOPS(%r15), %r15	/* load the machops pointer	*/ ;\
159	movq	_CONST(_MUL(callback_id, CPTRSIZE))(%r15), %r15		   ;\
160	cmpq	$0, %r15		/* NULL slot for callback_id?	*/ ;\
161	je	1f			/* yes: nothing to call		*/ ;\
162	movq	%r15, 16(%rsp)		/* save the callback pointer	*/ ;\
163	movq	%gs:CPU_RTMP_RSP, %r15	/* grab the user stack pointer	*/ ;\
164	pushq	(%r15)			/* push the return address	*/ ;\
165	movq	%gs:CPU_RTMP_R15, %r15	/* restore %r15			*/ ;\
166	SWAPGS				/* user gsbase                  */ ;\
167	call	*24(%rsp)		/* call callback		*/ ;\
168	SWAPGS				/* kernel gsbase		*/ ;\
1691:	movq	%gs:CPU_RTMP_R15, %r15	/* restore %r15			*/ ;\
170	movq	%gs:CPU_RTMP_RSP, %rsp	/* restore the stack pointer	*/
171
172#define	MSTATE_TRANSITION(from, to)		\
173	movl	$from, %edi;	/* 1st argument: "from" state */	\
174	movl	$to, %esi;	/* 2nd argument: "to" state */	\
175	call	syscall_mstate	/* a C call: caller-saved registers do not survive */
176
177/*
178 * Check to see if a simple (direct) return is possible i.e.
179 *
180 *	if (t->t_post_sys_ast | syscalltrace |
181 *	    lwp->lwp_pcb.pcb_rupdate == 1)
182 *		do full version	;
183 *
184 * Preconditions:
185 * -	t is curthread
186 * Postconditions:
187 * -	condition code NE is set if post-sys is too complex
188 * -	rtmp is zeroed if it isn't (we rely on this!)
189 * -	ltmp is smashed
190 */
191#define	CHECK_POSTSYS_NE(t, ltmp, rtmp)			\
192	movq	T_LWP(t), ltmp;		/* ltmp = lwp pointer of t */	\
193	movzbl	PCB_RUPDATE(ltmp), rtmp; /* rtmp = pcb_rupdate byte, zero-extended */ \
194	ORL_SYSCALLTRACE(rtmp);		/* expands to nothing unless SYSCALLTRACE */ \
195	orl	T_POST_SYS_AST(t), rtmp; /* fold in t_post_sys_ast */	\
196	cmpl	$0, rtmp	/* NE: some bit set, full post-sys path; else rtmp == 0 */
197
198/*
199 * Fix up the lwp, thread, and eflags for a successful return
200 *
201 * Preconditions:
202 * -	zwreg contains zero
203 */
204#define	SIMPLE_SYSCALL_POSTSYS(t, lwp, zwreg)		\
205	movb	$LWP_USER, LWP_STATE(lwp);	/* lwp state = LWP_USER */	\
206	movw	zwreg, T_SYSNUM(t);		/* zwreg is zero: clears t_sysnum */ \
207	andb	$_CONST(0xff - PS_C), REGOFF_RFL(%rsp) /* clear PS_C in the saved flags; byte-sized mask to match andb */
208
209/*
210 * ASSERT(lwptoregs(lwp) == rp);
211 *
212 * This may seem obvious, but very odd things happen if this
213 * assertion is false
214 *
215 * Preconditions:
216 *	(%rsp is ready for normal call sequence)
217 * Postconditions (if assertion is true):
218 *	%r11 is smashed
219 *
220 * ASSERT(rp->r_cs == descnum)
221 *
222 * The code selector is written into the regs structure when the
223 * lwp stack is created.  We use this ASSERT to validate that
224 * the regs structure really matches how we came in.
225 *
226 * Preconditions:
227 *	(%rsp is ready for normal call sequence)
228 * Postconditions (if assertion is true):
229 *	-none-
230 *
231 * ASSERT(lwp->lwp_pcb.pcb_rupdate == 0);
232 *
233 * If this is false, it meant that we returned to userland without
234 * updating the segment registers as we were supposed to.
235 *
236 * Note that we must ensure no interrupts or other traps intervene
237 * between entering privileged mode and performing the assertion,
238 * otherwise we may perform a context switch on the thread, which
239 * will end up setting pcb_rupdate to 1 again.
240 */
241#if defined(DEBUG)
242
243#if !defined(__lint)
244
245__lwptoregs_msg:
246	.string	"%M%:%d lwptoregs(%p) [%p] != rp [%p]"
247
248__codesel_msg:
249	.string	"%M%:%d rp->r_cs [%ld] != %ld"
250
251__no_rupdate_msg:
252	.string	"%M%:%d lwp %p, pcb_rupdate != 0"
253
254#endif	/* !__lint */
255
256#define	ASSERT_LWPTOREGS(lwp, rp)			\
257	movq	LWP_REGS(lwp), %r11;	/* %r11 = regs pointer recorded in the lwp */ \
258	cmpq	rp, %r11;				\
259	je	7f;			/* equal: assertion holds */	\
260	leaq	__lwptoregs_msg(%rip), %rdi; /* arg 1: format string */ \
261	movl	$__LINE__, %esi;	/* arg 2: source line for %d */	\
262	movq	lwp, %rdx;		/* arg 3: the lwp */		\
263	movq	%r11, %rcx;		/* arg 4: regs pointer found in the lwp */ \
264	movq	rp, %r8;		/* arg 5: the rp we expected */	\
265	xorl	%eax, %eax;		/* %al = 0: no vector-register arguments */ \
266	call	panic;					\
2677:
268
269#define	ASSERT_NO_RUPDATE_PENDING(lwp)			\
270	testb	$0x1, PCB_RUPDATE(lwp);	/* low bit of pcb_rupdate set? */ \
271	je	8f;			/* clear: assertion holds */	\
272	movq	lwp, %rdx;		/* arg 3: the lwp */		\
273	leaq	__no_rupdate_msg(%rip), %rdi; /* arg 1: format string */ \
274	movl	$__LINE__, %esi;	/* arg 2: source line for %d */	\
275	xorl	%eax, %eax;		/* %al = 0: no vector-register arguments */ \
276	call	panic;					\
2778:
278
279#else
280#define	ASSERT_LWPTOREGS(lwp, rp)	/* non-DEBUG: expands to nothing */
281#define	ASSERT_NO_RUPDATE_PENDING(lwp)	/* non-DEBUG: expands to nothing */
282#endif
283
284/*
285 * Do the traptrace thing and restore any registers we used
286 * in situ.  Assumes that %rsp is pointing at the base of
287 * the struct regs, obviously ..
288 */
289#ifdef TRAPTRACE
290#define	SYSCALL_TRAPTRACE(ttype)				\
291	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, ttype);		\
292	TRACE_REGS(%rdi, %rsp, %rbx, %rcx);			\
293	TRACE_STAMP(%rdi);	/* rdtsc clobbers %eax, %edx */	\
294	movq	REGOFF_RAX(%rsp), %rax;	/* reload scratch registers from the saved regs */ \
295	movq	REGOFF_RBX(%rsp), %rbx;				\
296	movq	REGOFF_RCX(%rsp), %rcx;				\
297	movq	REGOFF_RDX(%rsp), %rdx;				\
298	movl	%eax, TTR_SYSNUM(%rdi);	/* record the saved %eax (syscall number) in the trace entry */ \
299	movq	REGOFF_RDI(%rsp), %rdi	/* reloaded last: %rdi was the trace pointer until here */
300
301#define	SYSCALL_TRAPTRACE32(ttype)				\
302	SYSCALL_TRAPTRACE(ttype);				\
303	/* paranoia: clean the top 32-bits of the registers */	\
304	orl	%eax, %eax;	/* value unchanged; a 32-bit write zeroes bits 63:32 */ \
305	orl	%ebx, %ebx;					\
306	orl	%ecx, %ecx;					\
307	orl	%edx, %edx;					\
308	orl	%edi, %edi
309#else	/* TRAPTRACE */
310#define	SYSCALL_TRAPTRACE(ttype)	/* TRAPTRACE not defined: expands to nothing */
311#define	SYSCALL_TRAPTRACE32(ttype)	/* TRAPTRACE not defined: expands to nothing */
312#endif	/* TRAPTRACE */
313
314/*
315 * The 64-bit libc syscall wrapper does this:
316 *
317 * fn(<args>)
318 * {
319 *	movq	%rcx, %r10	-- because syscall smashes %rcx
320 *	movl	$CODE, %eax
321 *	syscall
322 *	<error processing>
323 * }
324 *
325 * Thus when we come into the kernel:
326 *
327 *	%rdi, %rsi, %rdx, %r10, %r8, %r9 contain first six args
328 *	%rax is the syscall number
329 *	%r12-%r15 contain caller state
330 *
331 * The syscall instruction arranges that:
332 *
333 *	%rcx contains the return %rip
334 *	%r11d contains bottom 32-bits of %rflags
335 *	%rflags is masked (as determined by the SFMASK msr)
336 *	%cs is set to UCS_SEL (as determined by the STAR msr)
337 *	%ss is set to UDS_SEL (as determined by the STAR msr)
338 *	%rip is set to sys_syscall (as determined by the LSTAR msr)
339 *
340 * Or in other words, we have no registers available at all.
341 * Only swapgs can save us!
342 */
343
344#if defined(__lint)
345
346/*ARGSUSED*/
347void
348sys_syscall()
349{}	/* lint-only stub; the real entry point is the assembly in the #else arm */
350
351void
352_allsyscalls()
353{}	/* lint-only stub */
354
355size_t _allsyscalls_size;
356
357#else	/* __lint */
358
359	ENTRY_NP2(brand_sys_syscall,_allsyscalls)
360	SWAPGS				/* kernel gsbase */
361	XPV_TRAP_POP
362	BRAND_CALLBACK(BRAND_CB_SYSCALL)
363	SWAPGS				/* user gsbase */
364
365#if defined(__xpv)
366	/*
367	 * Note that swapgs is handled for us by the hypervisor. Here
368	 * it is empty.
369	 */
370	jmp	nopop_sys_syscall
371#endif
372
373	ALTENTRY(sys_syscall)
374	SWAPGS				/* kernel gsbase */
375#if defined(__xpv)
376	/*
377	 * Even though we got here by a syscall instruction from user land
378	 * the hypervisor constructs our stack the same way as is done
379	 * for interrupt gates. The only exception is that it pushes kernel
380	 * cs and ss instead of user cs and ss for some reason.  This is all
381	 * different from running native on the metal.
382	 *
383	 * Stack on entry:
384	 *      (0x0)rsp	rcx	(user rip)
385	 *      (0x8)rsp	r11	(user rflags)
386	 *      (0x10)rsp	user rip
387	 *      (0x18)rsp	kernel cs
388	 *      (0x20)rsp	user rflags
389	 *      (0x28)rsp	user rsp
390	 *      (0x30)rsp	kernel ss
391	 */
392
393	XPV_TRAP_POP
394nopop_sys_syscall:
395	ASSERT_UPCALL_MASK_IS_SET
396
397	movq	%r15, %gs:CPU_RTMP_R15		/* stash user %r15 */
398	movq	0x18(%rsp), %r15		/* save user stack */
399	movq	%r15, %gs:CPU_RTMP_RSP
400#else
401	movq	%r15, %gs:CPU_RTMP_R15		/* stash user %r15 */
402	movq	%rsp, %gs:CPU_RTMP_RSP		/* stash user %rsp */
403#endif	/* __xpv */
404
405	movq	%gs:CPU_THREAD, %r15		/* %r15 = curthread */
406	movq	T_STACK(%r15), %rsp		/* switch to the thread's kernel stack */
407
408	movl	$UCS_SEL, REGOFF_CS(%rsp)
409	movq	%rcx, REGOFF_RIP(%rsp)		/* syscall: %rip -> %rcx */
410	movq	%r11, REGOFF_RFL(%rsp)		/* syscall: %rfl -> %r11d */
411	movl	$UDS_SEL, REGOFF_SS(%rsp)
412
413	movl	%eax, %eax			/* wrapper: sysc# -> %eax */
414	movq	%rdi, REGOFF_RDI(%rsp)
415	movq	%rsi, REGOFF_RSI(%rsp)
416	movq	%rdx, REGOFF_RDX(%rsp)
417	movq	%r10, REGOFF_RCX(%rsp)		/* wrapper: %rcx -> %r10 */
418	movq	%r10, %rcx			/* arg[3] for direct calls */
419
420	movq	%r8, REGOFF_R8(%rsp)
421	movq	%r9, REGOFF_R9(%rsp)
422	movq	%rax, REGOFF_RAX(%rsp)
423	movq	%rbx, REGOFF_RBX(%rsp)
424
425	movq	%rbp, REGOFF_RBP(%rsp)
426	movq	%r10, REGOFF_R10(%rsp)
427	movq	%gs:CPU_RTMP_RSP, %r11		/* user %rsp stashed on entry */
428	movq	%r11, REGOFF_RSP(%rsp)
429	movq	%r12, REGOFF_R12(%rsp)
430
431	movq	%r13, REGOFF_R13(%rsp)
432	movq	%r14, REGOFF_R14(%rsp)
433	movq	%gs:CPU_RTMP_R15, %r10		/* user %r15 stashed on entry */
434	movq	%r10, REGOFF_R15(%rsp)
435	movq	$0, REGOFF_SAVFP(%rsp)
436	movq	$0, REGOFF_SAVPC(%rsp)
437
438	/*
439	 * Copy these registers here in case we end up stopped with
440	 * someone (like, say, /proc) messing with our register state.
441	 * We don't -restore- them unless we have to in update_sregs.
442	 *
443	 * Since userland -can't- change fsbase or gsbase directly,
444	 * and capturing them involves two serializing instructions,
445	 * we don't bother to capture them here.
446	 */
447	xorl	%ebx, %ebx		/* upper bits stay zero for the movw's below */
448	movw	%ds, %bx
449	movq	%rbx, REGOFF_DS(%rsp)
450	movw	%es, %bx
451	movq	%rbx, REGOFF_ES(%rsp)
452	movw	%fs, %bx
453	movq	%rbx, REGOFF_FS(%rsp)
454	movw	%gs, %bx
455	movq	%rbx, REGOFF_GS(%rsp)
456
457	/*
458	 * Machine state saved in the regs structure on the stack
459	 * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
460	 * %eax is the syscall number
461	 * %rsp is the thread's stack, %r15 is curthread
462	 * REG_RSP(%rsp) is the user's stack
463	 */
464
465	SYSCALL_TRAPTRACE($TT_SYSC64)
466
467	movq	%rsp, %rbp		/* %rbp = base of the saved regs */
468
469	movq	T_LWP(%r15), %r14	/* %r14 = lwp */
470	ASSERT_NO_RUPDATE_PENDING(%r14)
471	ENABLE_INTR_FLAGS
472
473	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
474	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate call) */
475
476	ASSERT_LWPTOREGS(%r14, %rsp)
477
478	movb	$LWP_SYS, LWP_STATE(%r14)
479	incq	LWP_RU_SYSC(%r14)
480	movb	$NORMALRETURN, LWP_EOSYS(%r14)
481
482	incq	%gs:CPU_STATS_SYS_SYSCALL
483
484	movw	%ax, T_SYSNUM(%r15)	/* record the syscall number in the thread */
485	movzbl	T_PRE_SYS(%r15), %ebx
486	ORL_SYSCALLTRACE(%ebx)
487	testl	%ebx, %ebx
488	jne	_syscall_pre		/* pre-syscall work (or tracing) requested */
489
490_syscall_invoke:
491	movq	REGOFF_RDI(%rbp), %rdi	/* reload the six args from the saved regs */
492	movq	REGOFF_RSI(%rbp), %rsi
493	movq	REGOFF_RDX(%rbp), %rdx
494	movq	REGOFF_RCX(%rbp), %rcx
495	movq	REGOFF_R8(%rbp), %r8
496	movq	REGOFF_R9(%rbp), %r9
497
498	cmpl	$NSYSCALL, %eax
499	jae	_syscall_ill		/* unsigned: number >= NSYSCALL goes to nosys */
500	shll	$SYSENT_SIZE_SHIFT, %eax /* scale the number to a byte offset */
501	leaq	sysent(%rax), %rbx	/* %rbx = sysent + that offset */
502
503	call	*SY_CALLC(%rbx)		/* call through the entry's SY_CALLC slot */
504
505	movq	%rax, %r12		/* keep the handler's %rax */
506	movq	%rdx, %r13		/* keep the handler's %rdx */
507
508	/*
509	 * If the handler returns two ints, then we need to split the
510	 * 64-bit return value into two 32-bit values.
511	 */
512	testw	$SE_32RVAL2, SY_FLAGS(%rbx)
513	je	5f
514	movq	%r12, %r13
515	shrq	$32, %r13	/* upper 32-bits into %edx */
516	movl	%r12d, %r12d	/* lower 32-bits into %eax */
5175:
518	/*
519	 * Optimistically assume that there's no post-syscall
520	 * work to do.  (This is to avoid having to call syscall_mstate()
521	 * with interrupts disabled)
522	 */
523	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
524
525	/*
526	 * We must protect ourselves from being descheduled here;
527	 * If we were, and we ended up on another cpu, or another
528	 * lwp got in ahead of us, it could change the segment
529	 * registers without us noticing before we return to userland.
530	 */
531	CLI(%r14)
532	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
533	jne	_syscall_post
534	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)	/* %bx is zero here (CHECK_POSTSYS_NE) */
535
536	movq	%r12, REGOFF_RAX(%rsp)	/* return values go back via the saved regs */
537	movq	%r13, REGOFF_RDX(%rsp)
538
539	/*
540	 * To get back to userland, we need the return %rip in %rcx and
541	 * the return %rfl in %r11d.  The sysretq instruction also arranges
542	 * to fix up %cs and %ss; everything else is our responsibility.
543	 */
544	movq	REGOFF_RDI(%rsp), %rdi
545	movq	REGOFF_RSI(%rsp), %rsi
546	movq	REGOFF_RDX(%rsp), %rdx
547	/* %rcx used to restore %rip value */
548
549	movq	REGOFF_R8(%rsp), %r8
550	movq	REGOFF_R9(%rsp), %r9
551	movq	REGOFF_RAX(%rsp), %rax
552	movq	REGOFF_RBX(%rsp), %rbx
553
554	movq	REGOFF_RBP(%rsp), %rbp
555	movq	REGOFF_R10(%rsp), %r10
556	/* %r11 used to restore %rfl value */
557	movq	REGOFF_R12(%rsp), %r12
558
559	movq	REGOFF_R13(%rsp), %r13
560	movq	REGOFF_R14(%rsp), %r14
561	movq	REGOFF_R15(%rsp), %r15
562
563	movq	REGOFF_RIP(%rsp), %rcx
564	movl	REGOFF_RFL(%rsp), %r11d
565
566#if defined(__xpv)
567	addq	$REGOFF_RIP, %rsp
568#else
569	movq	REGOFF_RSP(%rsp), %rsp	/* last use of the kernel stack: back on the user %rsp */
570#endif
571
572        /*
573         * There can be no instructions between the ALTENTRY below and
574	 * SYSRET or we could end up breaking brand support. See label usage
575         * in sn1_brand_syscall_callback for an example.
576         */
577	ASSERT_UPCALL_MASK_IS_SET
578	SWAPGS				/* user gsbase */
579        ALTENTRY(nopop_sys_syscall_sysretq)
580	SYSRETQ
581        /*NOTREACHED*/
582        SET_SIZE(nopop_sys_syscall_sysretq)
583
584_syscall_pre:
585	call	pre_syscall
586	movl	%eax, %r12d		/* keep pre_syscall's result for the post path */
587	testl	%eax, %eax
588	jne	_syscall_post_call	/* nonzero: skip the handler */
589	/*
590	 * Didn't abort, so reload the syscall args and invoke the handler.
591	 */
592	movzwl	T_SYSNUM(%r15), %eax
593	jmp	_syscall_invoke
594
595_syscall_ill:
596	call	nosys
597	movq	%rax, %r12
598	movq	%rdx, %r13
599	jmp	_syscall_post_call
600
601_syscall_post:
602	STI
603	/*
604	 * Sigh, our optimism wasn't justified, put it back to LMS_SYSTEM
605	 * so that we can account for the extra work it takes us to finish.
606	 */
607	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
608_syscall_post_call:
609	movq	%r12, %rdi		/* arg 1: saved %rax value */
610	movq	%r13, %rsi		/* arg 2: saved %rdx value */
611	call	post_syscall
612	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
613	jmp	_sys_rtt
614	SET_SIZE(sys_syscall)
615	SET_SIZE(brand_sys_syscall)
616
617#endif	/* __lint */
618
619#if defined(__lint)
620
621/*ARGSUSED*/
622void
623sys_syscall32()
624{}	/* lint-only stub; the real entry point is the assembly in the #else arm */
625
626#else	/* __lint */
627
628	ENTRY_NP(brand_sys_syscall32)
629	SWAPGS				/* kernel gsbase */
630	XPV_TRAP_POP
631	BRAND_CALLBACK(BRAND_CB_SYSCALL32)
632	SWAPGS				/* user gsbase */
633
634#if defined(__xpv)
635	jmp	nopop_sys_syscall32
636#endif
637
638	ALTENTRY(sys_syscall32)
639	SWAPGS				/* kernel gsbase */
640
641#if defined(__xpv)
642	XPV_TRAP_POP
643nopop_sys_syscall32:
644#endif
645
646	movl	%esp, %r10d		/* %r10 = user %esp, zero-extended */
647	movq	%gs:CPU_THREAD, %r15	/* %r15 = curthread */
648	movq	T_STACK(%r15), %rsp	/* switch to the thread's kernel stack */
649	movl	%eax, %eax		/* zero the upper half of %rax */
650
651	movl	$U32CS_SEL, REGOFF_CS(%rsp)
652	movl	%ecx, REGOFF_RIP(%rsp)		/* syscall: %rip -> %rcx */
653	movq	%r11, REGOFF_RFL(%rsp)		/* syscall: %rfl -> %r11d */
654	movq	%r10, REGOFF_RSP(%rsp)
655	movl	$UDS_SEL, REGOFF_SS(%rsp)
656
657_syscall32_save:
658	movl	%edi, REGOFF_RDI(%rsp)
659	movl	%esi, REGOFF_RSI(%rsp)
660	movl	%ebp, REGOFF_RBP(%rsp)
661	movl	%ebx, REGOFF_RBX(%rsp)
662	movl	%edx, REGOFF_RDX(%rsp)
663	movl	%ecx, REGOFF_RCX(%rsp)
664	movl	%eax, REGOFF_RAX(%rsp)		/* wrapper: sysc# -> %eax */
665	movq	$0, REGOFF_SAVFP(%rsp)
666	movq	$0, REGOFF_SAVPC(%rsp)
667
668	/*
669	 * Copy these registers here in case we end up stopped with
670	 * someone (like, say, /proc) messing with our register state.
671	 * We don't -restore- them unless we have to in update_sregs.
672	 *
673	 * Since userland -can't- change fsbase or gsbase directly,
674	 * we don't bother to capture them here.
675	 */
676	xorl	%ebx, %ebx		/* upper bits stay zero for the movw's below */
677	movw	%ds, %bx
678	movq	%rbx, REGOFF_DS(%rsp)
679	movw	%es, %bx
680	movq	%rbx, REGOFF_ES(%rsp)
681	movw	%fs, %bx
682	movq	%rbx, REGOFF_FS(%rsp)
683	movw	%gs, %bx
684	movq	%rbx, REGOFF_GS(%rsp)
685
686	/*
687	 * Application state saved in the regs structure on the stack
688	 * %eax is the syscall number
689	 * %rsp is the thread's stack, %r15 is curthread
690	 * REG_RSP(%rsp) is the user's stack
691	 */
692
693	SYSCALL_TRAPTRACE32($TT_SYSC)
694
695	movq	%rsp, %rbp		/* %rbp = base of the saved regs */
696
697	movq	T_LWP(%r15), %r14	/* %r14 = lwp */
698	ASSERT_NO_RUPDATE_PENDING(%r14)
699
700	ENABLE_INTR_FLAGS
701
702	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
703	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate call) */
704
705	ASSERT_LWPTOREGS(%r14, %rsp)
706
707	incq	 %gs:CPU_STATS_SYS_SYSCALL
708
709	/*
710	 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
711	 * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
712	 * more succinctly:
713	 *
714	 *	SA(MAXSYSARGS * sizeof (long)) == 64
715	 */
716#define	SYS_DROP	64			/* drop for args */
717	subq	$SYS_DROP, %rsp
718	movb	$LWP_SYS, LWP_STATE(%r14)
719	movq	%r15, %rdi		/* arg 1: curthread */
720	movq	%rsp, %rsi		/* arg 2: base of the slots reserved above */
721	call	syscall_entry
722
723	/*
724	 * Fetch the arguments copied onto the kernel stack and put
725	 * them in the right registers to invoke a C-style syscall handler.
726	 * %rax contains the handler address.
727	 *
728	 * Ideas for making all this go faster of course include simply
729	 * forcibly fetching 6 arguments from the user stack under lofault
730	 * protection, reverting to copyin_args only when watchpoints
731	 * are in effect.
732	 *
733	 * (If we do this, make sure that exec and libthread leave
734	 * enough space at the top of the stack to ensure that we'll
735	 * never do a fetch from an invalid page.)
736	 *
737	 * Lots of ideas here, but they won't really help with bringup B-)
738	 * Correctness can't wait, performance can wait a little longer ..
739	 */
740
741	movq	%rax, %rbx	/* NOTE(review): used as the base for SY_CALLC below, so this looks like an entry pointer rather than the handler itself -- confirm */
742	movl	0(%rsp), %edi	/* low 32 bits of each 8-byte slot; upper halves zeroed */
743	movl	8(%rsp), %esi
744	movl	0x10(%rsp), %edx
745	movl	0x18(%rsp), %ecx
746	movl	0x20(%rsp), %r8d
747	movl	0x28(%rsp), %r9d
748
749	call	*SY_CALLC(%rbx)
750
751	movq	%rbp, %rsp	/* pop the args */
752
753	/*
754	 * amd64 syscall handlers -always- return a 64-bit value in %rax.
755	 * On the 32-bit kernel, they always return that value in %eax:%edx
756	 * as required by the 32-bit ABI.
757	 *
758	 * Simulate the same behaviour by unconditionally splitting the
759	 * return value in the same way.
760	 */
761	movq	%rax, %r13
762	shrq	$32, %r13	/* upper 32-bits into %edx */
763	movl	%eax, %r12d	/* lower 32-bits into %eax */
764
765	/*
766	 * Optimistically assume that there's no post-syscall
767	 * work to do.  (This is to avoid having to call syscall_mstate()
768	 * with interrupts disabled)
769	 */
770	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
771
772	/*
773	 * We must protect ourselves from being descheduled here;
774	 * If we were, and we ended up on another cpu, or another
775	 * lwp got in ahead of us, it could change the segment
776	 * registers without us noticing before we return to userland.
777	 */
778	CLI(%r14)
779	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
780	jne	_full_syscall_postsys32
781	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)	/* %bx is zero here (CHECK_POSTSYS_NE) */
782
783	/*
784	 * To get back to userland, we need to put the return %rip in %rcx and
785	 * the return %rfl in %r11d.  The sysret instruction also arranges
786	 * to fix up %cs and %ss; everything else is our responsibility.
787	 */
788
789	movl	%r12d, %eax			/* %eax: rval1 */
790	movl	REGOFF_RBX(%rsp), %ebx
791	/* %ecx used for return pointer */
792	movl	%r13d, %edx			/* %edx: rval2 */
793	movl	REGOFF_RBP(%rsp), %ebp
794	movl	REGOFF_RSI(%rsp), %esi
795	movl	REGOFF_RDI(%rsp), %edi
796
797	movl	REGOFF_RFL(%rsp), %r11d		/* %r11 -> eflags */
798	movl	REGOFF_RIP(%rsp), %ecx		/* %ecx -> %eip */
799	movl	REGOFF_RSP(%rsp), %esp		/* last use of the kernel stack */
800
801	ASSERT_UPCALL_MASK_IS_SET
802	SWAPGS				/* user gsbase */
803        ALTENTRY(nopop_sys_syscall32_sysretl)
804	SYSRETL
805        SET_SIZE(nopop_sys_syscall32_sysretl)
806	/*NOTREACHED*/
807
808_full_syscall_postsys32:
809	STI
810	/*
811	 * Sigh, our optimism wasn't justified, put it back to LMS_SYSTEM
812	 * so that we can account for the extra work it takes us to finish.
813	 */
814	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
815	movq	%r15, %rdi			/* curthread */
816	movq	%r12, %rsi			/* rval1 - %eax */
817	movq	%r13, %rdx			/* rval2 - %edx */
818	call	syscall_exit
819	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
820	jmp	_sys_rtt
821	SET_SIZE(sys_syscall32)
822	SET_SIZE(brand_sys_syscall32)
823
824#endif	/* __lint */
825
826/*
827 * System call handler via the sysenter instruction
828 * Used only for 32-bit system calls on the 64-bit kernel.
829 *
830 * The caller in userland has arranged that:
831 *
832 * -	%eax contains the syscall number
833 * -	%ecx contains the user %esp
834 * -	%edx contains the return %eip
835 * -	the user stack contains the args to the syscall
836 *
837 * Hardware and (privileged) initialization code have arranged that by
838 * the time the sysenter instruction completes:
839 *
840 * - %rip is pointing to sys_sysenter (below).
841 * - %cs and %ss are set to kernel text and stack (data) selectors.
842 * - %rsp is pointing at the lwp's stack
843 * - interrupts have been disabled.
844 *
845 * Note that we are unable to return both "rvals" to userland with
846 * this call, as %edx is used by the sysexit instruction.
847 *
848 * One final complication in this routine is its interaction with
849 * single-stepping in a debugger.  For most of the system call mechanisms,
850 * the CPU automatically clears the single-step flag before we enter the
851 * kernel.  The sysenter mechanism does not clear the flag, so a user
852 * single-stepping through a libc routine may suddenly find him/herself
853 * single-stepping through the kernel.  To detect this, kmdb compares the
854 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
855 * If it finds that we have single-stepped to a sysenter entry point, it
856 * explicitly clears the flag and executes the sys_sysenter routine.
857 *
858 * One final complication in this final complication is the fact that we
859 * have two different entry points for sysenter: brand_sys_sysenter and
860 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
861 * through the kernel with kmdb, we will eventually hit the instruction at
862 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
863 * and the undesirable one mentioned above.  To avoid this situation, we
864 * simply add a jump over the instruction at sys_sysenter to make it
865 * impossible to single-step to it.
866 */
867#if defined(__lint)
868
869void
870sys_sysenter()
871{}	/* lint-only stub; the real entry point is the assembly in the #else arm */
872
873#else	/* __lint */
874
875	ENTRY_NP(brand_sys_sysenter)
876	SWAPGS				/* kernel gsbase */
877	ALTENTRY(_brand_sys_sysenter_post_swapgs)
878	BRAND_CALLBACK(BRAND_CB_SYSENTER)
879	/*
880	 * Jump over sys_sysenter to allow single-stepping as described
881	 * above.
882	 */
883	jmp	_sys_sysenter_post_swapgs
884
885	ALTENTRY(sys_sysenter)
886	SWAPGS				/* kernel gsbase */
887
888	ALTENTRY(_sys_sysenter_post_swapgs)
889	movq	%gs:CPU_THREAD, %r15	/* %r15 = curthread */
890
891	movl	$U32CS_SEL, REGOFF_CS(%rsp)
892	movl	%ecx, REGOFF_RSP(%rsp)		/* wrapper: %esp -> %ecx */
893	movl	%edx, REGOFF_RIP(%rsp)		/* wrapper: %eip -> %edx */
894	pushfq
895	popq	%r10			/* %r10 = current %rflags */
896	movl	$UDS_SEL, REGOFF_SS(%rsp)
897
898	/*
899	 * Set the interrupt flag before storing the flags to the
900	 * flags image on the stack so we can return to user with
901	 * interrupts enabled if we return via sys_rtt_syscall32
902	 */
903	orq	$PS_IE, %r10
904	movq	%r10, REGOFF_RFL(%rsp)
905
906	movl	%edi, REGOFF_RDI(%rsp)
907	movl	%esi, REGOFF_RSI(%rsp)
908	movl	%ebp, REGOFF_RBP(%rsp)
909	movl	%ebx, REGOFF_RBX(%rsp)
910	movl	%edx, REGOFF_RDX(%rsp)
911	movl	%ecx, REGOFF_RCX(%rsp)
912	movl	%eax, REGOFF_RAX(%rsp)		/* wrapper: sysc# -> %eax */
913	movq	$0, REGOFF_SAVFP(%rsp)
914	movq	$0, REGOFF_SAVPC(%rsp)
915
916	/*
917	 * Copy these registers here in case we end up stopped with
918	 * someone (like, say, /proc) messing with our register state.
919	 * We don't -restore- them unless we have to in update_sregs.
920	 *
921	 * Since userland -can't- change fsbase or gsbase directly,
922	 * we don't bother to capture them here.
923	 */
924	xorl	%ebx, %ebx		/* upper bits stay zero for the movw's below */
925	movw	%ds, %bx
926	movq	%rbx, REGOFF_DS(%rsp)
927	movw	%es, %bx
928	movq	%rbx, REGOFF_ES(%rsp)
929	movw	%fs, %bx
930	movq	%rbx, REGOFF_FS(%rsp)
931	movw	%gs, %bx
932	movq	%rbx, REGOFF_GS(%rsp)
933
934	/*
935	 * Application state saved in the regs structure on the stack
936	 * %eax is the syscall number
937	 * %rsp is the thread's stack, %r15 is curthread
938	 * REG_RSP(%rsp) is the user's stack
939	 */
940
941	SYSCALL_TRAPTRACE($TT_SYSENTER)
942
943	movq	%rsp, %rbp		/* %rbp = base of the saved regs */
944
945	movq	T_LWP(%r15), %r14	/* %r14 = lwp */
946	ASSERT_NO_RUPDATE_PENDING(%r14)
947
948	ENABLE_INTR_FLAGS
949
950	/*
951	 * Catch 64-bit process trying to issue sysenter instruction
952	 * on Nocona based systems.
953	 */
954	movq	LWP_PROCP(%r14), %rax	/* %rax = proc pointer */
955	cmpq	$DATAMODEL_ILP32, P_MODEL(%rax)
956	je	7f			/* 32-bit process: carry on */
957
958	/*
959	 * For a non-32-bit process, simulate a #ud, since that's what
960	 * native hardware does.  The traptrace entry (above) will
961	 * let you know what really happened.
962	 */
963	movq	$T_ILLINST, REGOFF_TRAPNO(%rsp)
964	movq	REGOFF_CS(%rsp), %rdi
965	movq	%rdi, REGOFF_ERR(%rsp)
966	movq	%rsp, %rdi		/* arg 1: the saved regs */
967	movq	REGOFF_RIP(%rsp), %rsi	/* arg 2: saved %rip */
968	movl	%gs:CPU_ID, %edx	/* arg 3: this cpu's id */
969	call	trap
970	jmp	_sys_rtt
9717:
972
973	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
974	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate calls) */
975
976	ASSERT_LWPTOREGS(%r14, %rsp)
977
978	incq	%gs:CPU_STATS_SYS_SYSCALL
979
980	/*
981	 * Make some space for MAXSYSARGS (currently 8) 32-bit args
982	 * placed into 64-bit (long) arg slots (SYS_DROP is 64 bytes,
983	 * i.e. exactly eight slots), maintaining 16 byte alignment.
984	 */
985	subq	$SYS_DROP, %rsp
986	movb	$LWP_SYS, LWP_STATE(%r14)
987	movq	%r15, %rdi		/* arg 1: curthread */
988	movq	%rsp, %rsi		/* arg 2: base of the slots reserved above */
989	call	syscall_entry
990
991	/*
992	 * Fetch the arguments copied onto the kernel stack and put
993	 * them in the right registers to invoke a C-style syscall handler.
994	 * %rax contains the handler address.
995	 */
996	movq	%rax, %rbx	/* NOTE(review): used as the base for SY_CALLC below, so this looks like an entry pointer rather than the handler itself -- confirm */
997	movl	0(%rsp), %edi	/* low 32 bits of each 8-byte slot; upper halves zeroed */
998	movl	8(%rsp), %esi
999	movl	0x10(%rsp), %edx
1000	movl	0x18(%rsp), %ecx
1001	movl	0x20(%rsp), %r8d
1002	movl	0x28(%rsp), %r9d
1003
1004	call	*SY_CALLC(%rbx)
1005
1006	movq	%rbp, %rsp	/* pop the args */
1007
1008	/*
1009	 * amd64 syscall handlers -always- return a 64-bit value in %rax.
1010	 * On the 32-bit kernel, they always return that value in %eax:%edx
1011	 * as required by the 32-bit ABI.
1012	 *
1013	 * Simulate the same behaviour by unconditionally splitting the
1014	 * return value in the same way.
1015	 */
1016	movq	%rax, %r13
1017	shrq	$32, %r13	/* upper 32-bits into %edx */
1018	movl	%eax, %r12d	/* lower 32-bits into %eax */
1019
1020	/*
1021	 * Optimistically assume that there's no post-syscall
1022	 * work to do.  (This is to avoid having to call syscall_mstate()
1023	 * with interrupts disabled)
1024	 */
1025	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
1026
1027	/*
1028	 * We must protect ourselves from being descheduled here;
1029	 * If we were, and we ended up on another cpu, or another
1030	 * lwp got in ahead of us, it could change the segment
1031	 * registers without us noticing before we return to userland.
1032	 */
1033	cli
1034	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
1035	jne	_full_syscall_postsys32
1036	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)	/* %bx is zero here (CHECK_POSTSYS_NE) */
1037
1038	/*
1039	 * To get back to userland, load up the 32-bit registers and
1040	 * sysexit back where we came from.
1041	 */
1042
1043	/*
1044	 * Interrupts will be turned on by the 'sti' executed just before
1045	 * sysexit.  The following ensures that restoring the user's rflags
1046	 * doesn't enable interrupts too soon.
1047	 */
1048	andq	$_BITNOT(PS_IE), REGOFF_RFL(%rsp)
1049
1050	/*
1051	 * (There's no point in loading up %edx because the sysexit
1052	 * mechanism smashes it.)
1053	 */
1054	movl	%r12d, %eax		/* low 32 bits of the handler's return value */
1055	movl	REGOFF_RBX(%rsp), %ebx
1056	movl	REGOFF_RBP(%rsp), %ebp
1057	movl	REGOFF_RSI(%rsp), %esi
1058	movl	REGOFF_RDI(%rsp), %edi
1059
1060	movl	REGOFF_RIP(%rsp), %edx	/* sysexit: %edx -> %eip */
1061	pushq	REGOFF_RFL(%rsp)
1062	popfq				/* load the saved flags image (PS_IE cleared above) */
1063	movl	REGOFF_RSP(%rsp), %ecx	/* sysexit: %ecx -> %esp */
1064	swapgs				/* user gsbase */
1065	sti
1066	sysexit
1067	SET_SIZE(sys_sysenter)
1068	SET_SIZE(_sys_sysenter_post_swapgs)
1069	SET_SIZE(brand_sys_sysenter)
1070
1071#endif	/* __lint */
1072
1073#if defined(__lint)
1074/*
1075 * System call via an int80.  This entry point is only used by the Linux
1076 * application environment.  Unlike the other entry points, there is no
1077 * default action to take if no callback is registered for this process.
 *
 * The definition that follows is an empty stub that exists only in __lint
 * builds; the real entry point is the assembly in the matching #else branch.
1078 */
1079void
1080sys_int80()
1081{}
1082
1083#else	/* __lint */
1084
/*
 * brand_sys_int80 / sys_int80: int80 entry points.
 *
 * brand_sys_int80 switches to the kernel gsbase, runs
 * BRAND_CALLBACK(BRAND_CB_INT80), switches back to the user gsbase and then
 * continues into sys_int80: by fall-through, or under __xpv by jumping to
 * nopop_int80 so that XPV_TRAP_POP is not executed a second time.
 * NOTE(review): presumably BRAND_CALLBACK does not come back here when the
 * brand supplies an int80 handler -- confirm against the macro definition.
 *
 * sys_int80 never services the call itself: it moves the saved %rip back by
 * two bytes, pushes an error code and jumps to gptrap.
 */
1085	ENTRY_NP(brand_sys_int80)
1086	SWAPGS				/* kernel gsbase */
1087	XPV_TRAP_POP
1088	BRAND_CALLBACK(BRAND_CB_INT80)
1089	SWAPGS				/* user gsbase */
1090#if defined(__xpv)
	/* XPV_TRAP_POP already ran above; enter sys_int80 past its copy. */
1091	jmp	nopop_int80
1092#endif
1093
1094	ENTRY_NP(sys_int80)
1095	/*
1096	 * We hit an int80, but this process isn't of a brand with an int80
1097	 * handler.  Bad process!  Make it look as if the INT failed.
1098	 * Modify %rip to point before the INT, push the expected error
1099	 * code and fake a GP fault. Note on 64-bit hypervisor we need
1100	 * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1101	 * because gptrap will pop them again with its own XPV_TRAP_POP.
1102	 */
1103#if defined(__xpv)
1104	XPV_TRAP_POP
1105nopop_int80:
1106#endif
	/* The top of the stack holds the saved %rip at this point. */
1107	subq	$2, (%rsp)	/* int insn 2-bytes */
	/*
	 * Fake error code: T_INT80 scaled by GATE_DESC_SIZE, plus 2.
	 * NOTE(review): this looks like the IDT-relative form of a selector
	 * error code (the +2 being the IDT flag) -- confirm.
	 */
1108	pushq	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
1109#if defined(__xpv)
	/* Put back what XPV_TRAP_POP removed; gptrap pops these again. */
1110	push	%r11
1111	push	%rcx
1112#endif
1113	jmp	gptrap			/ GP fault
1114	SET_SIZE(sys_int80)
1115	SET_SIZE(brand_sys_int80)
1116#endif	/* __lint */
1117
1118
1119/*
1120 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1121 * the generic i386 libc to do system calls. We do a small amount of setup
1122 * before jumping into the existing sys_syscall32 path.
1123 */
1124#if defined(__lint)
1125
/*
 * Empty stub compiled only in __lint builds; the real sys_syscall_int entry
 * point is the assembly in the matching #else branch.
 */
1126/*ARGSUSED*/
1127void
1128sys_syscall_int()
1129{}
1130
1131#else	/* __lint */
1132
/*
 * brand_sys_syscall_int / sys_syscall_int: entry points for the
 * "int $T_SYSCALLINT" system call gate.
 *
 * brand_sys_syscall_int runs BRAND_CALLBACK(BRAND_CB_INT91) between a pair
 * of SWAPGS operations (kernel gsbase, then back to user gsbase) and then
 * continues into sys_syscall_int.
 *
 * sys_syscall_int loads the current thread pointer from %gs:CPU_THREAD into
 * %r15, switches %rsp to that thread's T_STACK value, sets T_POST_SYS so
 * that the return goes through the slow path, and joins the sys_syscall32
 * code at _syscall32_save.
 */
1133	ENTRY_NP(brand_sys_syscall_int)
1134	SWAPGS				/* kernel gsbase */
1135	XPV_TRAP_POP
1136	BRAND_CALLBACK(BRAND_CB_INT91)
1137	SWAPGS				/* user gsbase */
1138
1139#if defined(__xpv)
	/*
	 * XPV_TRAP_POP already ran above, so enter sys_syscall_int past its
	 * copy.  NOTE(review): this jump also bypasses the SWAPGS at the top
	 * of sys_syscall_int; presumably SWAPGS expands to nothing under
	 * __xpv -- confirm.
	 */
1140	jmp	nopop_syscall_int
1141#endif
1142
1143	ALTENTRY(sys_syscall_int)
1144	SWAPGS				/* kernel gsbase */
1145
1146#if defined(__xpv)
1147	XPV_TRAP_POP
1148nopop_syscall_int:
1149#endif
1150
	/* %r15 = current thread; %rsp = the T_STACK value of that thread */
1151	movq	%gs:CPU_THREAD, %r15
1152	movq	T_STACK(%r15), %rsp
	/* A 32-bit register write clears the upper 32 bits of %rax. */
1153	movl	%eax, %eax
1154	/*
1155	 * Set t_post_sys on this thread to force ourselves out via the slow
1156	 * path. It might be possible at some later date to optimize this out
1157	 * and use a faster return mechanism.
1158	 */
1159	movb	$1, T_POST_SYS(%r15)
	/* NOTE(review): CLEAN_CS is defined outside this section -- see it. */
1160	CLEAN_CS
1161	jmp	_syscall32_save
1162	SET_SIZE(sys_syscall_int)
1163	SET_SIZE(brand_sys_syscall_int)
1164
1165#endif	/* __lint */
1166
1167/*
1168 * Legacy 32-bit applications and old libc implementations do lcalls;
1169 * we should never get here because the LDT entry containing the syscall
1170 * segment descriptor has the "segment present" bit cleared, which means
1171 * we end up processing those system calls in trap() via a not-present trap.
1172 *
1173 * We do it this way because a call gate unhelpfully does -nothing- to the
1174 * interrupt flag bit, so an interrupt can run us just after the lcall
1175 * completes, but just before the swapgs takes effect.   Thus the INTR_PUSH and
1176 * INTR_POP paths would have to be slightly more complex to dance around
1177 * this problem, and end up depending explicitly on the first
1178 * instruction of this handler being either swapgs or cli.
1179 */
1180
1181#if defined(__lint)
1182
/*
 * Empty stub compiled only in __lint builds; the real sys_lcall32 handler
 * is the assembly in the matching #else branch.
 */
1183/*ARGSUSED*/
1184void
1185sys_lcall32()
1186{}
1187
1188#else	/* __lint */
1189
/*
 * sys_lcall32: handler that is not expected to be reached; it calls panic()
 * with __lcall_panic_str as the message.
 *
 * After switching to the kernel gsbase it pushes a zero quadword and the
 * old %rbp, points %rbp at the new frame, and calls panic.  No instruction
 * follows the call, so panic is relied upon not to return.
 * NOTE(review): the zero pushed first presumably stands in for a return
 * address so stack walkers terminate cleanly -- confirm.
 */
1190	ENTRY_NP(sys_lcall32)
1191	SWAPGS				/* kernel gsbase */
1192	pushq	$0
1193	pushq	%rbp
1194	movq	%rsp, %rbp
	/* first argument: address of the message, taken %rip-relative */
1195	leaq	__lcall_panic_str(%rip), %rdi
	/* %al = 0: no vector registers are used by this variadic call */
1196	xorl	%eax, %eax
1197	call	panic
1198	SET_SIZE(sys_lcall32)
1199
	/* Message handed to panic() by sys_lcall32. */
1200__lcall_panic_str:
1201	.string	"sys_lcall32: shouldn't be here!"
1202
1203/*
1204 * Declare a uintptr_t which covers the entire pc range of syscall
1205 * handlers for the stack walkers that need this.
1206 */
1207	.align	CPTRSIZE
1208	.globl	_allsyscalls_size
1209	.type	_allsyscalls_size, @object
1210_allsyscalls_size:
	/*
	 * One native word holding the distance from the _allsyscalls label
	 * (defined earlier in this file) to this point, resolved at assembly
	 * time.
	 */
1211	.NWORD	. - _allsyscalls
1212	SET_SIZE(_allsyscalls_size)
1213
1214#endif	/* __lint */
1215
1216/*
1217 * These are the thread context handlers for lwps using sysenter/sysexit.
1218 */
1219
1220#if defined(__lint)
1221
/*
 * Empty stub compiled only in __lint builds; it records the C-visible
 * signature of sep_save.  The real routine is the assembly in the matching
 * #else branch.
 */
1222/*ARGSUSED*/
1223void
1224sep_save(void *ksp)
1225{}
1226
/*
 * Empty stub compiled only in __lint builds; it records the C-visible
 * signature of sep_restore.  The real routine is the assembly in the
 * matching #else branch.
 */
1227/*ARGSUSED*/
1228void
1229sep_restore(void *ksp)
1230{}
1231
1232#else	/* __lint */
1233
/*
 * sep_save(void *ksp)
 *
 * Context handler run as we switch away from an lwp that uses
 * sysenter/sysexit.  The argument is not examined.
 *
 * Writes zero (%edx:%eax = 0) to the MSR selected by MSR_INTC_SEP_ESP.
 * Leaving the stack-pointer-on-sysenter NULL ensures that an lwp which
 * (somehow) did not get sep_restore'd cannot silently corrupt the stack
 * of another (preempted) thread.
 *
 * Clobbers: %eax, %ecx, %edx, flags.
 */
	ENTRY_NP(sep_save)
	movl	$MSR_INTC_SEP_ESP, %ecx	/* MSR to write */
	xorl	%eax, %eax		/* low 32 bits of the value: 0 */
	xorl	%edx, %edx		/* high 32 bits of the value: 0 */
	wrmsr
	ret
	SET_SIZE(sep_save)
1247
/*
 * sep_restore(void *ksp)
 *
 * Context handler run as we resume onto this cpu: updates the kernel
 * stack pointer used on sysenter.
 *
 * In:  %rdi = ksp
 * Writes ksp to the MSR selected by MSR_INTC_SEP_ESP, split into
 * %edx (upper 32 bits) and %eax (lower 32 bits) as wrmsr takes it.
 *
 * Clobbers: %rax, %rcx, %rdx, flags.
 */
	ENTRY_NP(sep_restore)
	movl	$MSR_INTC_SEP_ESP, %ecx	/* MSR to write */
	movl	%edi, %eax		/* low 32 bits of ksp */
	movq	%rdi, %rdx
	shrq	$32, %rdx		/* high 32 bits of ksp */
	wrmsr
	ret
	SET_SIZE(sep_restore)
1259
1260#endif	/* __lint */
1261