xref: /titanic_52/usr/src/uts/i86pc/ml/syscall_asm_amd64.s (revision 3b0164d5a9177d3e9e05e26ac275cc5bac564ed1)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/asm_linkage.h>
29#include <sys/asm_misc.h>
30#include <sys/regset.h>
31#include <sys/privregs.h>
32#include <sys/psw.h>
33#include <sys/machbrand.h>
34
35#if defined(__lint)
36
37#include <sys/types.h>
38#include <sys/thread.h>
39#include <sys/systm.h>
40
41#else	/* __lint */
42
43#include <sys/segments.h>
44#include <sys/pcb.h>
45#include <sys/trap.h>
46#include <sys/ftrace.h>
47#include <sys/traptrace.h>
48#include <sys/clock.h>
49#include <sys/model.h>
50#include <sys/panic.h>
51
52#if defined(__xpv)
53#include <sys/hypervisor.h>
54#endif
55
56#include "assym.h"
57
58#endif	/* __lint */
59
60/*
61 * We implement five flavours of system call entry points
62 *
63 * -	syscall/sysretq		(amd64 generic)
64 * -	syscall/sysretl		(i386 plus SYSC bit)
65 * -	sysenter/sysexit	(i386 plus SEP bit)
66 * -	int/iret		(i386 generic)
67 * -	lcall/iret		(i386 generic)
68 *
69 * The current libc included in Solaris uses int/iret as the base unoptimized
70 * kernel entry method. Older libc implementations and legacy binaries may use
71 * the lcall call gate, so it must continue to be supported.
72 *
73 * System calls that use an lcall call gate are processed in trap() via a
74 * segment-not-present trap, i.e. lcalls are extremely slow(!).
75 *
76 * The basic pattern used in the 32-bit SYSC handler at this point in time is
77 * to have the bare minimum of assembler, and get to the C handlers as
78 * quickly as possible.
79 *
80 * The 64-bit handler is much closer to the sparcv9 handler; that's
81 * because of passing arguments in registers.  The 32-bit world still
82 * passes arguments on the stack -- that makes that handler substantially
83 * more complex.
84 *
85 * The two handlers share a few code fragments which are broken
86 * out into preprocessor macros below.
87 *
88 * XX64	come back and speed all this up later.  The 32-bit stuff looks
89 * especially easy to speed up the argument copying part ..
90 *
91 *
92 * Notes about segment register usage (c.f. the 32-bit kernel)
93 *
94 * In the 32-bit kernel, segment registers are dutifully saved and
95 * restored on all mode transitions because the kernel uses them directly.
96 * When the processor is running in 64-bit mode, segment registers are
97 * largely ignored.
98 *
99 * %cs and %ss
100 *	controlled by the hardware mechanisms that make mode transitions
101 *
102 * The remaining segment registers have to either be pointing at a valid
103 * descriptor i.e. with the 'present' bit set, or they can be NULL descriptors
104 *
105 * %ds and %es
106 *	always ignored
107 *
108 * %fs and %gs
109 *	fsbase and gsbase are used to control the place they really point at.
110 *	The kernel only depends on %gs, and controls its own gsbase via swapgs
111 *
112 * Note that loading segment registers is still costly because the GDT
113 * lookup still happens (this is because the hardware can't know that we're
114 * not setting up these segment registers for a 32-bit program).  Thus we
115 * avoid doing this in the syscall path, and defer them to lwp context switch
116 * handlers, so the register values remain virtualized to the lwp.
117 */
118
/*
 * ORL_SYSCALLTRACE(r32)
 *
 * OR the global `syscalltrace' word into the 32-bit register r32, so that
 * a non-zero trace setting shows up in the caller's "is there extra work
 * to do?" test.  Expands to nothing unless SYSCALLTRACE is defined.
 */
#if !defined(SYSCALLTRACE)
#define	ORL_SYSCALLTRACE(r32)
#else
#define	ORL_SYSCALLTRACE(r32)		\
	orl	syscalltrace(%rip), r32
#endif
125
/*
 * BRAND_CALLBACK(callback_id)
 *
 * Look up slot `callback_id' in the machops vector of the current process's
 * brand (curthread -> lwp -> proc -> brand -> machops) and, if the slot is
 * non-NULL, call it.  If the slot is NULL, or if the callback returns, the
 * macro falls out the bottom with %rsp, %r14 and %r15 holding the values
 * they had on entry.
 *
 * In the 32-bit kernel, we do absolutely nothing before getting into the
 * brand callback checks.  In 64-bit land, we do swapgs and then come here.
 * We assume that the %rsp- and %r15-stashing fields in the CPU structure
 * are still unused.
 *
 * Preconditions:
 * -	kernel gsbase is active (the caller has already done SWAPGS)
 * Clobbers:
 * -	%gs:CPU_RTMP_RSP, %gs:CPU_RTMP_R15, the condition codes, and five
 *	words at the top of the thread's kernel stack
 * NOTE(review): the brand pointer is dereferenced without a NULL check;
 * presumably p_brand is always valid on these paths -- confirm.
 *
 * When the callback is invoked, we will be on the user's %gs and
 * the stack will look like this:
 *
 * stack:  --------------------------------------
 *         | callback pointer			|
 *    |    | user stack pointer			|
 *    |    | lwp pointer			|
 *    v    | userland return address		|
 *         | callback wrapper return addr	|
 *         --------------------------------------
 *
 */
#define	BRAND_CALLBACK(callback_id)					    \
	movq	%rsp, %gs:CPU_RTMP_RSP	/* save the stack pointer	*/ ;\
	movq	%r15, %gs:CPU_RTMP_R15	/* save %r15			*/ ;\
	movq	%gs:CPU_THREAD, %r15	/* load the thread pointer	*/ ;\
	movq	T_STACK(%r15), %rsp	/* switch to the kernel stack	*/ ;\
	subq	$16, %rsp		/* save space for two pointers	*/ ;\
	pushq	%r14			/* save %r14			*/ ;\
	movq	%gs:CPU_RTMP_RSP, %r14					   ;\
	movq	%r14, 8(%rsp)		/* stash the user stack pointer	*/ ;\
	popq	%r14			/* restore %r14			*/ ;\
	movq	T_LWP(%r15), %r15	/* load the lwp pointer		*/ ;\
	pushq	%r15			/* push the lwp pointer		*/ ;\
	movq	LWP_PROCP(%r15), %r15	/* load the proc pointer	*/ ;\
	movq	P_BRAND(%r15), %r15	/* load the brand pointer	*/ ;\
	movq	B_MACHOPS(%r15), %r15	/* load the machops pointer	*/ ;\
	movq	_CONST(_MUL(callback_id, CPTRSIZE))(%r15), %r15		   ;\
	testq	%r15, %r15		/* is a callback registered?	*/ ;\
	je	1f			/* no: undo and carry on	*/ ;\
	movq	%r15, 16(%rsp)		/* save the callback pointer	*/ ;\
	movq	%gs:CPU_RTMP_RSP, %r15	/* grab the user stack pointer	*/ ;\
	pushq	(%r15)			/* push the return address	*/ ;\
	movq	%gs:CPU_RTMP_R15, %r15	/* restore %r15			*/ ;\
	SWAPGS				/* user gsbase                  */ ;\
	call	*24(%rsp)		/* call callback		*/ ;\
	SWAPGS				/* kernel gsbase		*/ ;\
1:	movq	%gs:CPU_RTMP_R15, %r15	/* restore %r15			*/ ;\
	movq	%gs:CPU_RTMP_RSP, %rsp	/* restore the stack pointer	*/
171
/*
 * MSTATE_TRANSITION(from, to)
 *
 * Call syscall_mstate(from, to) to record a microstate change.  Both
 * arguments are passed as 32-bit immediates.  This is an ordinary C call,
 * so the caller must treat the caller-saved registers (the entry paths
 * reload %eax afterwards) as damaged.
 */
#define	MSTATE_TRANSITION(from, to)		\
	movl	$to, %esi;			\
	movl	$from, %edi;			\
	call	syscall_mstate
176
/*
 * CHECK_POSTSYS_NE(t, ltmp, rtmp)
 *
 * Check to see if a simple (direct) return is possible i.e.
 *
 *	if ((lwp->lwp_pcb.pcb_rupdate | syscalltrace |
 *	    t->t_post_sys_ast) != 0)
 *		do full version	;
 *
 * (syscalltrace only takes part when SYSCALLTRACE is defined.)
 *
 * Arguments:
 * -	t	register holding the thread pointer
 * -	ltmp	64-bit scratch register; receives t->t_lwp
 * -	rtmp	32-bit scratch register; receives the OR of the flags
 * Preconditions:
 * -	t is curthread
 * Postconditions:
 * -	condition code NE is set if post-sys is too complex
 * -	rtmp is zeroed if it isn't (we rely on this!)
 * -	ltmp is smashed
 */
#define	CHECK_POSTSYS_NE(t, ltmp, rtmp)			\
	movq	T_LWP(t), ltmp;				\
	movzbl	PCB_RUPDATE(ltmp), rtmp;		\
	ORL_SYSCALLTRACE(rtmp);				\
	orl	T_POST_SYS_AST(t), rtmp;		\
	testl	rtmp, rtmp
197
/*
 * SIMPLE_SYSCALL_POSTSYS(t, lwp, zwreg)
 *
 * Fix up the lwp, thread, and eflags for a successful return:
 * -	lwp state goes back to LWP_USER
 * -	t->t_sysnum is cleared (by storing zwreg)
 * -	PS_C is cleared in the saved flags image at REGOFF_RFL(%rsp)
 *
 * The flags fix-up is a byte-wide AND on the lowest byte of the saved
 * %rflags, so the mask is built as an 8-bit value; this relies on PS_C
 * living in that low byte, exactly as the byte-wide store always has.
 *
 * Preconditions:
 * -	zwreg contains zero
 * -	%rsp points at the base of the struct regs
 */
#define	SIMPLE_SYSCALL_POSTSYS(t, lwp, zwreg)		\
	movb	$LWP_USER, LWP_STATE(lwp);		\
	movw	zwreg, T_SYSNUM(t);			\
	andb	$_CONST(0xff - PS_C), REGOFF_RFL(%rsp)
208
209/*
210 * ASSERT(lwptoregs(lwp) == rp);
211 *
212 * This may seem obvious, but very odd things happen if this
213 * assertion is false
214 *
215 * Preconditions:
216 *	(%rsp is ready for normal call sequence)
217 * Postconditions (if assertion is true):
218 *	%r11 is smashed
219 *
220 * ASSERT(rp->r_cs == descnum)
221 *
222 * The code selector is written into the regs structure when the
223 * lwp stack is created.  We use this ASSERT to validate that
224 * the regs structure really matches how we came in.
225 *
226 * Preconditions:
227 *	(%rsp is ready for normal call sequence)
228 * Postconditions (if assertion is true):
229 *	-none-
230 *
231 * ASSERT(lwp->lwp_pcb.pcb_rupdate == 0);
232 *
233 * If this is false, it means that we returned to userland without
234 * updating the segment registers as we were supposed to.
235 *
236 * Note that we must ensure no interrupts or other traps intervene
237 * between entering privileged mode and performing the assertion,
238 * otherwise we may perform a context switch on the thread, which
239 * will end up setting pcb_rupdate to 1 again.
240 */
/*
 * Debug-only assertion macros.  On non-DEBUG kernels both expand to
 * nothing.  On DEBUG kernels a failed assertion calls panic() with a
 * printf-style message; %eax is zeroed before the call.
 *
 * ASSERT_LWPTOREGS(lwp, rp)
 *	panics unless LWP_REGS(lwp) == rp; smashes %r11 (uses local label 7)
 * ASSERT_NO_RUPDATE_PENDING(lwp)
 *	panics if bit 0 of PCB_RUPDATE(lwp) is set (uses local label 8)
 */
#if !defined(DEBUG)

#define	ASSERT_LWPTOREGS(lwp, rp)
#define	ASSERT_NO_RUPDATE_PENDING(lwp)

#else	/* DEBUG */

#if !defined(__lint)

__lwptoregs_msg:
	.string	"%M%:%d lwptoregs(%p) [%p] != rp [%p]"

__codesel_msg:
	.string	"%M%:%d rp->r_cs [%ld] != %ld"

__no_rupdate_msg:
	.string	"%M%:%d lwp %p, pcb_rupdate != 0"

#endif	/* !__lint */

/* panic(__lwptoregs_msg, __LINE__, lwp, lwp->lwp_regs, rp) on mismatch */
#define	ASSERT_LWPTOREGS(lwp, rp)			\
	movq	LWP_REGS(lwp), %r11;			\
	cmpq	rp, %r11;				\
	je	7f;					\
	leaq	__lwptoregs_msg(%rip), %rdi;		\
	movl	$__LINE__, %esi;			\
	movq	lwp, %rdx;				\
	movq	%r11, %rcx;				\
	movq	rp, %r8;				\
	xorl	%eax, %eax;				\
	call	panic;					\
7:

/* panic(__no_rupdate_msg, __LINE__, lwp) if an sregs update is pending */
#define	ASSERT_NO_RUPDATE_PENDING(lwp)			\
	testb	$0x1, PCB_RUPDATE(lwp);			\
	je	8f;					\
	movq	lwp, %rdx;				\
	leaq	__no_rupdate_msg(%rip), %rdi;		\
	movl	$__LINE__, %esi;			\
	xorl	%eax, %eax;				\
	call	panic;					\
8:

#endif	/* DEBUG */
283
/*
 * SYSCALL_TRAPTRACE(ttype) / SYSCALL_TRAPTRACE32(ttype)
 *
 * Do the traptrace thing and restore any registers we used in situ:
 * %rax, %rbx, %rcx, %rdx and %rdi are reloaded from the saved frame, and
 * the syscall number (%eax) is recorded in the trace record.  Assumes
 * that %rsp is pointing at the base of the struct regs, obviously ..
 *
 * The 32-bit flavour additionally rewrites each of those registers through
 * its 32-bit name, which clears the upper 32 bits.
 *
 * Both expand to nothing unless TRAPTRACE is defined.
 */
#if !defined(TRAPTRACE)
#define	SYSCALL_TRAPTRACE(ttype)
#define	SYSCALL_TRAPTRACE32(ttype)
#else	/* TRAPTRACE */
#define	SYSCALL_TRAPTRACE(ttype)				\
	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, ttype);		\
	TRACE_REGS(%rdi, %rsp, %rbx, %rcx);			\
	TRACE_STAMP(%rdi);	/* rdtsc clobbers %eax, %edx */	\
	movq	REGOFF_RAX(%rsp), %rax;				\
	movl	%eax, TTR_SYSNUM(%rdi);				\
	movq	REGOFF_RBX(%rsp), %rbx;				\
	movq	REGOFF_RCX(%rsp), %rcx;				\
	movq	REGOFF_RDX(%rsp), %rdx;				\
	movq	REGOFF_RDI(%rsp), %rdi

#define	SYSCALL_TRAPTRACE32(ttype)				\
	SYSCALL_TRAPTRACE(ttype);				\
	/* paranoia: clean the top 32-bits of the registers */	\
	orl	%eax, %eax;					\
	orl	%ebx, %ebx;					\
	orl	%ecx, %ecx;					\
	orl	%edx, %edx;					\
	orl	%edi, %edi
#endif	/* TRAPTRACE */
313
314/*
315 * The 64-bit libc syscall wrapper does this:
316 *
317 * fn(<args>)
318 * {
319 *	movq	%rcx, %r10	-- because syscall smashes %rcx
320 *	movl	$CODE, %eax
321 *	syscall
322 *	<error processing>
323 * }
324 *
325 * Thus when we come into the kernel:
326 *
327 *	%rdi, %rsi, %rdx, %r10, %r8, %r9 contain first six args
328 *	%rax is the syscall number
329 *	%r12-%r15 contain caller state
330 *
331 * The syscall instruction arranges that:
332 *
333 *	%rcx contains the return %rip
334 *	%r11d contains bottom 32-bits of %rflags
335 *	%rflags is masked (as determined by the SFMASK msr)
336 *	%cs is set to UCS_SEL (as determined by the STAR msr)
337 *	%ss is set to UDS_SEL (as determined by the STAR msr)
338 *	%rip is set to sys_syscall (as determined by the LSTAR msr)
339 *
340 * Or in other words, we have no registers available at all.
341 * Only swapgs can save us!
342 *
343 * Under the hypervisor, the swapgs has happened already.  However, the
344 * state of the world is very different from that we're familiar with.
345 *
346 * In particular, we have a stack structure like that for interrupt
347 * gates, except that the %cs and %ss registers are modified for reasons
348 * that are not entirely clear.  Critically, the %rcx/%r11 values do
349 * *not* reflect the usage of those registers under a 'real' syscall[1];
350 * the stack, therefore, looks like this:
351 *
352 *	0x0(rsp)	potentially junk %rcx
353 *	0x8(rsp)	potentially junk %r11
354 *	0x10(rsp)	user %rip
355 *	0x18(rsp)	modified %cs
356 *	0x20(rsp)	user %rflags
357 *	0x28(rsp)	user %rsp
358 *	0x30(rsp)	modified %ss
359 *
360 *
361 * and before continuing on, we must load the %rip into %rcx and the
362 * %rflags into %r11.
363 *
364 * [1] They used to, and we relied on it, but this was broken in 3.1.1.
365 * Sigh.
366 */
367
/*
 * XPV_SYSCALL_PROD
 *
 * Hypervisor builds only: after XPV_TRAP_POP, load %rcx from 0(%rsp) and
 * %r11 from 0x10(%rsp) so that they look the way a native `syscall' would
 * have left them (return %rip and %rflags).  Presumably XPV_TRAP_POP
 * discards the two leading "junk" slots of the hypervisor frame, which is
 * what puts %rip and %rflags at those offsets -- confirm against its
 * definition.  Expands to nothing on bare metal.
 */
#if !defined(__xpv)
#define	XPV_SYSCALL_PROD /* nothing */
#else
#define	XPV_SYSCALL_PROD		\
	XPV_TRAP_POP;			\
	movq	(%rsp), %rcx;		\
	movq	0x10(%rsp), %r11
#endif
376
377#if defined(__lint)
378
379/*ARGSUSED*/
380void
381sys_syscall()
382{}
383
384void
385_allsyscalls()
386{}
387
388size_t _allsyscalls_size;
389
390#else	/* __lint */
391
/*
 * brand_sys_syscall / sys_syscall
 *
 * Entry points for the 64-bit `syscall' instruction.  brand_sys_syscall
 * runs BRAND_CALLBACK(BRAND_CB_SYSCALL) first and, when control comes
 * back, carries on exactly as sys_syscall does.
 *
 *  1.	Switch to the kernel gsbase and the thread's kernel stack, and
 *	save the user register state into the struct regs found there.
 *  2.	Account the user -> system transition and dispatch through
 *	sysent[]; pre_syscall() runs first if t_pre_sys (or syscalltrace)
 *	is non-zero.
 *  3.	With interrupts off, check for pending post-syscall work.  If
 *	there is none, reload the user registers and leave via SYSRETQ;
 *	otherwise call post_syscall() and leave via _sys_rtt.
 *
 * Register roles once the frame is built:
 *	%r15 = curthread, %r14 = lwp, %rbp = %rsp = base of struct regs,
 *	%rbx = sysent entry, %r12/%r13 = rval1/rval2 after the handler.
 */
	ENTRY_NP2(brand_sys_syscall,_allsyscalls)
	SWAPGS				/* kernel gsbase */
	XPV_SYSCALL_PROD
	BRAND_CALLBACK(BRAND_CB_SYSCALL)
	SWAPGS				/* user gsbase */

#if defined(__xpv)
	/* XPV_SYSCALL_PROD has already been done on this path */
	jmp	noprod_sys_syscall
#endif

	ALTENTRY(sys_syscall)
	SWAPGS				/* kernel gsbase */
	XPV_SYSCALL_PROD

noprod_sys_syscall:
	ASSERT_UPCALL_MASK_IS_SET

	/*
	 * Park the user's %r15 and stack pointer in the per-CPU scratch
	 * slots so that %r15 can be used to find the kernel stack.
	 */
	movq	%r15, %gs:CPU_RTMP_R15
#if defined(__xpv)
	movq	0x18(%rsp), %r15		/* user %rsp from the frame */
	movq	%r15, %gs:CPU_RTMP_RSP
#else
	movq	%rsp, %gs:CPU_RTMP_RSP
#endif	/* __xpv */

	movq	%gs:CPU_THREAD, %r15		/* %r15 = curthread */
	movq	T_STACK(%r15), %rsp		/* %rsp = struct regs */

	/*
	 * The words an interrupt gate would have pushed.  %r11 is free for
	 * reuse as soon as the flags have been stored.
	 */
	movq	%rcx, REGOFF_RIP(%rsp)		/* syscall: %rip -> %rcx */
	movq	%r11, REGOFF_RFL(%rsp)		/* syscall: %rfl -> %r11d */
	movl	$UCS_SEL, REGOFF_CS(%rsp)
	movl	$UDS_SEL, REGOFF_SS(%rsp)
	movq	%gs:CPU_RTMP_RSP, %r11
	movq	%r11, REGOFF_RSP(%rsp)

	/*
	 * Syscall number and the six argument registers.  The libc wrapper
	 * passes arg[3] in %r10, so that value is recorded as both %rcx
	 * and %r10 and then copied into %rcx for the C calling convention.
	 */
	movl	%eax, %eax			/* wrapper: sysc# -> %eax */
	movq	%rax, REGOFF_RAX(%rsp)
	movq	%rdi, REGOFF_RDI(%rsp)
	movq	%rsi, REGOFF_RSI(%rsp)
	movq	%rdx, REGOFF_RDX(%rsp)
	movq	%r10, REGOFF_RCX(%rsp)		/* wrapper: %rcx -> %r10 */
	movq	%r10, REGOFF_R10(%rsp)
	movq	%r10, %rcx			/* arg[3] for direct calls */
	movq	%r8, REGOFF_R8(%rsp)
	movq	%r9, REGOFF_R9(%rsp)

	/*
	 * Everything else; the user's %r15 comes back out of the per-CPU
	 * scratch slot by way of %r10, which has been saved above.
	 */
	movq	%rbx, REGOFF_RBX(%rsp)
	movq	%rbp, REGOFF_RBP(%rsp)
	movq	%r12, REGOFF_R12(%rsp)
	movq	%r13, REGOFF_R13(%rsp)
	movq	%r14, REGOFF_R14(%rsp)
	movq	%gs:CPU_RTMP_R15, %r10
	movq	%r10, REGOFF_R15(%rsp)
	movq	$0, REGOFF_SAVFP(%rsp)
	movq	$0, REGOFF_SAVPC(%rsp)

	/*
	 * Record the segment selectors too, in case we end up stopped
	 * with someone (like, say, /proc) looking at or changing our
	 * register state.  They are not -restored- from here unless
	 * update_sregs has to.
	 *
	 * Userland -can't- change fsbase or gsbase directly, and reading
	 * them costs two serializing instructions, so those are not
	 * captured.
	 */
	xorl	%ebx, %ebx
	movw	%ds, %bx
	movq	%rbx, REGOFF_DS(%rsp)
	movw	%es, %bx
	movq	%rbx, REGOFF_ES(%rsp)
	movw	%fs, %bx
	movq	%rbx, REGOFF_FS(%rsp)
	movw	%gs, %bx
	movq	%rbx, REGOFF_GS(%rsp)

	/*
	 * At this point:
	 *	machine state is saved in the struct regs on the stack
	 *	%rdi, %rsi, %rdx, %rcx, %r8, %r9 hold the first six args
	 *	%eax is the syscall number
	 *	%rsp is the thread's stack, %r15 is curthread
	 *	REGOFF_RSP(%rsp) is the user's stack
	 */

	SYSCALL_TRAPTRACE($TT_SYSC64)

	movq	%rsp, %rbp			/* %rbp = struct regs */

	movq	T_LWP(%r15), %r14		/* %r14 = lwp */
	ASSERT_NO_RUPDATE_PENDING(%r14)
	ENABLE_INTR_FLAGS

	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate call) */

	ASSERT_LWPTOREGS(%r14, %rsp)

	movb	$LWP_SYS, LWP_STATE(%r14)
	incq	LWP_RU_SYSC(%r14)
	movb	$NORMALRETURN, LWP_EOSYS(%r14)

	incq	%gs:CPU_STATS_SYS_SYSCALL

	/* take the slow path if t_pre_sys (or syscalltrace) is non-zero */
	movw	%ax, T_SYSNUM(%r15)
	movzbl	T_PRE_SYS(%r15), %ebx
	ORL_SYSCALLTRACE(%ebx)
	testl	%ebx, %ebx
	jne	_syscall_pre

_syscall_invoke:
	/* (re)load the arguments from the frame; %eax = syscall number */
	movq	REGOFF_RDI(%rbp), %rdi
	movq	REGOFF_RSI(%rbp), %rsi
	movq	REGOFF_RDX(%rbp), %rdx
	movq	REGOFF_RCX(%rbp), %rcx
	movq	REGOFF_R8(%rbp), %r8
	movq	REGOFF_R9(%rbp), %r9

	/* out-of-range numbers go to nosys; otherwise index sysent[] */
	cmpl	$NSYSCALL, %eax
	jae	_syscall_ill
	shll	$SYSENT_SIZE_SHIFT, %eax
	leaq	sysent(%rax), %rbx

	call	*SY_CALLC(%rbx)

	movq	%rax, %r12			/* rval1 */
	movq	%rdx, %r13			/* rval2 */

	/*
	 * A handler flagged SE_32RVAL2 returns two ints packed into one
	 * 64-bit value; split it into two 32-bit values.
	 */
	testw	$SE_32RVAL2, SY_FLAGS(%rbx)
	je	5f
	movq	%r12, %r13
	shrq	$32, %r13	/* upper 32-bits into %edx */
	movl	%r12d, %r12d	/* lower 32-bits into %eax */
5:
	/*
	 * Be optimistic and assume there is no post-syscall work, so that
	 * syscall_mstate() need not be called with interrupts disabled.
	 */
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	/*
	 * From here on we must not be descheduled: if we were, and ended
	 * up on another cpu, or another lwp got in ahead of us, the
	 * segment registers could change without us noticing before we
	 * return to userland.
	 */
	CLI(%r14)
	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
	jne	_syscall_post
	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)

	movq	%r12, REGOFF_RAX(%rsp)
	movq	%r13, REGOFF_RDX(%rsp)

	/*
	 * sysretq wants the return %rip in %rcx and the return %rfl in
	 * %r11d, and fixes up %cs and %ss itself; every other register is
	 * our responsibility.  %rsp is the base for all of these loads, so
	 * it is switched last.
	 */
	movq	REGOFF_RAX(%rsp), %rax
	movq	REGOFF_RDX(%rsp), %rdx
	movq	REGOFF_RDI(%rsp), %rdi
	movq	REGOFF_RSI(%rsp), %rsi
	movq	REGOFF_R8(%rsp), %r8
	movq	REGOFF_R9(%rsp), %r9
	movq	REGOFF_R10(%rsp), %r10

	movq	REGOFF_RBX(%rsp), %rbx
	movq	REGOFF_RBP(%rsp), %rbp
	movq	REGOFF_R12(%rsp), %r12
	movq	REGOFF_R13(%rsp), %r13
	movq	REGOFF_R14(%rsp), %r14
	movq	REGOFF_R15(%rsp), %r15

	movq	REGOFF_RIP(%rsp), %rcx		/* sysretq: %rcx -> %rip */
	movl	REGOFF_RFL(%rsp), %r11d		/* sysretq: %r11 -> %rfl */

#if defined(__xpv)
	addq	$REGOFF_RIP, %rsp
#else
	movq	REGOFF_RSP(%rsp), %rsp
#endif

	/*
	 * There can be no instructions between the ALTENTRY below and
	 * SYSRET or we could end up breaking brand support. See label usage
	 * in sn1_brand_syscall_callback for an example.
	 */
	ASSERT_UPCALL_MASK_IS_SET
	SWAPGS				/* user gsbase */
	ALTENTRY(nopop_sys_syscall_sysretq)
	SYSRETQ
	/*NOTREACHED*/
	SET_SIZE(nopop_sys_syscall_sysretq)

_syscall_pre:
	/*
	 * A non-zero return from pre_syscall() skips the handler; the
	 * value travels to post_syscall() in %r12d.
	 */
	call	pre_syscall
	movl	%eax, %r12d
	testl	%eax, %eax
	jne	_syscall_post_call
	/*
	 * Didn't abort, so fetch the syscall number again and let
	 * _syscall_invoke reload the args and call the handler.
	 */
	movzwl	T_SYSNUM(%r15), %eax
	jmp	_syscall_invoke

_syscall_ill:
	/* syscall number >= NSYSCALL */
	call	nosys
	movq	%rax, %r12
	movq	%rdx, %r13
	jmp	_syscall_post_call

_syscall_post:
	STI
	/*
	 * Sigh, our optimism wasn't justified, put it back to LMS_SYSTEM
	 * so that we can account for the extra work it takes us to finish.
	 */
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
_syscall_post_call:
	movq	%r12, %rdi			/* rval1 */
	movq	%r13, %rsi			/* rval2 */
	call	post_syscall
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
	jmp	_sys_rtt
	SET_SIZE(sys_syscall)
	SET_SIZE(brand_sys_syscall)
628
629#endif	/* __lint */
630
631#if defined(__lint)
632
633/*ARGSUSED*/
634void
635sys_syscall32()
636{}
637
638#else	/* __lint */
639
/*
 * brand_sys_syscall32 / sys_syscall32
 *
 * Entry points for the `syscall' instruction issued by a 32-bit process.
 * brand_sys_syscall32 runs BRAND_CALLBACK(BRAND_CB_SYSCALL32) first and
 * then continues as sys_syscall32 does.
 *
 * The user's 32-bit registers are saved into the struct regs on the
 * thread's kernel stack, syscall_entry() copies the arguments into a
 * SYS_DROP-byte area below it, and the handler is called through the
 * pointer syscall_entry() returns.  The fast exit is SYSRETL; the slow
 * exit (_full_syscall_postsys32, also used by the sysenter path) calls
 * syscall_exit() and leaves via _sys_rtt.
 *
 * Register roles once the frame is built:
 *	%r15 = curthread, %r14 = lwp, %rbp = base of struct regs,
 *	%r12d/%r13d = rval1/rval2 after the handler.
 */
	ENTRY_NP(brand_sys_syscall32)
	SWAPGS				/* kernel gsbase */
	XPV_TRAP_POP
	BRAND_CALLBACK(BRAND_CB_SYSCALL32)
	SWAPGS				/* user gsbase */

#if defined(__xpv)
	/* XPV_TRAP_POP has already been done on this path */
	jmp	nopop_sys_syscall32
#endif

	ALTENTRY(sys_syscall32)
	SWAPGS				/* kernel gsbase */

#if defined(__xpv)
	XPV_TRAP_POP
nopop_sys_syscall32:
#endif

	movl	%esp, %r10d			/* remember the user %esp */
	movq	%gs:CPU_THREAD, %r15		/* %r15 = curthread */
	movq	T_STACK(%r15), %rsp		/* %rsp = struct regs */
	movl	%eax, %eax

	movl	%ecx, REGOFF_RIP(%rsp)		/* syscall: %rip -> %rcx */
	movq	%r11, REGOFF_RFL(%rsp)		/* syscall: %rfl -> %r11d */
	movq	%r10, REGOFF_RSP(%rsp)
	movl	$U32CS_SEL, REGOFF_CS(%rsp)
	movl	$UDS_SEL, REGOFF_SS(%rsp)

_syscall32_save:
	movl	%eax, REGOFF_RAX(%rsp)		/* wrapper: sysc# -> %eax */
	movl	%ebx, REGOFF_RBX(%rsp)
	movl	%ecx, REGOFF_RCX(%rsp)
	movl	%edx, REGOFF_RDX(%rsp)
	movl	%esi, REGOFF_RSI(%rsp)
	movl	%edi, REGOFF_RDI(%rsp)
	movl	%ebp, REGOFF_RBP(%rsp)
	movq	$0, REGOFF_SAVFP(%rsp)
	movq	$0, REGOFF_SAVPC(%rsp)

	/*
	 * Record the segment selectors too, in case we end up stopped
	 * with someone (like, say, /proc) looking at or changing our
	 * register state.  They are not -restored- from here unless
	 * update_sregs has to.
	 *
	 * Userland -can't- change fsbase or gsbase directly, so those are
	 * not captured.
	 */
	xorl	%ebx, %ebx
	movw	%ds, %bx
	movq	%rbx, REGOFF_DS(%rsp)
	movw	%es, %bx
	movq	%rbx, REGOFF_ES(%rsp)
	movw	%fs, %bx
	movq	%rbx, REGOFF_FS(%rsp)
	movw	%gs, %bx
	movq	%rbx, REGOFF_GS(%rsp)

	/*
	 * At this point:
	 *	application state is saved in the struct regs on the stack
	 *	%eax is the syscall number
	 *	%rsp is the thread's stack, %r15 is curthread
	 *	REGOFF_RSP(%rsp) is the user's stack
	 */

	SYSCALL_TRAPTRACE32($TT_SYSC)

	movq	%rsp, %rbp			/* %rbp = struct regs */

	movq	T_LWP(%r15), %r14		/* %r14 = lwp */
	ASSERT_NO_RUPDATE_PENDING(%r14)

	ENABLE_INTR_FLAGS

	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate call) */

	ASSERT_LWPTOREGS(%r14, %rsp)

	incq	 %gs:CPU_STATS_SYS_SYSCALL

	/*
	 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
	 * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
	 * more succinctly:
	 *
	 *	SA(MAXSYSARGS * sizeof (long)) == 64
	 */
#define	SYS_DROP	64			/* drop for args */
	subq	$SYS_DROP, %rsp
	movb	$LWP_SYS, LWP_STATE(%r14)
	movq	%r15, %rdi			/* arg0: curthread */
	movq	%rsp, %rsi			/* arg1: argument area */
	call	syscall_entry

	/*
	 * Fetch the arguments copied onto the kernel stack and put
	 * them in the right registers to invoke a C-style syscall handler.
	 * The value syscall_entry() returned in %rax is the pointer whose
	 * SY_CALLC slot holds the handler; it is kept in %rbx.
	 *
	 * Ideas for making all this go faster of course include simply
	 * forcibly fetching 6 arguments from the user stack under lofault
	 * protection, reverting to copyin_args only when watchpoints
	 * are in effect.
	 *
	 * (If we do this, make sure that exec and libthread leave
	 * enough space at the top of the stack to ensure that we'll
	 * never do a fetch from an invalid page.)
	 *
	 * Lots of ideas here, but they won't really help with bringup B-)
	 * Correctness can't wait, performance can wait a little longer ..
	 */

	movq	%rax, %rbx
	movl	0(%rsp), %edi
	movl	8(%rsp), %esi
	movl	0x10(%rsp), %edx
	movl	0x18(%rsp), %ecx
	movl	0x20(%rsp), %r8d
	movl	0x28(%rsp), %r9d

	call	*SY_CALLC(%rbx)

	movq	%rbp, %rsp	/* pop the args */

	/*
	 * amd64 syscall handlers -always- return a 64-bit value in %rax.
	 * On the 32-bit kernel, they always return that value in %eax:%edx
	 * as required by the 32-bit ABI.
	 *
	 * Simulate the same behaviour by unconditionally splitting the
	 * return value in the same way.
	 */
	movq	%rax, %r13
	shrq	$32, %r13	/* upper 32-bits into %edx */
	movl	%eax, %r12d	/* lower 32-bits into %eax */

	/*
	 * Be optimistic and assume there is no post-syscall work, so that
	 * syscall_mstate() need not be called with interrupts disabled.
	 */
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	/*
	 * From here on we must not be descheduled: if we were, and ended
	 * up on another cpu, or another lwp got in ahead of us, the
	 * segment registers could change without us noticing before we
	 * return to userland.
	 */
	CLI(%r14)
	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
	jne	_full_syscall_postsys32
	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)

	/*
	 * sysretl wants the return %eip in %ecx and the return flags in
	 * %r11d, and fixes up %cs and %ss itself; every other register is
	 * our responsibility.  %esp is the base for these loads, so it is
	 * switched last.
	 */

	movl	%r12d, %eax			/* %eax: rval1 */
	movl	%r13d, %edx			/* %edx: rval2 */
	movl	REGOFF_RBX(%rsp), %ebx
	movl	REGOFF_RSI(%rsp), %esi
	movl	REGOFF_RDI(%rsp), %edi
	movl	REGOFF_RBP(%rsp), %ebp

	movl	REGOFF_RIP(%rsp), %ecx		/* %ecx -> %eip */
	movl	REGOFF_RFL(%rsp), %r11d		/* %r11 -> eflags */
	movl	REGOFF_RSP(%rsp), %esp

	ASSERT_UPCALL_MASK_IS_SET
	SWAPGS				/* user gsbase */
	ALTENTRY(nopop_sys_syscall32_sysretl)
	SYSRETL
	SET_SIZE(nopop_sys_syscall32_sysretl)
	/*NOTREACHED*/

_full_syscall_postsys32:
	STI
	/*
	 * Sigh, our optimism wasn't justified, put it back to LMS_SYSTEM
	 * so that we can account for the extra work it takes us to finish.
	 */
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	movq	%r15, %rdi			/* curthread */
	movq	%r12, %rsi			/* rval1 - %eax */
	movq	%r13, %rdx			/* rval2 - %edx */
	call	syscall_exit
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
	jmp	_sys_rtt
	SET_SIZE(sys_syscall32)
	SET_SIZE(brand_sys_syscall32)
835
836#endif	/* __lint */
837
838/*
839 * System call handler via the sysenter instruction
840 * Used only for 32-bit system calls on the 64-bit kernel.
841 *
842 * The caller in userland has arranged that:
843 *
844 * -	%eax contains the syscall number
845 * -	%ecx contains the user %esp
846 * -	%edx contains the return %eip
847 * -	the user stack contains the args to the syscall
848 *
849 * Hardware and (privileged) initialization code have arranged that by
850 * the time the sysenter instructions completes:
851 *
852 * - %rip is pointing to sys_sysenter (below).
853 * - %cs and %ss are set to kernel text and stack (data) selectors.
854 * - %rsp is pointing at the lwp's stack
855 * - interrupts have been disabled.
856 *
857 * Note that we are unable to return both "rvals" to userland with
858 * this call, as %edx is used by the sysexit instruction.
859 *
860 * One final complication in this routine is its interaction with
861 * single-stepping in a debugger.  For most of the system call mechanisms,
862 * the CPU automatically clears the single-step flag before we enter the
863 * kernel.  The sysenter mechanism does not clear the flag, so a user
864 * single-stepping through a libc routine may suddenly find him/herself
865 * single-stepping through the kernel.  To detect this, kmdb compares the
866 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
867 * If it finds that we have single-stepped to a sysenter entry point, it
868 * explicitly clears the flag and executes the sys_sysenter routine.
869 *
870 * One final complication in this final complication is the fact that we
871 * have two different entry points for sysenter: brand_sys_sysenter and
872 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
873 * through the kernel with kmdb, we will eventually hit the instruction at
874 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
875 * and the undesirable one mentioned above.  To avoid this situation, we
876 * simply add a jump over the instruction at sys_sysenter to make it
877 * impossible to single-step to it.
878 */
879#if defined(__lint)
880
881void
882sys_sysenter()
883{}
884
885#else	/* __lint */
886
887	ENTRY_NP(brand_sys_sysenter)
888	SWAPGS				/* kernel gsbase */
889	ALTENTRY(_brand_sys_sysenter_post_swapgs)
890	BRAND_CALLBACK(BRAND_CB_SYSENTER)
891	/*
892	 * Jump over sys_sysenter to allow single-stepping as described
893	 * above.
894	 */
895	jmp	_sys_sysenter_post_swapgs
896
897	ALTENTRY(sys_sysenter)
898	SWAPGS				/* kernel gsbase */
899
900	ALTENTRY(_sys_sysenter_post_swapgs)
901	movq	%gs:CPU_THREAD, %r15
902
903	movl	$U32CS_SEL, REGOFF_CS(%rsp)
904	movl	%ecx, REGOFF_RSP(%rsp)		/* wrapper: %esp -> %ecx */
905	movl	%edx, REGOFF_RIP(%rsp)		/* wrapper: %eip -> %edx */
906	pushfq
907	popq	%r10
908	movl	$UDS_SEL, REGOFF_SS(%rsp)
909
910	/*
911	 * Set the interrupt flag before storing the flags to the
912	 * flags image on the stack so we can return to user with
913	 * interrupts enabled if we return via sys_rtt_syscall32
914	 */
915	orq	$PS_IE, %r10
916	movq	%r10, REGOFF_RFL(%rsp)
917
918	movl	%edi, REGOFF_RDI(%rsp)
919	movl	%esi, REGOFF_RSI(%rsp)
920	movl	%ebp, REGOFF_RBP(%rsp)
921	movl	%ebx, REGOFF_RBX(%rsp)
922	movl	%edx, REGOFF_RDX(%rsp)
923	movl	%ecx, REGOFF_RCX(%rsp)
924	movl	%eax, REGOFF_RAX(%rsp)		/* wrapper: sysc# -> %eax */
925	movq	$0, REGOFF_SAVFP(%rsp)
926	movq	$0, REGOFF_SAVPC(%rsp)
927
928	/*
929	 * Copy these registers here in case we end up stopped with
930	 * someone (like, say, /proc) messing with our register state.
931	 * We don't -restore- them unless we have to in update_sregs.
932	 *
933	 * Since userland -can't- change fsbase or gsbase directly,
934	 * we don't bother to capture them here.
935	 */
936	xorl	%ebx, %ebx
937	movw	%ds, %bx
938	movq	%rbx, REGOFF_DS(%rsp)
939	movw	%es, %bx
940	movq	%rbx, REGOFF_ES(%rsp)
941	movw	%fs, %bx
942	movq	%rbx, REGOFF_FS(%rsp)
943	movw	%gs, %bx
944	movq	%rbx, REGOFF_GS(%rsp)
945
946	/*
947	 * Application state saved in the regs structure on the stack
948	 * %eax is the syscall number
949	 * %rsp is the thread's stack, %r15 is curthread
950	 * REG_RSP(%rsp) is the user's stack
951	 */
952
953	SYSCALL_TRAPTRACE($TT_SYSENTER)
954
955	movq	%rsp, %rbp
956
957	movq	T_LWP(%r15), %r14
958	ASSERT_NO_RUPDATE_PENDING(%r14)
959
960	ENABLE_INTR_FLAGS
961
962	/*
963	 * Catch 64-bit process trying to issue sysenter instruction
964	 * on Nocona based systems.
965	 */
966	movq	LWP_PROCP(%r14), %rax
967	cmpq	$DATAMODEL_ILP32, P_MODEL(%rax)
968	je	7f
969
970	/*
971	 * For a non-32-bit process, simulate a #ud, since that's what
972	 * native hardware does.  The traptrace entry (above) will
973	 * let you know what really happened.
974	 */
975	movq	$T_ILLINST, REGOFF_TRAPNO(%rsp)
976	movq	REGOFF_CS(%rsp), %rdi
977	movq	%rdi, REGOFF_ERR(%rsp)
978	movq	%rsp, %rdi
979	movq	REGOFF_RIP(%rsp), %rsi
980	movl	%gs:CPU_ID, %edx
981	call	trap
982	jmp	_sys_rtt
9837:
984
985	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
986	movl	REGOFF_RAX(%rsp), %eax	/* (%rax damaged by mstate calls) */
987
988	ASSERT_LWPTOREGS(%r14, %rsp)
989
990	incq	%gs:CPU_STATS_SYS_SYSCALL
991
992	/*
993	 * Make some space for MAXSYSARGS (currently 8) 32-bit args
994	 * placed into 64-bit (long) arg slots, plus one 64-bit
995	 * (long) arg count, maintaining 16 byte alignment.
996	 */
997	subq	$SYS_DROP, %rsp
998	movb	$LWP_SYS, LWP_STATE(%r14)
999	movq	%r15, %rdi
1000	movq	%rsp, %rsi
1001	call	syscall_entry
1002
1003	/*
1004	 * Fetch the arguments copied onto the kernel stack and put
1005	 * them in the right registers to invoke a C-style syscall handler.
1006	 * %rax contains the handler address.
1007	 */
1008	movq	%rax, %rbx
1009	movl	0(%rsp), %edi
1010	movl	8(%rsp), %esi
1011	movl	0x10(%rsp), %edx
1012	movl	0x18(%rsp), %ecx
1013	movl	0x20(%rsp), %r8d
1014	movl	0x28(%rsp), %r9d
1015
1016	call	*SY_CALLC(%rbx)
1017
1018	movq	%rbp, %rsp	/* pop the args */
1019
1020	/*
1021	 * amd64 syscall handlers -always- return a 64-bit value in %rax.
1022	 * On the 32-bit kernel, the always return that value in %eax:%edx
1023	 * as required by the 32-bit ABI.
1024	 *
1025	 * Simulate the same behaviour by unconditionally splitting the
1026	 * return value in the same way.
1027	 */
1028	movq	%rax, %r13
1029	shrq	$32, %r13	/* upper 32-bits into %edx */
1030	movl	%eax, %r12d	/* lower 32-bits into %eax */
1031
1032	/*
1033	 * Optimistically assume that there's no post-syscall
1034	 * work to do.  (This is to avoid having to call syscall_mstate()
1035	 * with interrupts disabled)
1036	 */
1037	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
1038
1039	/*
1040	 * We must protect ourselves from being descheduled here;
1041	 * If we were, and we ended up on another cpu, or another
1042	 * lwp got in ahead of us, it could change the segment
1043	 * registers without us noticing before we return to userland.
1044	 */
1045	cli
1046	CHECK_POSTSYS_NE(%r15, %r14, %ebx)
1047	jne	_full_syscall_postsys32
1048	SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
1049
1050	/*
1051	 * To get back to userland, load up the 32-bit registers and
1052	 * sysexit back where we came from.
1053	 */
1054
1055	/*
1056	 * Interrupts will be turned on by the 'sti' executed just before
1057	 * sysexit.  The following ensures that restoring the user's rflags
1058	 * doesn't enable interrupts too soon.
1059	 */
1060	andq	$_BITNOT(PS_IE), REGOFF_RFL(%rsp)
1061
1062	/*
1063	 * (There's no point in loading up %edx because the sysexit
1064	 * mechanism smashes it.)
1065	 */
1066	movl	%r12d, %eax
1067	movl	REGOFF_RBX(%rsp), %ebx
1068	movl	REGOFF_RBP(%rsp), %ebp
1069	movl	REGOFF_RSI(%rsp), %esi
1070	movl	REGOFF_RDI(%rsp), %edi
1071
1072	movl	REGOFF_RIP(%rsp), %edx	/* sysexit: %edx -> %eip */
1073	pushq	REGOFF_RFL(%rsp)
1074	popfq
1075	movl	REGOFF_RSP(%rsp), %ecx	/* sysexit: %ecx -> %esp */
1076	swapgs
1077	sti
1078	sysexit
1079	SET_SIZE(sys_sysenter)
1080	SET_SIZE(_sys_sysenter_post_swapgs)
1081	SET_SIZE(brand_sys_sysenter)
1082
1083#endif	/* __lint */
1084
1085#if defined(__lint)
1086/*
1087 * System call via an int80.  This entry point is only used by the Linux
1088 * application environment.  Unlike the other entry points, there is no
1089 * default action to take if no callback is registered for this process.
1090 */
1091void
1092sys_int80()
1093{}
1094
1095#else	/* __lint */
1096
	/*
	 * brand_sys_int80: offer the int80 to the brand code through
	 * BRAND_CALLBACK(BRAND_CB_INT80), bracketed by the two SWAPGS
	 * invocations so the macro runs on the kernel gsbase.  If control
	 * comes back here (presumably because no brand handler claimed the
	 * trap -- confirm against the BRAND_CALLBACK definition), the user
	 * gsbase is put back and execution continues into sys_int80.
	 */
1097	ENTRY_NP(brand_sys_int80)
1098	SWAPGS				/* kernel gsbase */
1099	XPV_TRAP_POP
1100	BRAND_CALLBACK(BRAND_CB_INT80)
1101	SWAPGS				/* user gsbase */
1102#if defined(__xpv)
1103	jmp	nopop_int80		/* XPV_TRAP_POP already done above; skip sys_int80's */
1104#endif
1105
1106	ENTRY_NP(sys_int80)
1107	/*
1108	 * We hit an int80, but this process isn't of a brand with an int80
1109	 * handler.  Bad process!  Make it look as if the INT failed.
1110	 * Modify %rip to point before the INT, push the expected error
1111	 * code and fake a GP fault. Note on 64-bit hypervisor we need
1112	 * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1113	 * because gptrap will pop them again with its own XPV_TRAP_POP.
1114	 */
1115#if defined(__xpv)
1116	XPV_TRAP_POP
1117nopop_int80:
1118#endif
1119	subq	$2, (%rsp)	/* back the saved %rip up over the 2-byte int insn */
	/*
	 * Fake error code: T_INT80 scaled by GATE_DESC_SIZE, plus 2.
	 * NOTE(review): the +2 is presumably the "refers to the IDT" flag
	 * of a selector-style error code -- confirm.
	 */
1120	pushq	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
1121#if defined(__xpv)
1122	push	%r11			/* put back for gptrap's XPV_TRAP_POP */
1123	push	%rcx			/* put back for gptrap's XPV_TRAP_POP */
1124#endif
1125	jmp	gptrap			/ GP fault
1126	SET_SIZE(sys_int80)
1127	SET_SIZE(brand_sys_int80)
1128#endif	/* __lint */
1129
1130
1131/*
1132 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1133 * the generic i386 libc to do system calls. We do a small amount of setup
1134 * before jumping into the existing sys_syscall32 path.
1135 */
1136#if defined(__lint)
1137
1138/*ARGSUSED*/
1139void
1140sys_syscall_int()
1141{}
1142
1143#else	/* __lint */
1144
	/*
	 * brand_sys_syscall_int: same entry, but BRAND_CALLBACK(BRAND_CB_INT91)
	 * is invoked first, on the kernel gsbase.  If control comes back here
	 * (presumably because the brand did not take over the call -- confirm
	 * against the BRAND_CALLBACK definition), the user gsbase is put back
	 * and execution continues into sys_syscall_int.
	 */
1145	ENTRY_NP(brand_sys_syscall_int)
1146	SWAPGS				/* kernel gsbase */
1147	XPV_TRAP_POP
1148	BRAND_CALLBACK(BRAND_CB_INT91)
1149	SWAPGS				/* user gsbase */
1150
1151#if defined(__xpv)
1152	jmp	nopop_syscall_int	/* skips the SWAPGS and XPV_TRAP_POP below */
1153#endif
1154
1155	ALTENTRY(sys_syscall_int)
1156	SWAPGS				/* kernel gsbase */
1157
1158#if defined(__xpv)
1159	XPV_TRAP_POP
1160nopop_syscall_int:
1161#endif
1162
1163	movq	%gs:CPU_THREAD, %r15	/* %r15 = this cpu's current thread */
1164	movq	T_STACK(%r15), %rsp	/* run on the stack recorded in the thread */
1165	movl	%eax, %eax		/* zero-extend: clears bits 63:32 of %rax */
1166	/*
1167	 * Set t_post_sys on this thread to force ourselves out via the slow
1168	 * path. It might be possible at some later date to optimize this out
1169	 * and use a faster return mechanism.
1170	 */
1171	movb	$1, T_POST_SYS(%r15)
1172	CLEAN_CS			/* NOTE(review): macro defined outside this view */
1173	jmp	_syscall32_save		/* join the sys_syscall32 path */
1174	SET_SIZE(sys_syscall_int)
1175	SET_SIZE(brand_sys_syscall_int)
1176
1177#endif	/* __lint */
1178
1179/*
1180 * Legacy 32-bit applications and old libc implementations do lcalls;
1181 * we should never get here because the LDT entry containing the syscall
1182 * segment descriptor has the "segment present" bit cleared, which means
1183 * we end up processing those system calls in trap() via a not-present trap.
1184 *
1185 * We do it this way because a call gate unhelpfully does -nothing- to the
1186 * interrupt flag bit, so an interrupt can run us just after the lcall
1187 * completes, but just before the swapgs takes effect.   Thus the INTR_PUSH and
1188 * INTR_POP paths would have to be slightly more complex to dance around
1189 * this problem, and end up depending explicitly on the first
1190 * instruction of this handler being either swapgs or cli.
1191 */
1192
1193#if defined(__lint)
1194
1195/*ARGSUSED*/
1196void
1197sys_lcall32()
1198{}
1199
1200#else	/* __lint */
1201
	/*
	 * sys_lcall32: reaching this handler is treated as fatal.  It switches
	 * to the kernel gsbase, builds a stack frame and calls panic() with
	 * __lcall_panic_str as the message.  Nothing follows the call, so
	 * panic() is presumably not expected to return.
	 */
1202	ENTRY_NP(sys_lcall32)
1203	SWAPGS				/* kernel gsbase */
1204	pushq	$0			/* zero in the frame's return-address slot */
1205	pushq	%rbp			/* saved frame pointer */
1206	movq	%rsp, %rbp		/* %rbp -> the frame just built */
1207	leaq	__lcall_panic_str(%rip), %rdi	/* arg0 = message */
1208	xorl	%eax, %eax		/* %al = 0: no vector-register args (variadic call convention) */
1209	call	panic
1210	SET_SIZE(sys_lcall32)
1211
1212__lcall_panic_str:
1213	.string	"sys_lcall32: shouldn't be here!"
1214
1215/*
1216 * Declare a uintptr_t which covers the entire pc range of syscall
1217 * handlers for the stack walkers that need this.
1218 */
1219	.align	CPTRSIZE
1220	.globl	_allsyscalls_size
1221	.type	_allsyscalls_size, @object
1222_allsyscalls_size:
1223	.NWORD	. - _allsyscalls	/* distance from _allsyscalls (defined outside this view) to here */
1224	SET_SIZE(_allsyscalls_size)
1225
1226#endif	/* __lint */
1227
1228/*
1229 * These are the thread context handlers for lwps using sysenter/sysexit.
1230 */
1231
1232#if defined(__lint)
1233
1234/*ARGSUSED*/
1235void
1236sep_save(void *ksp)	/* lint stub; the assembly version never reads ksp */
1237{}
1238
1239/*ARGSUSED*/
1240void
1241sep_restore(void *ksp)	/* lint stub; the assembly version writes ksp to MSR_INTC_SEP_ESP */
1242{}
1243
1244#else	/* __lint */
1245
1246	/*
1247	 * sep_save: run as this lwp is switched away.  Zeroing the
1248	 * stack-pointer-on-sysenter MSR means that an lwp which (somehow)
1249	 * missed sep_restore enters the kernel on a NULL stack instead of
1250	 * silently corrupting another (preempted) thread's stack.
1251	 */
1252	ENTRY_NP(sep_save)
1253	movl	$MSR_INTC_SEP_ESP, %ecx	/* MSR selected for the write */
1254	xorl	%eax, %eax		/* value bits 31:0  = 0 */
1255	xorl	%edx, %edx		/* value bits 63:32 = 0 */
1256	wrmsr				/* MSR[%ecx] = %edx:%eax */
1257	ret
1258	SET_SIZE(sep_save)
1259
1260	/*
1261	 * sep_restore(ksp): on resume onto this cpu, load ksp (%rdi) into the MSR.
1262	 */
1263	ENTRY_NP(sep_restore)
1264	movl	$MSR_INTC_SEP_ESP, %ecx	/* MSR selected for the write */
1265	movl	%edi, %eax		/* value bits 31:0  = low half of ksp */
1266	movq	%rdi, %rdx
1267	shrq	$32, %rdx		/* value bits 63:32 = high half of ksp */
1268	wrmsr				/* MSR[%ecx] = %edx:%eax */
1269	ret
1270	SET_SIZE(sep_restore)
1271
1272#endif	/* __lint */
1273