xref: /titanic_41/usr/src/uts/i86pc/ml/syscall_asm.s (revision 744947dc83c634d985ed3ad79ac9c5e28d1865fd)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
27/*	  All Rights Reserved					*/
28
29/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
30/*	  All Rights Reserved					*/
31
32#include <sys/asm_linkage.h>
33#include <sys/asm_misc.h>
34#include <sys/regset.h>
35#include <sys/psw.h>
36#include <sys/x86_archext.h>
37#include <sys/machbrand.h>
38#include <sys/privregs.h>
39
40#if defined(__lint)
41
42#include <sys/types.h>
43#include <sys/thread.h>
44#include <sys/systm.h>
45
46#else	/* __lint */
47
48#include <sys/segments.h>
49#include <sys/pcb.h>
50#include <sys/trap.h>
51#include <sys/ftrace.h>
52#include <sys/traptrace.h>
53#include <sys/clock.h>
54#include <sys/panic.h>
55#include "assym.h"
56
57#endif	/* __lint */
58
59/*
60 * We implement two flavours of system call entry points
61 *
62 * -	{int,lcall}/iret	(i386)
63 * -	sysenter/sysexit	(Pentium II and beyond)
64 *
65 * The basic pattern used in the handlers is to check to see if we can
66 * do a fast (simple) version of the system call; if we can't, we use various
67 * C routines that handle corner cases and debugging.
68 *
69 * To reduce the amount of assembler replication, yet keep the system call
70 * implementations vaguely comprehensible, the common code in the body
71 * of the handlers is broken up into a set of preprocessor definitions
72 * below.
73 */
74
75/*
76 * When we have SYSCALLTRACE defined, we sneak an extra
77 * predicate into a couple of tests.
78 */
#ifdef SYSCALLTRACE
/* OR the global syscalltrace word into r32 so a non-zero value shows up in the test. */
#define	ORL_SYSCALLTRACE(r32)	orl	syscalltrace, r32
#else
/* Tracing support is compiled out: the macro expands to nothing. */
#define	ORL_SYSCALLTRACE(r32)
#endif
85
86/*
87 * This check is false whenever we want to go fast i.e.
88 *
89 *	if (code >= NSYSCALL ||
90 *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
91 *		do full version
92 * #ifdef SYSCALLTRACE
93 *	if (syscalltrace)
94 *		do full version
95 * #endif
96 *
97 * Preconditions:
98 * -	t	curthread
99 * -	code	contains the syscall number
100 * Postconditions:
101 * -	%ecx and %edi are smashed
102 * -	condition code flag ZF is cleared if pre-sys is too complex
103 */
#define	CHECK_PRESYS_NE(t, code)					    \
	movzbl	T_PRE_SYS(t), %edi	/* %edi = t->t_pre_sys		*/ ;\
	movzwl	T_PROC_FLAG(t), %ecx	/* %ecx = t->t_proc_flag	*/ ;\
	andl	$TP_WATCHPT, %ecx	/* keep only the watchpoint bit	*/ ;\
	orl	%ecx, %edi		/* merge into the accumulator	*/ ;\
	cmpl	$NSYSCALL, code		/* unsigned: code >= NSYSCALL?	*/ ;\
	setae	%cl			/* %cl = 1 when out of range	*/ ;\
	movzbl	%cl, %ecx		/* widen the 0/1 result		*/ ;\
	orl	%ecx, %edi		/* ZF now reflects all tests	*/ ;\
	ORL_SYSCALLTRACE(%edi)
114
115/*
116 * Check if a brand_mach_ops callback is defined for the specified callback_id
117 * type.  If so invoke it with the user's %gs value loaded and the following
118 * data on the stack:
119 *	   --------------------------------------
120 *         | user's %ss                         |
121 *    |    | user's %esp                        |
122 *    |    | EFLAGS register                    |
123 *    |    | user's %cs                         |
124 *    |    | user's %eip (user return address)  |
125 *    |    | 'scratch space'			|
126 *    |    | user's %ebx			|
127 *    |    | user's %gs selector		|
128 *    v    | lwp pointer			|
129 *         | callback wrapper return addr 	|
130 *         --------------------------------------
131 *
132 * If the brand code returns, we assume that we are meant to execute the
133 * normal system call path.
134 *
135 * The interface to the brand callbacks on the 32-bit kernel assumes %ebx
136 * is available as a scratch register within the callback.  If the callback
137 * returns within the kernel then this macro will restore %ebx.  If the
138 * callback is going to return directly to userland then it should restore
139 * %ebx before returning to userland.
140 */
#define	BRAND_CALLBACK(callback_id)					    \
	subl	$4, %esp		/* reserve the scratch slot	*/ ;\
	pushl	%ebx			/* %ebx becomes our scratch reg	*/ ;\
	pushl	%gs			/* stash the user %gs		*/ ;\
	movl	$KGS_SEL, %ebx						   ;\
	movw	%bx, %gs		/* run on the kernel %gs	*/ ;\
	movl	%gs:CPU_THREAD, %ebx	/* %ebx = current thread	*/ ;\
	movl	T_LWP(%ebx), %ebx	/* %ebx = thread's lwp		*/ ;\
	pushl	%ebx			/* lwp pointer for the callback	*/ ;\
	movl	LWP_PROCP(%ebx), %ebx	/* %ebx = lwp's proc		*/ ;\
	movl	P_BRAND(%ebx), %ebx	/* %ebx = proc's brand		*/ ;\
	movl	B_MACHOPS(%ebx), %ebx	/* %ebx = brand's machops	*/ ;\
	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx		   ;\
	testl	%ebx, %ebx		/* is a callback registered?	*/ ;\
	jz	1f			/* no: just unwind the stack	*/ ;\
	movl	%ebx, 12(%esp)		/* park callback in scratch	*/ ;\
	movl	4(%esp), %ebx		/* fetch the saved user %gs	*/ ;\
	movw	%bx, %gs		/* callback runs on user %gs	*/ ;\
	call	*12(%esp)		/* invoke callback via scratch	*/ ;\
1:	movl	4(%esp), %ebx		/* reload user %gs (required on	*/ ;\
	movw	%bx, %gs		/* the no-callback branch)	*/ ;\
	movl	8(%esp), %ebx		/* hand back the user %ebx	*/ ;\
	addl	$16, %esp		/* drop lwp, %gs, %ebx, scratch	*/
164
#define	MSTATE_TRANSITION(from, to)					    \
	pushl	$to			/* pushed first: 2nd C argument	*/ ;\
	pushl	$from			/* pushed last: 1st C argument	*/ ;\
	call	syscall_mstate						   ;\
	addl	$8, %esp		/* discard both arguments	*/
170
171/*
172 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
173 * This must be called with interrupts or preemption disabled.
174 */
/*
 * 64-bit increment done as a 32-bit add plus add-with-carry into the upper
 * word.  The expansion carries no trailing statement separator, matching the
 * other instruction macros in this file.
 */
#define	CPU_STATS_SYS_SYSCALL_INC					    \
	addl	$1, %gs:CPU_STATS_SYS_SYSCALL	/* low 32 bits		*/ ;\
	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4	/* carry into high word	*/
178
179#if !defined(__lint)
180
181/*
182 * ASSERT(lwptoregs(lwp) == rp);
183 *
184 * this may seem obvious, but very odd things happen if this
185 * assertion is false
186 *
187 * Preconditions:
188 *	-none-
189 * Postconditions (if assertion is true):
190 *	%esi and %edi are smashed
191 */
#ifdef DEBUG

__lwptoregs_msg:
	.string	"syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"

/*
 * Compare lwp->lwp_regs with rp and panic on mismatch.  The panic arguments
 * are pushed right to left: rp, lwp_regs, lwp, source line, format string.
 */
#define	ASSERT_LWPTOREGS(t, rp)						    \
	movl	T_LWP(t), %esi		/* %esi = t->t_lwp		*/ ;\
	movl	LWP_REGS(%esi), %edi	/* %edi = lwp->lwp_regs		*/ ;\
	cmpl	rp, %edi						   ;\
	je	7f			/* equal: assertion holds	*/ ;\
	pushl	rp							   ;\
	pushl	%edi							   ;\
	pushl	%esi							   ;\
	pushl	$__LINE__						   ;\
	pushl	$__lwptoregs_msg					   ;\
	call	panic							   ;\
7:
#else	/* !DEBUG */
#define	ASSERT_LWPTOREGS(t, rp)
#endif	/* DEBUG */
212
213#endif	/* __lint */
214
215/*
216 * This is an assembler version of this fragment:
217 *
218 * lwp->lwp_state = LWP_SYS;
219 * lwp->lwp_ru.sysc++;
220 * lwp->lwp_eosys = NORMALRETURN;
221 * lwp->lwp_ap = argp;
222 *
223 * Preconditions:
224 *	-none-
225 * Postconditions:
226 *	-none-
227 */
#define	SET_LWP(lwp, argp)						    \
	movb	$LWP_SYS, LWP_STATE(lwp)	/* lwp_state = LWP_SYS	*/ ;\
	addl	$1, LWP_RU_SYSC(lwp)		/* lwp_ru.sysc++ (low)	*/ ;\
	adcl	$0, LWP_RU_SYSC+4(lwp)		/* carry into high word	*/ ;\
	movb	$NORMALRETURN, LWP_EOSYS(lwp)	/* lwp_eosys		*/ ;\
	movl	argp, LWP_AP(lwp)		/* lwp_ap = argp	*/
234
235/*
236 * Set up the thread, lwp, find the handler, and copy
237 * in the arguments from userland to the kernel stack.
238 *
239 * Preconditions:
240 * -	%eax contains the syscall number
241 * Postconditions:
242 * -	%eax contains a pointer to the sysent structure
243 * -	%ecx is zeroed
244 * -	%esi, %edi are smashed
245 * -	%esp is SYS_DROPped ready for the syscall
246 */
#define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)				    \
	movl	T_LWP(t), %esi		/* %esi = t->t_lwp		*/ ;\
	movw	%ax, T_SYSNUM(t)	/* record the syscall number	*/ ;\
	subl	$SYS_DROP, %esp		/* carve out the argument area	*/ ;\
	shll	$SYSENT_SIZE_SHIFT, %eax /* number -> sysent byte offset */ ;\
	SET_LWP(%esi, %esp)						   ;\
	leal	sysent(%eax), %eax	/* %eax = address of the entry	*/ ;\
	movzbl	SY_NARG(%eax), %ecx	/* %ecx = argument count	*/ ;\
	testl	%ecx, %ecx						   ;\
	jz	4f			/* no arguments to copy in	*/ ;\
	movl	%esp, %edi		/* dst: kernel argument area	*/ ;\
	movl	SYS_DROP + REGOFF_UESP(%esp), %esi /* src: user %esp	*/ ;\
	movl	$faultlabel, T_LOFAULT(t) /* arm the fault handler	*/ ;\
	addl	$4, %esi		/* skip the first user word	*/ ;\
	rep								   ;\
	  smovl				/* copy %ecx 32-bit words	*/ ;\
	movl	%ecx, T_LOFAULT(t)	/* %ecx is 0: disarm lofault	*/ ;\
4:
265
266/*
267 * Check to see if a simple return is possible i.e.
268 *
269 *	if ((t->t_post_sys_ast | syscalltrace) != 0)
270 *		do full version;
271 *
272 * Preconditions:
273 * -	t is curthread
274 * Postconditions:
275 * -	condition code NE is set if post-sys is too complex
276 * -	rtmp is zeroed if it isn't (we rely on this!)
277 */
#define	CHECK_POSTSYS_NE(t, rtmp)					    \
	xorl	rtmp, rtmp		/* start from zero		*/ ;\
	ORL_SYSCALLTRACE(rtmp)						   ;\
	orl	T_POST_SYS_AST(t), rtmp	/* fold in t_post_sys_ast	*/ ;\
	testl	rtmp, rtmp		/* NE when anything was set	*/
283
284/*
285 * Fix up the lwp, thread, and eflags for a successful return
286 *
287 * Preconditions:
288 * -	zwreg contains zero
289 * Postconditions:
290 * -	%esp has been unSYS_DROPped
291 * -	%esi is smashed (points to lwp)
292 */
/*
 * The final andb is a byte operation, so its mask is written as an 8-bit
 * constant (0xff - PS_C); a 16-bit immediate does not fit the operand size
 * and would have to be truncated by the assembler.  This relies on PS_C
 * living in the low byte of EFLAGS (the carry flag is bit 0).
 */
#define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)				    \
	movl	T_LWP(t), %esi		/* %esi = t->t_lwp		*/ ;\
	addl	$SYS_DROP, %esp		/* release the argument area	*/ ;\
	movw	zwreg, T_SYSNUM(t)	/* t_sysnum = 0 (zwreg is zero)	*/ ;\
	movb	$LWP_USER, LWP_STATE(%esi) /* lwp_state = LWP_USER	*/ ;\
	andb	$_CONST(0xff - PS_C), REGOFF_EFL(%esp) /* clear PS_C	*/
299
300/*
301 * System call handler.  This is the destination of both the call
302 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
303 * there are two significant differences between an interrupt gate and a call
304 * gate:
305 *
306 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
307 * call gate runs the handler with whatever EFLAGS settings were in effect at
308 * the time of the call.
309 *
310 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
311 * of the interrupt onto the stack, whereas a call gate does not.
312 *
313 * Because we use the following code sequence to handle system calls made from
314 * _both_ a call gate _and_ an interrupt gate, these two differences must be
315 * respected. In regards to number 1) above, the handler must ensure that a sane
316 * EFLAGS snapshot is stored on the stack so that when the kernel returns back
317 * to the user via iret (which returns to user with the EFLAGS value saved on
318 * the stack), interrupts are re-enabled.
319 *
320 * In regards to number 2) above, the handler must always put a current snapshot
321 * of EFLAGS onto the stack in the appropriate place. If we came in via an
322 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
323 * the interrupt gate. This is OK, as the only bit that was changed by the
324 * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
325 * now off. If we were to do nothing, the stack would contain an EFLAGS with
326 * IE off, resulting in us eventually returning back to the user with interrupts
327 * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
328 * the stack.
329 *
330 * Another subtlety which deserves mention is the difference between the two
331 * descriptors. The call gate descriptor is set to instruct the hardware to copy
332 * one parameter from the user stack to the kernel stack, whereas the interrupt
333 * gate descriptor doesn't use the parameter passing mechanism at all. The
334 * kernel doesn't actually use the parameter that is copied by the hardware; the
335 * only reason it does this is so that there is a space on the stack large
336 * enough to hold an EFLAGS register value, which happens to be in the correct
337 * place for use by iret when we go back to userland. How convenient.
338 *
339 * Stack frame description in syscall() and callees.
340 *
341 * |------------|
342 * | regs	| +(8*4)+4	registers
343 * |------------|
344 * | 8 args	| <- %esp	MAXSYSARGS (currently 8) arguments
345 * |------------|
346 *
347 */
/* Size in bytes of the on-stack argument area: MAXSYSARGS 4-byte slots. */
#define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))
349
350#if defined(__lint)
351
/*
 * Lint-only C stubs.  They are compiled only when __lint is defined; the
 * real definitions are the assembly in the #else branch below.
 */
352/*ARGSUSED*/
353void
354sys_call()
355{}
356
357void
358_allsyscalls()
359{}
360
361size_t _allsyscalls_size;
362
363#else	/* __lint */
364
/*
 * brand_sys_call / sys_call
 *
 * brand_sys_call runs BRAND_CALLBACK(BRAND_CB_SYSCALL) and then falls
 * through into sys_call.  sys_call builds the register save area, takes the
 * fast path when CHECK_PRESYS_NE / CHECK_POSTSYS_NE allow it, and otherwise
 * hands off to the C routines syscall_entry() / syscall_exit().
 *
 * _full_syscall_presys and _syscall_fault are also branch targets of
 * sys_sysenter, and _watch_do_syscall is entered from watch_syscall.
 */
365	ENTRY_NP2(brand_sys_call, _allsyscalls)
366	BRAND_CALLBACK(BRAND_CB_SYSCALL)
367
368	ALTENTRY(sys_call)
369	/ on entry	eax = system call number
370
371	/ set up the stack to look as in reg.h
372	subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO
373
374	SYSCALL_PUSH
375
376#ifdef TRAPTRACE
377	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
378	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/ Uses label "9"
379	pushl	%eax
380	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
381	popl	%eax
382	movl	%eax, TTR_SYSNUM(%edi)
383#endif
384
	/*
	 * watch_syscall joins here after pointing %esp at the thread stack
	 * and reloading %eax from the saved registers.
	 */
385_watch_do_syscall:
386	movl	%esp, %ebp
387
388	/ Interrupts may be enabled here, so we must make sure this thread
389	/ doesn't migrate off the CPU while it updates the CPU stats.
390	/
391	/ XXX This is only true if we got here via call gate thru the LDT for
392	/ old style syscalls. Perhaps this preempt++-- will go away soon?
393	movl	%gs:CPU_THREAD, %ebx
394	addb	$1, T_PREEMPT(%ebx)
395	CPU_STATS_SYS_SYSCALL_INC
396	subb	$1, T_PREEMPT(%ebx)
397
398	ENABLE_INTR_FLAGS
399
400	pushl	%eax				/ preserve across mstate call
401	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
402	popl	%eax
403
404	movl	%gs:CPU_THREAD, %ebx
405
406	ASSERT_LWPTOREGS(%ebx, %esp)
407
	/*
	 * CHECK_PRESYS_NE leaves ZF clear when the syscall number is out of
	 * range or pre-syscall processing is required; take the slow path
	 * in that case.
	 */
408	CHECK_PRESYS_NE(%ebx, %eax)
409	jne	_full_syscall_presys
410	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
411
	/*
	 * %eax addresses the sysent entry here (set up by
	 * SIMPLE_SYSCALL_PRESYS; on the slow path presumably the value
	 * returned by syscall_entry() -- confirm).
	 */
412_syslcall_call:
413	call	*SY_CALLC(%eax)
414
	/*
	 * On the fast path CHECK_POSTSYS_NE leaves %ecx zero, which is why
	 * %cx can serve as the zero register for SIMPLE_SYSCALL_POSTSYS.
	 * The handler's %eax/%edx are then stored into the saved registers.
	 */
415_syslcall_done:
416	CHECK_POSTSYS_NE(%ebx, %ecx)
417	jne	_full_syscall_postsys
418	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
419	movl	%eax, REGOFF_EAX(%esp)
420	movl	%edx, REGOFF_EDX(%esp)
421
422	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
423
424	/
425	/ get back via iret
426	/
427	CLI(%edx)
428	jmp	sys_rtt_syscall
429
	/*
	 * Slow entry: reserve the argument area, mark the lwp LWP_SYS and
	 * call syscall_entry(thread in %ebx, address of the argument area),
	 * then rejoin the common call site.
	 */
430_full_syscall_presys:
431	movl	T_LWP(%ebx), %esi
432	subl	$SYS_DROP, %esp
433	movb	$LWP_SYS, LWP_STATE(%esi)
434	pushl	%esp
435	pushl	%ebx
436	call	syscall_entry
437	addl	$8, %esp
438	jmp	_syslcall_call
439
	/*
	 * Slow exit: release the argument area and call
	 * syscall_exit(thread in %ebx, %eax, %edx).
	 */
440_full_syscall_postsys:
441	addl	$SYS_DROP, %esp
442	pushl	%edx
443	pushl	%eax
444	pushl	%ebx
445	call	syscall_exit
446	addl	$12, %esp
447	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
448	jmp	_sys_rtt
449
	/*
	 * Installed in T_LOFAULT while SIMPLE_SYSCALL_PRESYS copies the
	 * arguments from the user stack.  Calls set_errno(0xe) and rejoins
	 * _syslcall_done with both return registers zeroed.
	 */
450_syscall_fault:
451	push	$0xe			/ EFAULT
452	call	set_errno
453	addl	$4, %esp
454	xorl	%eax, %eax		/ fake syscall_err()
455	xorl	%edx, %edx
456	jmp	_syslcall_done
457	SET_SIZE(sys_call)
458	SET_SIZE(brand_sys_call)
459
460#endif	/* __lint */
461
462/*
463 * System call handler via the sysenter instruction
464 *
465 * Here's how syscall entry usually works (see sys_call for details).
466 *
467 * There, the caller (lcall or int) in userland has arranged that:
468 *
469 * -	%eax contains the syscall number
470 * -	the user stack contains the args to the syscall
471 *
472 * Normally the lcall instruction into the call gate causes the processor
473 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
474 * The sys_call handler then leaves space for r_trapno and r_err, and
475 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
476 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
477 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
478 * extracts %efl and puts it into r_efl (which happens to live at the offset
479 * that <top-of-stack> was copied into). Note that the value in r_efl has
480 * the IF (interrupt enable) flag turned on. (The int instruction into the
481 * interrupt gate does essentially the same thing, only instead of
482 * <top-of-stack> we get eflags - see comment above.)
483 *
484 * In the sysenter case, things are a lot more primitive.
485 *
486 * The caller in userland has arranged that:
487 *
488 * -	%eax contains the syscall number
489 * -	%ecx contains the user %esp
490 * -	%edx contains the return %eip
491 * -	the user stack contains the args to the syscall
492 *
493 * e.g.
494 *	<args on the stack>
495 *	mov	$SYS_callnum, %eax
496 *	mov	$1f, %edx	/ return %eip
497 *	mov	%esp, %ecx	/ return %esp
498 *	sysenter
499 * 1:
500 *
501 * Hardware and (privileged) initialization code have arranged that by
502 * the time the sysenter instruction completes:
503 *
504 * - %eip is pointing to sys_sysenter (below).
505 * - %cs and %ss are set to kernel text and stack (data) selectors.
506 * - %esp is pointing at the lwp's stack
507 * - Interrupts have been disabled.
508 *
509 * The task for the sysenter handler is:
510 *
511 * -	recreate the same regs structure on the stack and the same
512 *	kernel state as if we'd come in on an lcall
513 * -	do the normal work of a syscall
514 * -	execute the system call epilogue, use sysexit to return to userland.
515 *
516 * Note that we are unable to return both "rvals" to userland with this
517 * call, as %edx is used by the sysexit instruction.
518 *
519 * One final complication in this routine is its interaction with
520 * single-stepping in a debugger.  For most of the system call mechanisms,
521 * the CPU automatically clears the single-step flag before we enter the
522 * kernel.  The sysenter mechanism does not clear the flag, so a user
523 * single-stepping through a libc routine may suddenly find him/herself
524 * single-stepping through the kernel.  To detect this, kmdb compares the
525 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
526 * If it finds that we have single-stepped to a sysenter entry point, it
527 * explicitly clears the flag and executes the sys_sysenter routine.
528 *
529 * One final complication in this final complication is the fact that we
530 * have two different entry points for sysenter: brand_sys_sysenter and
531 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
532 * through the kernel with kmdb, we will eventually hit the instruction at
533 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
534 * and the undesirable one mentioned above.  To avoid this situation, we
535 * simply add a jump over the instruction at sys_sysenter to make it
536 * impossible to single-step to it.
537 */
538#if defined(__lint)
539
/*
 * Lint-only C stub, compiled only when __lint is defined; the real
 * sys_sysenter is the assembly in the #else branch below.
 */
540void
541sys_sysenter()
542{}
543
544#else	/* __lint */
545
/*
 * brand_sys_sysenter: sysenter entry point that first gives the brand a
 * chance to handle the call via BRAND_CALLBACK(BRAND_CB_SYSENTER), then
 * continues in the common sysenter code at local label 1.
 */
	ENTRY_NP(brand_sys_sysenter)
	/*
	 * sysenter pushes no hardware frame.  Push the user's return %eip
	 * (held in %edx on entry) so that it sits in the slot directly above
	 * BRAND_CALLBACK's scratch word, where the documented callback stack
	 * layout places it; pop it back into %edx afterwards.
	 */
	pushl	%edx
	BRAND_CALLBACK(BRAND_CB_SYSENTER)
	popl	%edx
	/*
	 * Jump over sys_sysenter to allow single-stepping as described
	 * above.  The jump is unconditional so that skipping the instruction
	 * at sys_sysenter never depends on the flags left behind by
	 * BRAND_CALLBACK.
	 */
	jmp	1f
555
/*
 * sys_sysenter: system call entry via the sysenter instruction.
 *
 * On entry %eax = syscall number, %ecx = user %esp and %edx = user return
 * %eip.  The nop is the single instruction located at the sys_sysenter
 * address; brand_sys_sysenter jumps past it to local label 1, where both
 * entry points meet.
 *
 * The slow paths (_full_syscall_presys, _full_syscall_postsys) and the
 * argument-copy fault handler (_syscall_fault) are the ones defined in
 * sys_call, so a call that takes any of them finishes through sys_call's
 * exit code rather than through the sysexit sequence at the end of this
 * routine.
 */
556	ALTENTRY(sys_sysenter)
557	nop
5581:
559	/
560	/ do what the call gate would've done to the stack ..
561	/
562	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
563	pushl	%ecx		/ userland makes this a copy of %esp
564	pushfl
565	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
566	pushl	$UCS_SEL
567	pushl	%edx		/ userland makes this a copy of %eip
568	/
569	/ done.  finish building the stack frame
570	/
571	subl	$8, %esp	/ leave space for ERR and TRAPNO
572
573	SYSENTER_PUSH
574
575#ifdef TRAPTRACE
576	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
577	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
578	pushl	%eax
579	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
580	popl	%eax
581	movl	%eax, TTR_SYSNUM(%edi)
582#endif
583	movl	%esp, %ebp
584
	/*
	 * Unlike sys_call there is no T_PREEMPT bump around the statistics
	 * update: sysenter arrives with interrupts disabled, and they are
	 * presumably only re-enabled by ENABLE_INTR_FLAGS below (confirm
	 * against that macro's definition).
	 */
585	CPU_STATS_SYS_SYSCALL_INC
586
587	ENABLE_INTR_FLAGS
588
589	pushl	%eax				/ preserve across mstate call
590	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
591	popl	%eax
592
593	movl	%gs:CPU_THREAD, %ebx
594
595	ASSERT_LWPTOREGS(%ebx, %esp)
596
	/*
	 * ZF clear after CHECK_PRESYS_NE selects the slow path, which lives
	 * in sys_call and continues at _syslcall_call.
	 */
597	CHECK_PRESYS_NE(%ebx, %eax)
598	jne	_full_syscall_presys
599	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
600
601_sysenter_call:
602	call	*SY_CALLC(%eax)
603
	/*
	 * On the fast path CHECK_POSTSYS_NE leaves %ecx zero, so %cx is
	 * passed as the zero register to SIMPLE_SYSCALL_POSTSYS.
	 */
604_sysenter_done:
605	CHECK_POSTSYS_NE(%ebx, %ecx)
606	jne	_full_syscall_postsys
607	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
608	/
609	/ sysexit uses %edx to restore %eip, so we can't use it
610	/ to return a value, sigh.
611	/
612	movl	%eax, REGOFF_EAX(%esp)
613	/ movl	%edx, REGOFF_EDX(%esp)
614
615	/ Interrupts will be turned on by the 'sti' executed just before
616	/ sysexit. The following ensures that restoring the user's EFLAGS
617	/ doesn't enable interrupts too soon.
618	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)
619
620	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
621
622	cli
623
624	SYSCALL_POP
625
	/*
	 * What remains is the frame pushed at entry: %eip, %cs, EFLAGS,
	 * %esp, %ss (SYSCALL_POP presumably also discards the ERR/TRAPNO
	 * slots, since the next pop is commented as yielding %eip -- confirm).
	 * The saved %ss word is never popped.
	 */
626	popl	%edx			/ sysexit: %edx -> %eip
627	addl	$4, %esp		/ get CS off the stack
628	popfl				/ EFL
629	popl	%ecx			/ sysexit: %ecx -> %esp
630	sti
631	sysexit
632	SET_SIZE(sys_sysenter)
633	SET_SIZE(brand_sys_sysenter)
634
635/*
636 * Declare a uintptr_t which covers the entire pc range of syscall
637 * handlers for the stack walkers that need this.
638 */
/* _allsyscalls_size: one .NWORD holding the byte distance from _allsyscalls. */
	.globl	_allsyscalls_size
	.type	_allsyscalls_size, @object
	.align	CPTRSIZE
_allsyscalls_size:
	.NWORD	. - _allsyscalls
	SET_SIZE(_allsyscalls_size)
645
646#endif	/* __lint */
647
648/*
649 * These are the thread context handlers for lwps using sysenter/sysexit.
650 */
651
652#if defined(__lint)
653
/*
 * Lint-only C stubs, compiled only when __lint is defined; the real
 * sep_save/sep_restore are the assembly in the #else branch below.
 */
654/*ARGSUSED*/
655void
656sep_save(void *ksp)
657{}
658
659/*ARGSUSED*/
660void
661sep_restore(void *ksp)
662{}
663
664#else	/* __lint */
665
666	/*
667	 * setting this value to zero as we switch away causes the
668	 * stack-pointer-on-sysenter to be NULL, ensuring that we
669	 * don't silently corrupt another (preempted) thread stack
670	 * when running an lwp that (somehow) didn't get sep_restore'd
671	 */
/* sep_save: write zero to MSR_INTC_SEP_ESP (wrmsr takes %ecx = MSR, %edx:%eax = value). */
	ENTRY_NP(sep_save)
	movl	$MSR_INTC_SEP_ESP, %ecx		/* MSR to write		*/
	xorl	%eax, %eax			/* low 32 bits  = 0	*/
	xorl	%edx, %edx			/* high 32 bits = 0	*/
	wrmsr
	ret
	SET_SIZE(sep_save)
679
680	/*
681	 * Update the kernel stack pointer as we resume onto this cpu.
682	 */
/* sep_restore(ksp): write the first stack argument to MSR_INTC_SEP_ESP. */
	ENTRY_NP(sep_restore)
	movl	$MSR_INTC_SEP_ESP, %ecx		/* MSR to write		*/
	xorl	%edx, %edx			/* high 32 bits = 0	*/
	movl	4(%esp), %eax			/* low 32 bits = per-lwp kernel sp */
	wrmsr
	ret
	SET_SIZE(sep_restore)
690
691#endif	/* __lint */
692
693/*
694 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
695 */
696
697#if defined(__lint)
698
/*
 * Lint-only C stub, compiled only when __lint is defined; the real
 * watch_syscall is the assembly in the #else branch below.
 */
699void
700watch_syscall(void)
701{}
702
703#else	/* __lint */
704
/*
 * watch_syscall: point %esp at the current thread's t_stack, reload the
 * syscall number from the saved registers there, and continue in sys_call
 * at _watch_do_syscall.
 */
	ENTRY_NP(watch_syscall)
	CLI(%eax)
	movl	%gs:CPU_THREAD, %ebx		/* %ebx = current thread */
	movl	T_STACK(%ebx), %esp		/* %esp = t->t_stack */
	movl	REGOFF_EAX(%esp), %eax		/* syscall number from saved %eax */
	jmp	_watch_do_syscall
	SET_SIZE(watch_syscall)
712
713#endif	/* __lint */
714