xref: /titanic_41/usr/src/uts/i86pc/ml/syscall_asm.s (revision b509e89b2befbaa42939abad9da1d7f5a8c6aaae)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28/*	  All Rights Reserved					*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31/*	  All Rights Reserved					*/
32
33#include <sys/asm_linkage.h>
34#include <sys/asm_misc.h>
35#include <sys/regset.h>
36#include <sys/psw.h>
37#include <sys/x86_archext.h>
38#include <sys/machbrand.h>
39#include <sys/privregs.h>
40
41#if defined(__lint)
42
43#include <sys/types.h>
44#include <sys/thread.h>
45#include <sys/systm.h>
46
47#else	/* __lint */
48
49#include <sys/segments.h>
50#include <sys/pcb.h>
51#include <sys/trap.h>
52#include <sys/ftrace.h>
53#include <sys/traptrace.h>
54#include <sys/clock.h>
55#include <sys/panic.h>
56#include "assym.h"
57
58#endif	/* __lint */
59
60/*
61 * We implement two flavours of system call entry points
62 *
63 * -	{int,lcall}/iret	(i386)
64 * -	sysenter/sysexit	(Pentium II and beyond)
65 *
66 * The basic pattern used in the handlers is to check to see if we can
67 * do a fast (simple) version of the system call; if we can't, we use various
68 * C routines that handle corner cases and debugging.
69 *
70 * To reduce the amount of assembler replication, yet keep the system call
71 * implementations vaguely comprehensible, the common code in the body
72 * of the handlers is broken up into a set of preprocessor definitions
73 * below.
74 */
75
76/*
77 * When we have SYSCALLTRACE defined, we sneak an extra
78 * predicate into a couple of tests.
 *
 * ORL_SYSCALLTRACE(r32) ORs the syscalltrace variable into the 32-bit
 * register r32.  Without SYSCALLTRACE it expands to nothing, so r32 and
 * the condition codes are left exactly as the preceding instruction set
 * them; users of the macro must arrange for that instruction to produce
 * the flags they go on to test.
79 */
80#if defined(SYSCALLTRACE)
81#define	ORL_SYSCALLTRACE(r32)	\
82	orl	syscalltrace, r32
83#else
84#define	ORL_SYSCALLTRACE(r32)
85#endif
86
87/*
88 * This check is false whenever we want to go fast i.e.
89 *
90 *	if (code >= NSYSCALL ||
91 *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
92 *		do full version
93 * #ifdef SYSCALLTRACE
94 *	if (syscalltrace)
95 *		do full version
96 * #endif
97 *
98 * Preconditions:
99 * -	t	curthread
100 * -	code	contains the syscall number
101 * Postconditions:
102 * -	%ecx and %edi are smashed
103 * -	condition code flag ZF is cleared if pre-sys is too complex
 *
 * How the result is built: %edi accumulates, by successive orl, the
 * t_pre_sys byte, the TP_WATCHPT bit of the 16-bit t_proc_flag, a 0/1
 * "code is out of range" value and (with SYSCALLTRACE) syscalltrace.
 * ZF therefore comes from the last orl executed, and %edi is non-zero
 * exactly when ZF is clear; the callers follow the macro with jne to
 * reach the full path.
 *
 * The range test uses setae, i.e. an unsigned comparison, so a code with
 * the top bit set is also treated as >= NSYSCALL.
104 */
105#define	CHECK_PRESYS_NE(t, code)		\
106	movzbl	T_PRE_SYS(t), %edi;		\
107	movzwl	T_PROC_FLAG(t), %ecx;		\
108	andl	$TP_WATCHPT, %ecx;		\
109	orl	%ecx, %edi;			\
110	cmpl	$NSYSCALL, code;		\
111	setae	%cl;				\
112	movzbl	%cl, %ecx;			\
113	orl	%ecx, %edi;			\
114	ORL_SYSCALLTRACE(%edi)
115
116/*
117 * When the brand's callback is invoked, the stack will look like this:
118 *	   --------------------------------------
119 *         | 'scratch space'			|
120 *         | user's %ebx			|
121 *         | user's %gs selector		|
122 *    |    | kernel's %gs selector		|
123 *    |    | lwp pointer			|
124 *    v    | user return address		|
125 *         | callback wrapper return addr 	|
126 *         --------------------------------------
127 *
128 * The lx brand (at least) uses each of these fields.
129 * If the brand code returns, we assume that we are meant to execute the
130 * normal system call path.
 *
 * Offsets from %esp once the lwp pointer has been pushed:
 *	 0	lwp pointer
 *	 4	kernel's %gs selector
 *	 8	user's %gs selector
 *	12	user's %ebx
 *	16	scratch space
 *	20	the word that was on top of the stack when the macro was
 *		entered, which is treated as the user return address
 * Pushing that return address moves every slot 4 bytes further away,
 * which is why the callback pointer is stored at 16(%esp) but called
 * through 20(%esp).
 *
 * callback_id indexes an array of CPTRSIZE-sized pointers reached via
 * lwp -> proc -> brand -> machops.  A zero entry means no callback: the
 * macro then only restores the user's %gs and %ebx and pops its own 20
 * bytes.  The same unwinding is done if the callback returns.
 *
 * The macro itself uses only %ebx (saved and restored), %gs (restored),
 * the condition codes and local label 1.  The brand pointer is followed
 * without a NULL check, so these entry points are presumably only in use
 * while the process has a brand -- TODO confirm.
131 */
132#define	BRAND_CALLBACK(callback_id)					    \
133	subl	$4, %esp		/* save some scratch space	*/ ;\
134	pushl	%ebx			/* save %ebx to use for scratch	*/ ;\
135	pushl	%gs			/* save the user %gs		*/ ;\
136	movl	$KGS_SEL, %ebx		/* kernel %gs selector		*/ ;\
137	pushl	%ebx			/* push kernel's %gs		*/ ;\
138	movw	%bx, %gs		/* switch to the kernel's %gs	*/ ;\
139	movl	%gs:CPU_THREAD, %ebx	/* load the thread pointer	*/ ;\
140	movl	T_LWP(%ebx), %ebx	/* load the lwp pointer		*/ ;\
141	pushl	%ebx			/* push the lwp pointer		*/ ;\
142	movl	LWP_PROCP(%ebx), %ebx	/* load the proc pointer	*/ ;\
143	movl	P_BRAND(%ebx), %ebx	/* load the brand pointer	*/ ;\
144	movl	B_MACHOPS(%ebx), %ebx	/* load the machops pointer	*/ ;\
145	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx		   ;\
146	cmpl	$0, %ebx		/* is a callback registered?	*/ ;\
147	je	1f			/* no: just unwind our pushes	*/ ;\
148	movl	%ebx, 16(%esp)		/* save callback to scratch	*/ ;\
149	movl	8(%esp), %ebx		/* grab the user %gs		*/ ;\
150	movw	%bx, %gs		/* restore the user %gs		*/ ;\
151	movl	12(%esp), %ebx		/* restore %ebx			*/ ;\
152	pushl	20(%esp)		/* push the return address	*/ ;\
153	call	*20(%esp)		/* call callback (scratch slot)	*/ ;\
154	addl	$4, %esp		/* get rid of ret addr		*/ ;\
1551:	movl	8(%esp), %ebx		/* grab the user %gs		*/ ;\
156	movw	%bx, %gs		/* restore the user %gs		*/ ;\
157	movl	12(%esp), %ebx		/* restore user's %ebx		*/ ;\
158	addl	$20, %esp		/* restore stack ptr		*/
159
/*
 * Call syscall_mstate(from, to) with both microstate constants passed on
 * the stack (to is pushed first, then from) and pop the two argument
 * words afterwards.
 *
 * Nothing is saved around the call: the entry paths in this file push
 * %eax first to keep the syscall number, and the fast exit paths store
 * the return values into the regs frame before using the macro.  Any
 * register syscall_mstate() does not preserve is lost -- presumably the
 * usual i386 C scratch registers; TODO confirm.
 */
160#define	MSTATE_TRANSITION(from, to)		\
161	pushl	$to;				\
162	pushl	$from;				\
163	call	syscall_mstate;			\
164	addl	$0x8, %esp
165
166/*
167 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
168 * This must be called with interrupts or preemption disabled.
 *
 * The 64-bit counter at %gs:CPU_STATS_SYS_SYSCALL is bumped with an
 * addl/adcl pair (low word, then the carry into the high word at +4).
 * The two instructions are not atomic as a unit and both address the
 * counter through %gs.  Condition codes are modified.  Note that the
 * expansion already ends in a semicolon.
169 */
170#define	CPU_STATS_SYS_SYSCALL_INC			\
171	addl	$1, %gs:CPU_STATS_SYS_SYSCALL;		\
172	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4;
173
174#if !defined(__lint)
175
176/*
177 * ASSERT(lwptoregs(lwp) == rp);
178 *
179 * this may seem obvious, but very odd things happen if this
180 * assertion is false
181 *
182 * Preconditions:
183 *	-none-
184 * Postconditions (if assertion is true):
185 *	%esi and %edi are smashed
 *
 * t is a register holding the thread pointer and rp the expected regs
 * pointer (the callers in this file pass %ebx and %esp).  Only DEBUG
 * kernels get the check; otherwise the macro expands to nothing.
 *
 * On a mismatch panic() is called with __lwptoregs_msg followed, in
 * format order, by __LINE__, the lwp pointer, the lwp's regs pointer and
 * rp; they are pushed in the reverse of that order.  Local label 7 is the
 * assertion-holds exit.  The cmpl also changes the condition codes.
186 */
187#if defined(DEBUG)
188
189__lwptoregs_msg:
190	.string	"syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"
191
192#define	ASSERT_LWPTOREGS(t, rp)				\
193	movl	T_LWP(t), %esi;				\
194	movl	LWP_REGS(%esi), %edi;			\
195	cmpl	rp, %edi;				\
196	je	7f;					\
197	pushl	rp;					\
198	pushl	%edi;					\
199	pushl	%esi;					\
200	pushl	$__LINE__;				\
201	pushl	$__lwptoregs_msg;			\
202	call	panic;					\
2037:
204#else
205#define	ASSERT_LWPTOREGS(t, rp)
206#endif
207
208#endif	/* __lint */
209
210/*
211 * This is an assembler version of this fragment:
212 *
213 * lwp->lwp_state = LWP_SYS;
214 * lwp->lwp_ru.sysc++;
215 * lwp->lwp_eosys = NORMALRETURN;
216 * lwp->lwp_ap = argp;
217 *
218 * Preconditions:
219 *	-none-
220 * Postconditions:
221 *	-none-
 *
 * lwp is used as a base register.  argp is the source operand of a movl
 * into memory, so it cannot itself be a memory operand.  lwp_state and
 * lwp_eosys are written as single bytes; lwp_ru.sysc is incremented as a
 * 64-bit quantity with an addl/adcl pair.  No register is modified, but
 * the condition codes are (by addl/adcl).
222 */
223#define	SET_LWP(lwp, argp)				\
224	movb	$LWP_SYS, LWP_STATE(lwp);		\
225	addl	$1, LWP_RU_SYSC(lwp);			\
226	adcl	$0, LWP_RU_SYSC+4(lwp);			\
227	movb	$NORMALRETURN, LWP_EOSYS(lwp);		\
228	movl	argp, LWP_AP(lwp)
229
230/*
231 * Set up the thread, lwp, find the handler, and copy
232 * in the arguments from userland to the kernel stack.
233 *
234 * Preconditions:
235 * -	%eax contains the syscall number
236 * Postconditions:
237 * -	%eax contains a pointer to the sysent structure
238 * -	%ecx is zeroed
239 * -	%esi, %edi are smashed
240 * -	%esp is SYS_DROPped ready for the syscall
 *
 * Step by step:
 * -	the low 16 bits of the syscall number are recorded in t_sysnum
 * -	SYS_DROP bytes of argument space are reserved on the kernel stack
 *	and SET_LWP points lwp_ap at them
 * -	%eax becomes the address of the sysent entry (the number is
 *	shifted left by SYSENT_SIZE_SHIFT and added to sysent)
 * -	sy_narg 32-bit words are copied with rep/smovl into the reserved
 *	space, reading from 4 bytes above the saved user %esp (REGOFF_UESP
 *	in the regs frame that sits just above the argument space)
 * -	t_lofault is set to faultlabel only for the duration of the copy
 *	and cleared afterwards by storing the now-zero %ecx
 *
 * When sy_narg is zero the copy is skipped via local label 4 and
 * t_lofault is never touched.  %esp must point at the regs frame on
 * entry.  The word skipped at the user %esp is presumably the return
 * address of the user-level syscall stub, and the string move
 * presumably relies on the direction flag being clear -- TODO confirm
 * both.
241 */
242#define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)		\
243	movl	T_LWP(t), %esi;				\
244	movw	%ax, T_SYSNUM(t);			\
245	subl	$SYS_DROP, %esp;			\
246	shll	$SYSENT_SIZE_SHIFT, %eax;			\
247	SET_LWP(%esi, %esp);				\
248	leal	sysent(%eax), %eax;			\
249	movzbl	SY_NARG(%eax), %ecx;			\
250	testl	%ecx, %ecx;				\
251	jz	4f;					\
252	movl	%esp, %edi;				\
253	movl	SYS_DROP + REGOFF_UESP(%esp), %esi;	\
254	movl	$faultlabel, T_LOFAULT(t);		\
255	addl	$4, %esi;				\
256	rep;						\
257	  smovl;					\
258	movl	%ecx, T_LOFAULT(t);			\
2594:
260
261/*
262 * Check to see if a simple return is possible i.e.
263 *
264 *	if ((t->t_post_sys_ast | syscalltrace) != 0)
265 *		do full version;
266 *
267 * Preconditions:
268 * -	t is curthread
269 * Postconditions:
270 * -	condition code NE is set if post-sys is too complex
271 * -	rtmp is zeroed if it isn't (we rely on this!)
 *
 * rtmp must be a 32-bit register: it is cleared, then syscalltrace (with
 * SYSCALLTRACE) and the 32-bit word at T_POST_SYS_AST(t) are ORed into
 * it before the compare against zero.  The zero left in rtmp on the
 * simple path is what the callers hand to SIMPLE_SYSCALL_POSTSYS as its
 * zwreg argument (they pass %ecx here and %cx there).
272 */
273#define	CHECK_POSTSYS_NE(t, rtmp)			\
274	xorl	rtmp, rtmp;				\
275	ORL_SYSCALLTRACE(rtmp);				\
276	orl	T_POST_SYS_AST(t), rtmp;		\
277	cmpl	$0, rtmp
278
279/*
280 * Fix up the lwp, thread, and eflags for a successful return
281 *
282 * Preconditions:
283 * -	zwreg contains zero
284 * Postconditions:
285 * -	%esp has been unSYS_DROPped
286 * -	%esi is smashed (points to lwp)
 *
 * zwreg is a 16-bit register (it is stored with movw) used to reset
 * t_sysnum to zero.  lwp_state goes back to LWP_USER, and the carry bit
 * (PS_C) is cleared in the EFLAGS image saved in the regs frame, which
 * %esp points at again once the argument space has been dropped.
 * Condition codes are modified.
 *
 * NOTE(review): the andb is a byte operation but its mask is written as
 * a 16-bit constant (0xffff - PS_C); this presumably relies on PS_C
 * living in the low byte and on the assembler truncating the immediate
 * -- confirm before moving to a stricter assembler.
287 */
288#define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)		\
289	movl	T_LWP(t), %esi;				\
290	addl	$SYS_DROP, %esp;			\
291	movw	zwreg, T_SYSNUM(t);			\
292	movb	$LWP_USER, LWP_STATE(%esi);		\
293	andb	$_CONST(0xffff - PS_C), REGOFF_EFL(%esp)
294
295/*
296 * System call handler.  This is the destination of both the call
297 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
298 * there are two significant differences between an interrupt gate and a call
299 * gate:
300 *
301 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
302 * call gate runs the handler with whatever EFLAGS settings were in effect at
303 * the time of the call.
304 *
305 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
306 * of the interrupt onto the stack, whereas a call gate does not.
307 *
308 * Because we use the following code sequence to handle system calls made from
309 * _both_ a call gate _and_ an interrupt gate, these two differences must be
310 * respected. In regards to number 1) above, the handler must ensure that a sane
311 * EFLAGS snapshot is stored on the stack so that when the kernel returns back
312 * to the user via iret (which returns to user with the EFLAGS value saved on
313 * the stack), interrupts are re-enabled.
314 *
315 * In regards to number 2) above, the handler must always put a current snapshot
316 * of EFLAGS onto the stack in the appropriate place. If we came in via an
317 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
318 * the interrupt gate. This is OK, as the only bit that was changed by the
319 * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
320 * now off. If we were to do nothing, the stack would contain an EFLAGS with
321 * IE off, resulting in us eventually returning back to the user with interrupts
322 * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
323 * the stack.
324 *
325 * Another subtlety which deserves mention is the difference between the two
326 * descriptors. The call gate descriptor is set to instruct the hardware to copy
327 * one parameter from the user stack to the kernel stack, whereas the interrupt
328 * gate descriptor doesn't use the parameter passing mechanism at all. The
329 * kernel doesn't actually use the parameter that is copied by the hardware; the
330 * only reason it does this is so that there is a space on the stack large
331 * enough to hold an EFLAGS register value, which happens to be in the correct
332 * place for use by iret when we go back to userland. How convenient.
333 *
334 * Stack frame description in syscall() and callees.
335 *
336 * |------------|
337 * | regs	| +(8*4)+4	registers
338 * |------------|
339 * | 8 args	| <- %esp	MAXSYSARGS (currently 8) arguments
340 * |------------|
341 *
342 */
/*
 * Size in bytes of the argument area sketched above: MAXSYSARGS
 * arguments of 4 bytes each.
 */
343#define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))
344
345#if defined(__lint)
346
/*
 * Empty C stand-ins, seen only when __lint is defined, for the symbols
 * that the assembler branch of this conditional defines.
 */
347/*ARGSUSED*/
348void
349sys_call()
350{}
351
352void
353_allsyscalls()
354{}
355
356size_t _allsyscalls_size;
357
358#else	/* __lint */
359
/*
 * brand_sys_call gives the BRAND_CB_SYSCALL brand callback first refusal
 * and then falls into sys_call.  ENTRY_NP2 is handed _allsyscalls as a
 * second name, presumably an alias for the same address; it is the base
 * from which _allsyscalls_size is measured.
 *
 * The labels _full_syscall_presys, _full_syscall_postsys and
 * _syscall_fault below are also branched to from sys_sysenter, so a
 * sysenter system call that needs any of them finishes through the code
 * here.
 */
360	ENTRY_NP2(brand_sys_call, _allsyscalls)
361	BRAND_CALLBACK(BRAND_CB_SYSCALL)
362
363	ALTENTRY(sys_call)
364	/ on entry	eax = system call number
365
366	/ set up the stack to look as in reg.h
367	subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO
368
369	SYSCALL_PUSH
370
371#ifdef TRAPTRACE
372	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
373	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/ Uses label "9"
374	pushl	%eax
375	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
376	popl	%eax
377	movl	%eax, TTR_SYSNUM(%edi)
378#endif
379
	/*
	 * watch_syscall also enters here, having pointed %esp at the
	 * thread's saved regs frame and reloaded %eax from it.
	 */
380_watch_do_syscall:
381	movl	%esp, %ebp
382
383	/ Interrupts may be enabled here, so we must make sure this thread
384	/ doesn't migrate off the CPU while it updates the CPU stats.
385	/
386	/ XXX This is only true if we got here via call gate thru the LDT for
387	/ old style syscalls. Perhaps this preempt++-- will go away soon?
388	movl	%gs:CPU_THREAD, %ebx
389	addb	$1, T_PREEMPT(%ebx)
390	CPU_STATS_SYS_SYSCALL_INC
391	subb	$1, T_PREEMPT(%ebx)
392
393	ENABLE_INTR_FLAGS
394
395	pushl	%eax				/ preserve across mstate call
396	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
397	popl	%eax
398
399	movl	%gs:CPU_THREAD, %ebx
400
401	ASSERT_LWPTOREGS(%ebx, %esp)
402
403	CHECK_PRESYS_NE(%ebx, %eax)
404	jne	_full_syscall_presys
405	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
406
	/*
	 * %eax is the sysent pointer here.  %ebx (curthread) is used again
	 * after the handler returns, so it is expected to survive the call
	 * -- presumably as a callee-saved register.
	 */
407_syslcall_call:
408	call	*SY_CALLC(%eax)
409
410_syslcall_done:
411	CHECK_POSTSYS_NE(%ebx, %ecx)
412	jne	_full_syscall_postsys
413	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
414	movl	%eax, REGOFF_EAX(%esp)
415	movl	%edx, REGOFF_EDX(%esp)
416
417	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
418
419	/
420	/ get back via iret
421	/
422	CLI(%edx)
423	jmp	sys_rtt_syscall
424
	/*
	 * Slow pre-syscall path: syscall_entry(thread, argp) is called with
	 * argp pointing at the freshly reserved SYS_DROP bytes.  Whatever
	 * it leaves in %eax is then used at _syslcall_call as the sysent
	 * pointer.
	 */
425_full_syscall_presys:
426	movl	T_LWP(%ebx), %esi
427	subl	$SYS_DROP, %esp
428	movb	$LWP_SYS, LWP_STATE(%esi)
429	pushl	%esp
430	pushl	%ebx
431	call	syscall_entry
432	addl	$8, %esp
433	jmp	_syslcall_call
434
	/*
	 * Slow post-syscall path: drop the argument space and call
	 * syscall_exit(thread, %eax, %edx) with the two return registers,
	 * then leave through _sys_rtt rather than sys_rtt_syscall.
	 */
435_full_syscall_postsys:
436	addl	$SYS_DROP, %esp
437	pushl	%edx
438	pushl	%eax
439	pushl	%ebx
440	call	syscall_exit
441	addl	$12, %esp
442	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
443	jmp	_sys_rtt
444
	/*
	 * Installed as t_lofault by SIMPLE_SYSCALL_PRESYS while the
	 * arguments are copied in.  Passes 0xe (EFAULT) to set_errno(),
	 * zeroes both return registers and rejoins _syslcall_done with the
	 * argument space still reserved.
	 *
	 * NOTE(review): nothing here resets t_lofault, which the macro
	 * only clears after a successful copy -- presumably whoever
	 * redirected us here, or later code, takes care of it; confirm.
	 */
445_syscall_fault:
446	push	$0xe			/ EFAULT
447	call	set_errno
448	addl	$4, %esp
449	xorl	%eax, %eax		/ fake syscall_err()
450	xorl	%edx, %edx
451	jmp	_syslcall_done
452	SET_SIZE(sys_call)
453	SET_SIZE(brand_sys_call)
454
455#endif	/* __lint */
456
457/*
458 * System call handler via the sysenter instruction
459 *
460 * Here's how syscall entry usually works (see sys_call for details).
461 *
462 * There, the caller (lcall or int) in userland has arranged that:
463 *
464 * -	%eax contains the syscall number
465 * -	the user stack contains the args to the syscall
466 *
467 * Normally the lcall instruction into the call gate causes the processor
468 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
469 * The sys_call handler then leaves space for r_trapno and r_err, and
470 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
471 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
472 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
473 * extracts %efl and puts it into r_efl (which happens to live at the offset
474 * that <top-of-stack> was copied into). Note that the value in r_efl has
475 * the IF (interrupt enable) flag turned on. (The int instruction into the
476 * interrupt gate does essentially the same thing, only instead of
477 * <top-of-stack> we get eflags - see comment above.)
478 *
479 * In the sysenter case, things are a lot more primitive.
480 *
481 * The caller in userland has arranged that:
482 *
483 * -	%eax contains the syscall number
484 * -	%ecx contains the user %esp
485 * -	%edx contains the return %eip
486 * -	the user stack contains the args to the syscall
487 *
488 * e.g.
489 *	<args on the stack>
490 *	mov	$SYS_callnum, %eax
491 *	mov	$1f, %edx	/ return %eip
492 *	mov	%esp, %ecx	/ return %esp
493 *	sysenter
494 * 1:
495 *
496 * Hardware and (privileged) initialization code have arranged that by
497 * the time the sysenter instruction completes:
498 *
499 * - %eip is pointing to sys_sysenter (below).
500 * - %cs and %ss are set to kernel text and stack (data) selectors.
501 * - %esp is pointing at the lwp's stack
502 * - Interrupts have been disabled.
503 *
504 * The task for the sysenter handler is:
505 *
506 * -	recreate the same regs structure on the stack and the same
507 *	kernel state as if we'd come in on an lcall
508 * -	do the normal work of a syscall
509 * -	execute the system call epilogue, use sysexit to return to userland.
510 *
511 * Note that we are unable to return both "rvals" to userland with this
512 * call, as %edx is used by the sysexit instruction.
513 *
514 * One final complication in this routine is its interaction with
515 * single-stepping in a debugger.  For most of the system call mechanisms,
516 * the CPU automatically clears the single-step flag before we enter the
517 * kernel.  The sysenter mechanism does not clear the flag, so a user
518 * single-stepping through a libc routine may suddenly find him/herself
519 * single-stepping through the kernel.  To detect this, kmdb compares the
520 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
521 * If it finds that we have single-stepped to a sysenter entry point, it
522 * explicitly clears the flag and executes the sys_sysenter routine.
523 *
524 * One final complication in this final complication is the fact that we
525 * have two different entry points for sysenter: brand_sys_sysenter and
526 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
527 * through the kernel with kmdb, we will eventually hit the instruction at
528 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
529 * and the undesirable one mentioned above.  To avoid this situation, we
530 * simply add a jump over the instruction at sys_sysenter to make it
531 * impossible to single-step to it.
532 */
533#if defined(__lint)
534
/* Empty stand-in for lint; the real handler is in the assembler branch. */
535void
536sys_sysenter()
537{}
538
539#else	/* __lint */
540
/*
 * brand_sys_sysenter gives the BRAND_CB_SYSENTER brand callback first
 * refusal and then continues just past the sys_sysenter label.
 *
 * On entry %eax is the syscall number, %ecx the user %esp and %edx the
 * user return %eip.  The fast path returns with sysexit, so only %eax
 * carries a return value.  The slow paths and the argument-copy fault
 * path branch to labels inside sys_call (_full_syscall_presys,
 * _full_syscall_postsys, _syscall_fault) and therefore complete through
 * sys_call's exit code instead.
 */
541	ENTRY_NP(brand_sys_sysenter)
	/*
	 * Put the user return %eip on top of the stack, where
	 * BRAND_CALLBACK looks for the return address; take it back off
	 * afterwards.
	 */
542	pushl	%edx
543	BRAND_CALLBACK(BRAND_CB_SYSENTER)
544	popl	%edx
545	/*
546	 * Jump over sys_sysenter to allow single-stepping as described
547	 * above.
	 *
	 * The jump is unconditional.  A conditional branch here would
	 * depend on whatever condition codes the final addl of
	 * BRAND_CALLBACK happened to leave behind (popl does not change
	 * them).
548	 */
549	jmp	1f
550
551	ALTENTRY(sys_sysenter)
552	nop
5531:
554	/
555	/ do what the call gate would've done to the stack ..
556	/
557	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
558	pushl	%ecx		/ userland makes this a copy of %esp
559	pushfl
560	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
561	pushl	$UCS_SEL
562	pushl	%edx		/ userland makes this a copy of %eip
563	/
564	/ done.  finish building the stack frame
565	/
566	subl	$8, %esp	/ leave space for ERR and TRAPNO
567
568	SYSENTER_PUSH
569
570#ifdef TRAPTRACE
571	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
572	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
573	pushl	%eax
574	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
575	popl	%eax
576	movl	%eax, TTR_SYSNUM(%edi)
577#endif
578	movl	%esp, %ebp
579
	/*
	 * Unlike sys_call there is no t_preempt bracket around the
	 * counter update; ENABLE_INTR_FLAGS has not been issued yet at
	 * this point.
	 */
580	CPU_STATS_SYS_SYSCALL_INC
581
582	ENABLE_INTR_FLAGS
583
584	pushl	%eax				/ preserve across mstate call
585	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
586	popl	%eax
587
588	movl	%gs:CPU_THREAD, %ebx
589
590	ASSERT_LWPTOREGS(%ebx, %esp)
591
592	CHECK_PRESYS_NE(%ebx, %eax)
593	jne	_full_syscall_presys
594	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
595
596_sysenter_call:
597	call	*SY_CALLC(%eax)
598
599_sysenter_done:
600	CHECK_POSTSYS_NE(%ebx, %ecx)
601	jne	_full_syscall_postsys
602	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
603	/
604	/ sysexit uses %edx to restore %eip, so we can't use it
605	/ to return a value, sigh.
606	/
607	movl	%eax, REGOFF_EAX(%esp)
608	/ movl	%edx, REGOFF_EDX(%esp)
609
610	/ Interrupts will be turned on by the 'sti' executed just before
611	/ sysexit. The following ensures that restoring the user's EFLAGS
612	/ doesn't enable interrupts too soon.
613	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)
614
615	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
616
617	cli
618
619	SYSCALL_POP
620
621	popl	%edx			/ sysexit: %edx -> %eip
622	addl	$4, %esp		/ get CS off the stack
623	popfl				/ EFL
624	popl	%ecx			/ sysexit: %ecx -> %esp
625	sti
626	sysexit
627	SET_SIZE(sys_sysenter)
628	SET_SIZE(brand_sys_sysenter)
629
630#endif	/* __lint */
631
632#if defined(__lint)
633/*
634 * System call via an int80.  This entry point is only used by the Linux
635 * application environment.  Unlike the sysenter path, there is no default
636 * action to take if no callback is registered for this process.
 *
 * The definition below is only an empty stand-in for lint; the real
 * entry point is in the assembler branch of this conditional.
637 */
638void
639sys_int80()
640{}
641
642#else	/* __lint */
643
644	ENTRY_NP(brand_sys_int80)
645	BRAND_CALLBACK(BRAND_CB_INT80)
646
647	ALTENTRY(sys_int80)
648	/*
649	 * We hit an int80, but this process isn't of a brand with an int80
650	 * handler.  Bad process!  Make it look as if the INT failed.
651	 * Modify %eip to point before the INT, push the expected error
652	 * code and fake a GP fault.
653	 *
	 * The word at (%esp) is taken to be the saved user %eip; it is
	 * moved back by the 2-byte length of the int instruction.  The
	 * value pushed as the error code is the T_INT80 gate's offset
	 * (T_INT80 * GATE_DESC_SIZE) plus 2 -- presumably the flag that
	 * marks the error code as referring to the IDT; TODO confirm.
	 * Control then passes to gptrap and does not come back here.
654	 */
655	subl	$2, (%esp)	/* int insn 2-bytes */
656	pushl	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
657	jmp	gptrap			/ GP fault
658	SET_SIZE(sys_int80)
659	SET_SIZE(brand_sys_int80)
660
661/*
662 * Declare a uintptr_t which covers the entire pc range of syscall
663 * handlers for the stack walkers that need this.
 *
 * The stored value is the distance in bytes from _allsyscalls to this
 * object, so it spans everything assembled in between (brand_sys_call
 * through sys_int80 in this file), including any padding inserted by
 * the .align below.  Code that follows this object is not covered.
664 */
665	.align	CPTRSIZE
666	.globl	_allsyscalls_size
667	.type	_allsyscalls_size, @object
668_allsyscalls_size:
669	.NWORD	. - _allsyscalls
670	SET_SIZE(_allsyscalls_size)
671
672#endif	/* __lint */
673
674/*
675 * These are the thread context handlers for lwps using sysenter/sysexit.
676 */
677
678#if defined(__lint)
679
/*
 * Empty stand-ins for lint; they give the C-visible signatures of the
 * two assembler routines in the other branch of this conditional.
 */
680/*ARGSUSED*/
681void
682sep_save(void *ksp)
683{}
684
685/*ARGSUSED*/
686void
687sep_restore(void *ksp)
688{}
689
690#else	/* __lint */
691
692	/*
693	 * setting this value to zero as we switch away causes the
694	 * stack-pointer-on-sysenter to be NULL, ensuring that we
695	 * don't silently corrupt another (preempted) thread stack
696	 * when running an lwp that (somehow) didn't get sep_restore'd
	 *
	 * The stack argument is never read.  wrmsr stores %edx:%eax,
	 * both zeroed here, into the MSR selected by %ecx, which is
	 * MSR_INTC_SEP_ESP.  %eax, %ecx, %edx and the condition codes
	 * are modified.
697	 */
698	ENTRY_NP(sep_save)
699	xorl	%edx, %edx
700	xorl	%eax, %eax
701	movl	$MSR_INTC_SEP_ESP, %ecx
702	wrmsr
703	ret
704	SET_SIZE(sep_save)
705
706	/*
707	 * Update the kernel stack pointer as we resume onto this cpu.
	 *
	 * The single stack argument at 4(%esp) is loaded into %eax and
	 * written, with %edx zeroed as the high half, to the MSR selected
	 * by %ecx (MSR_INTC_SEP_ESP).  %eax, %ecx, %edx and the
	 * condition codes are modified.
708	 */
709	ENTRY_NP(sep_restore)
710	movl	4(%esp), %eax			/* per-lwp kernel sp */
711	xorl	%edx, %edx
712	movl	$MSR_INTC_SEP_ESP, %ecx
713	wrmsr
714	ret
715	SET_SIZE(sep_restore)
716
717#endif	/* __lint */
718
719/*
720 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
721 */
722
723#if defined(__lint)
724
/* Empty stand-in for lint; the real routine is in the assembler branch. */
725void
726watch_syscall(void)
727{}
728
729#else	/* __lint */
730
/*
 * watch_syscall does not return to its caller.  It abandons the current
 * stack for curthread's t_stack and treats what it finds there as the
 * saved regs frame: the syscall number is reloaded from the saved %eax
 * slot.  It then joins sys_call at _watch_do_syscall, the point sys_call
 * reaches once its own frame has been built.
 *
 * %eax is handed to the CLI macro, presumably as a scratch register; it
 * is reloaded afterwards in any case.  %ebx is left holding the thread
 * pointer.
 */
731	ENTRY_NP(watch_syscall)
732	CLI(%eax)
733	movl	%gs:CPU_THREAD, %ebx
734	movl	T_STACK(%ebx), %esp		/ switch to the thread stack
735	movl	REGOFF_EAX(%esp), %eax		/ recover original syscall#
736	jmp	_watch_do_syscall
737	SET_SIZE(watch_syscall)
738
739#endif	/* __lint */
740