xref: /titanic_51/usr/src/uts/i86pc/ml/syscall_asm.s (revision 12cc75c814f0c017004a9bbc96429911e008601b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28/*	  All Rights Reserved					*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31/*	  All Rights Reserved					*/
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35#include <sys/asm_linkage.h>
36#include <sys/asm_misc.h>
37#include <sys/regset.h>
38#include <sys/psw.h>
39#include <sys/x86_archext.h>
40#include <sys/machbrand.h>
41#include <sys/privregs.h>
42
43#if defined(__lint)
44
45#include <sys/types.h>
46#include <sys/thread.h>
47#include <sys/systm.h>
48
49#else	/* __lint */
50
51#include <sys/segments.h>
52#include <sys/pcb.h>
53#include <sys/trap.h>
54#include <sys/ftrace.h>
55#include <sys/traptrace.h>
56#include <sys/clock.h>
57#include <sys/panic.h>
58#include "assym.h"
59
60#endif	/* __lint */
61
62/*
63 * We implement two flavours of system call entry points
64 *
65 * -	{int,lcall}/iret	(i386)
66 * -	sysenter/sysexit	(Pentium II and beyond)
67 *
68 * The basic pattern used in the handlers is to check to see if we can
69 * do a fast (simple) version of the system call; if we can't, we use various
70 * C routines that handle corner cases and debugging.
71 *
72 * To reduce the amount of assembler replication, yet keep the system call
73 * implementations vaguely comprehensible, the common code in the body
74 * of the handlers is broken up into a set of preprocessor definitions
75 * below.
76 */
77
78/*
79 * When we have SYSCALLTRACE defined, we sneak an extra
80 * predicate into a couple of tests.
81 */
82#if defined(SYSCALLTRACE)
83#define	ORL_SYSCALLTRACE(r32)	/* r32 |= syscalltrace */	\
84	orl	syscalltrace, r32
85#else	/* !SYSCALLTRACE */
86#define	ORL_SYSCALLTRACE(r32)	/* expands to nothing */
87#endif	/* SYSCALLTRACE */
88
89/*
90 * This check is false whenever we want to go fast i.e.
91 *
92 *	if (code >= NSYSCALL ||
93 *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
94 *		do full version
95 * #ifdef SYSCALLTRACE
96 *	if (syscalltrace)
97 *		do full version
98 * #endif
99 *
100 * Preconditions:
101 * -	t	curthread
102 * -	code	contains the syscall number
103 * Postconditions:
104 * -	%ecx and %edi are smashed
105 * -	condition code flag ZF is cleared if pre-sys is too complex
106 */
107#define	CHECK_PRESYS_NE(t, code)		\
108	movzbl	T_PRE_SYS(t), %edi;	/* %edi = t->t_pre_sys (one byte) */	\
109	movzwl	T_PROC_FLAG(t), %ecx;	/* %ecx = t->t_proc_flag (16 bits) */	\
110	andl	$TP_WATCHPT, %ecx;	/* keep only the TP_WATCHPT bit */	\
111	orl	%ecx, %edi;		/* fold it into the accumulator */	\
112	cmpl	$NSYSCALL, code;	/* unsigned range check on code */	\
113	setae	%cl;			/* %cl = (code >= NSYSCALL) */		\
114	movzbl	%cl, %ecx;		/* widen the 0/1 result */		\
115	orl	%ecx, %edi;		/* %edi != 0 if any test fired */	\
116	ORL_SYSCALLTRACE(%edi)		/* ZF clear here => full version */
117
118/*
119 * When the brand's callback is invoked, the stack will look like this:
120 *	   --------------------------------------
121 *         | 'scratch space'			|
122 *         | user's %ebx			|
123 *         | user's %gs selector		|
124 *         | kernel's %gs selector		|
125 *    |    | lwp brand data			|
126 *    |    | proc brand data			|
127 *    v    | user return address		|
128 *         | callback wrapper return addr 	|
129 *         --------------------------------------
130 *
131 * The lx brand (at least) uses each of these fields.
132 * If the brand code returns, we assume that we are meant to execute the
133 * normal system call path.
134 */
135#define	BRAND_CALLBACK(callback_id)					    \
136	subl	$4, %esp		/* save some scratch space	*/ ;\
137	pushl	%ebx			/* save %ebx to use for scratch	*/ ;\
138	pushl	%gs			/* save the user %gs		*/ ;\
139	movl	$KGS_SEL, %ebx		/* kernel's %gs selector value	*/ ;\
140	pushl	%ebx			/* push kernel's %gs		*/ ;\
141	movw	%bx, %gs		/* switch to the kernel's %gs	*/ ;\
142	movl	%gs:CPU_THREAD, %ebx	/* load the thread pointer	*/ ;\
143	movl	T_LWP(%ebx), %ebx	/* load the lwp pointer		*/ ;\
144	pushl	LWP_BRAND(%ebx)		/* push the lwp's brand data	*/ ;\
145	movl	LWP_PROCP(%ebx), %ebx	/* load the proc pointer	*/ ;\
146	pushl	P_BRAND_DATA(%ebx)	/* push the proc's brand data	*/ ;\
147	movl	P_BRAND(%ebx), %ebx	/* load the brand pointer	*/ ;\
148	movl	B_MACHOPS(%ebx), %ebx	/* load the machops pointer	*/ ;\
149	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx /* callback */ ;\
150	cmpl	$0, %ebx		/* is a callback registered?	*/ ;\
151	je	1f			/* no: just unwind the stack	*/ ;\
152	movl	%ebx, 20(%esp)		/* save callback to scratch	*/ ;\
153	movl	12(%esp), %ebx		/* grab the user %gs		*/ ;\
154	movw	%bx, %gs		/* restore the user %gs		*/ ;\
155	movl	16(%esp), %ebx		/* restore %ebx			*/ ;\
156	pushl	24(%esp)		/* push the return address	*/ ;\
157	call	*24(%esp)		/* call callback (scratch slot)	*/ ;\
158	addl	$4, %esp		/* get rid of ret addr		*/ ;\
1591:	movl	12(%esp), %ebx		/* grab the user %gs		*/ ;\
160	movw	%bx, %gs		/* restore the user %gs		*/ ;\
161	movl	16(%esp), %ebx		/* restore user's %ebx		*/ ;\
162	addl	$24, %esp		/* restore stack ptr		*/
163
164#define	MSTATE_TRANSITION(from, to)		\
165	pushl	$to;		/* pushed first: second stack argument */ \
166	pushl	$from;		/* pushed last: first stack argument */	\
167	call	syscall_mstate;	/* sees from, then to, on its stack */	\
168	addl	$0x8, %esp	/* discard the two pushed arguments */
169
170/*
171 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
172 * This must be called with interrupts or preemption disabled.
173 */
174#define	CPU_STATS_SYS_SYSCALL_INC			\
175	addl	$1, %gs:CPU_STATS_SYS_SYSCALL;	/* bump the low 32 bits */ \
176	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4;	/* carry into the high 32 bits */
177
178#if !defined(__lint)
179
180/*
181 * ASSERT(lwptoregs(lwp) == rp);
182 *
183 * this may seem obvious, but very odd things happen if this
184 * assertion is false
185 *
186 * Preconditions:
187 *	-none-
188 * Postconditions (if assertion is true):
189 *	%esi and %edi are smashed
190 */
191#if defined(DEBUG)
192
193__lwptoregs_msg:
194	.string	"%M%:%d lwptoregs(%p) [%p] != rp [%p]"
195
196#define	ASSERT_LWPTOREGS(t, rp)				\
197	movl	T_LWP(t), %esi;		/* %esi = lwp of thread t */	\
198	movl	LWP_REGS(%esi), %edi;	/* %edi = regs pointer kept in the lwp */ \
199	cmpl	rp, %edi;		/* compare it with the caller's rp */ \
200	je	7f;			/* equal: assertion holds */	\
201	pushl	rp;			/* panic args, last one first: rp */ \
202	pushl	%edi;			/* regs pointer found in the lwp */ \
203	pushl	%esi;			/* the lwp itself */		\
204	pushl	$__LINE__;		/* line number for the format */ \
205	pushl	$__lwptoregs_msg;	/* format string */		\
206	call	panic;			/* report the mismatch */	\
2077:
208#else	/* !DEBUG */
209#define	ASSERT_LWPTOREGS(t, rp)	/* expands to nothing */
210#endif	/* DEBUG */
211
212#endif	/* __lint */
213
214/*
215 * This is an assembler version of this fragment:
216 *
217 * lwp->lwp_state = LWP_SYS;
218 * lwp->lwp_ru.sysc++;
219 * lwp->lwp_eosys = NORMALRETURN;
220 * lwp->lwp_ap = argp;
221 *
222 * Preconditions:
223 *	-none-
224 * Postconditions:
225 *	-none-
226 */
227#define	SET_LWP(lwp, argp)				\
228	movb	$LWP_SYS, LWP_STATE(lwp);	/* lwp_state = LWP_SYS */ \
229	addl	$1, LWP_RU_SYSC(lwp);		/* lwp_ru.sysc++ (low word) */ \
230	adcl	$0, LWP_RU_SYSC+4(lwp);		/* carry into the high word */ \
231	movb	$NORMALRETURN, LWP_EOSYS(lwp);	/* lwp_eosys = NORMALRETURN */ \
232	movl	argp, LWP_AP(lwp)		/* lwp_ap = argp */
233
234/*
235 * Set up the thread, lwp, find the handler, and copy
236 * in the arguments from userland to the kernel stack.
237 *
238 * Preconditions:
239 * -	%eax contains the syscall number
240 * Postconditions:
241 * -	%eax contains a pointer to the sysent structure
242 * -	%ecx is zeroed
243 * -	%esi, %edi are smashed
244 * -	%esp is SYS_DROPped ready for the syscall
245 */
246#define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)		\
247	movl	T_LWP(t), %esi;			/* %esi = lwp */ \
248	movw	%ax, T_SYSNUM(t);		/* record the syscall number */ \
249	subl	$SYS_DROP, %esp;		/* reserve the argument area */ \
250	shll	$SYSENT_SIZE_SHIFT, %eax;	/* scale number to a table offset */ \
251	SET_LWP(%esi, %esp);			/* lwp_ap = the argument area */ \
252	leal	sysent(%eax), %eax;		/* %eax = address of sysent entry */ \
253	movzbl	SY_NARG(%eax), %ecx;		/* %ecx = argument count (byte) */ \
254	testl	%ecx, %ecx;			/* any arguments at all? */ \
255	jz	4f;				/* none: nothing to copy */ \
256	movl	%esp, %edi;			/* dest: kernel argument area */ \
257	movl	SYS_DROP + REGOFF_UESP(%esp), %esi; /* src: saved user %esp */ \
258	movl	$faultlabel, T_LOFAULT(t);	/* fault during copy -> faultlabel */ \
259	addl	$4, %esi;			/* skip first word on user stack */ \
260	rep;					/* repeat %ecx times ... */ \
261	  smovl;				/* ... one longword per step */ \
262	movl	%ecx, T_LOFAULT(t);		/* %ecx is 0 now: clear t_lofault */ \
2634:
264
265/*
266 * Check to see if a simple return is possible i.e.
267 *
268 *	if ((t->t_post_sys_ast | syscalltrace) != 0)
269 *		do full version;
270 *
271 * Preconditions:
272 * -	t is curthread
273 * Postconditions:
274 * -	condition code NE is set if post-sys is too complex
275 * -	rtmp is zeroed if it isn't (we rely on this!)
276 */
277#define	CHECK_POSTSYS_NE(t, rtmp)			\
278	xorl	rtmp, rtmp;			/* start from zero */	\
279	ORL_SYSCALLTRACE(rtmp);			/* |= syscalltrace, if built in */ \
280	orl	T_POST_SYS_AST(t), rtmp;	/* |= t->t_post_sys_ast */ \
281	cmpl	$0, rtmp			/* NE => do the full version */
282
283/*
284 * Fix up the lwp, thread, and eflags for a successful return
285 *
286 * Preconditions:
287 * -	zwreg contains zero
288 * Postconditions:
289 * -	%esp has been unSYS_DROPped
290 * -	%esi is smashed (points to lwp)
291 */
292#define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)		\
293	movl	T_LWP(t), %esi;			/* %esi = lwp */ \
294	addl	$SYS_DROP, %esp;		/* release the argument area */ \
295	movw	zwreg, T_SYSNUM(t);		/* t_sysnum = 0 (zwreg is zero) */ \
296	movb	$LWP_USER, LWP_STATE(%esi);	/* lwp_state = LWP_USER */ \
297	andb	$_CONST(0xff - PS_C), REGOFF_EFL(%esp) /* byte mask: clear PS_C */
298
299/*
300 * System call handler.  This is the destination of both the call
301 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
302 * there are two significant differences between an interrupt gate and a call
303 * gate:
304 *
305 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
306 * call gate runs the handler with whatever EFLAGS settings were in effect at
307 * the time of the call.
308 *
309 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
310 * of the interrupt onto the stack, whereas a call gate does not.
311 *
312 * Because we use the following code sequence to handle system calls made from
313 * _both_ a call gate _and_ an interrupt gate, these two differences must be
314 * respected. In regards to number 1) above, the handler must ensure that a sane
315 * EFLAGS snapshot is stored on the stack so that when the kernel returns back
316 * to the user via iret (which returns to user with the EFLAGS value saved on
317 * the stack), interrupts are re-enabled.
318 *
319 * In regards to number 2) above, the handler must always put a current snapshot
320 * of EFLAGS onto the stack in the appropriate place. If we came in via an
321 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
322 * the interrupt gate. This is OK, as the only bit that was changed by the
323 * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
324 * now off. If we were to do nothing, the stack would contain an EFLAGS with
325 * IE off, resulting in us eventually returning back to the user with interrupts
326 * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
327 * the stack.
328 *
329 * Another subtlety which deserves mention is the difference between the two
330 * descriptors. The call gate descriptor is set to instruct the hardware to copy
331 * one parameter from the user stack to the kernel stack, whereas the interrupt
332 * gate descriptor doesn't use the parameter passing mechanism at all. The
333 * kernel doesn't actually use the parameter that is copied by the hardware; the
334 * only reason it does this is so that there is a space on the stack large
335 * enough to hold an EFLAGS register value, which happens to be in the correct
336 * place for use by iret when we go back to userland. How convenient.
337 *
338 * Stack frame description in syscall() and callees.
339 *
340 * |------------|
341 * | regs	| +(8*4)+4	registers
342 * |------------|
343 * | 8 args	| <- %esp	MAXSYSARGS (currently 8) arguments
344 * |------------|
345 *
346 */
347#define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))	/* stack bytes for MAXSYSARGS 4-byte args */
348
349#if defined(__lint)
350
351/*ARGSUSED*/
352void
353sys_call()
354{}	/* lint stub; the handler itself is the assembly in the other arm */
355
356void
357_allsyscalls()
358{}	/* lint stub for the label that starts the handler text range */
359
360size_t _allsyscalls_size;	/* lint stand-in for the object emitted in assembly */
361
362#else	/* __lint */
363
364	ENTRY_NP2(brand_sys_call, _allsyscalls)
365	BRAND_CALLBACK(BRAND_CB_SYSCALL)	/ run the brand callback, if one is set
366
367	ALTENTRY(sys_call)
368	/ on entry	eax = system call number
369
370	/ set up the stack to look as in reg.h
371	subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO
372
373	SYSCALL_PUSH
374
375#ifdef TRAPTRACE
376	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
377	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/ Uses label "9"
378	pushl	%eax
379	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
380	popl	%eax
381	movl	%eax, TTR_SYSNUM(%edi)
382#endif
383
384_watch_do_syscall:			/ watch_syscall jumps back in here
385	movl	%esp, %ebp		/ %ebp = base of the saved register frame
386
387	/ Interrupts may be enabled here, so we must make sure this thread
388	/ doesn't migrate off the CPU while it updates the CPU stats.
389	/
390	/ XXX This is only true if we got here via call gate thru the LDT for
391	/ old style syscalls. Perhaps this preempt++-- will go away soon?
392	movl	%gs:CPU_THREAD, %ebx		/ %ebx = current thread
393	addb	$1, T_PREEMPT(%ebx)		/ t_preempt++
394	CPU_STATS_SYS_SYSCALL_INC
395	subb	$1, T_PREEMPT(%ebx)		/ t_preempt--
396
397	ENABLE_INTR_FLAGS
398
399	pushl	%eax				/ preserve across mstate call
400	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
401	popl	%eax
402
403	movl	%gs:CPU_THREAD, %ebx		/ %ebx = current thread, reloaded after the call
404
405	ASSERT_LWPTOREGS(%ebx, %esp)		/ debug builds only; smashes %esi, %edi
406
407	CHECK_PRESYS_NE(%ebx, %eax)		/ smashes %ecx, %edi
408	jne	_full_syscall_presys		/ pre-syscall work is needed: slow path
409	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)	/ %eax becomes the sysent pointer
410
411_syslcall_call:
412	call	*SY_CALLC(%eax)			/ call the handler named by the sysent entry
413
414_syslcall_done:
415	CHECK_POSTSYS_NE(%ebx, %ecx)		/ leaves %ecx zero on the fast path
416	jne	_full_syscall_postsys		/ post-syscall work is needed: slow path
417	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)	/ %cx supplies the zero word
418	movl	%eax, REGOFF_EAX(%esp)		/ first return value into saved %eax
419	movl	%edx, REGOFF_EDX(%esp)		/ second return value into saved %edx
420
421	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
422
423	/
424	/ get back via iret
425	/
426	CLI(%edx)
427	jmp	sys_rtt_syscall
428
429_full_syscall_presys:			/ slow entry path; sys_sysenter branches here too
430	movl	T_LWP(%ebx), %esi		/ %esi = lwp
431	subl	$SYS_DROP, %esp			/ reserve the argument area
432	movb	$LWP_SYS, LWP_STATE(%esi)	/ mark the lwp as in-system
433	pushl	%esp				/ second argument: the argument area
434	pushl	%ebx				/ first argument: current thread
435	call	syscall_entry
436	addl	$8, %esp			/ discard both arguments
437	jmp	_syslcall_call			/ %eax from the call is used as the sysent pointer
438
439_full_syscall_postsys:			/ slow exit path; sys_sysenter branches here too
440	addl	$SYS_DROP, %esp			/ release the argument area
441	pushl	%edx				/ third argument: second return value
442	pushl	%eax				/ second argument: first return value
443	pushl	%ebx				/ first argument: current thread
444	call	syscall_exit
445	addl	$12, %esp			/ discard the three arguments
446	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
447	jmp	_sys_rtt
448
449_syscall_fault:				/ t_lofault target while arguments are copied in
450	push	$0xe			/ EFAULT
451	call	set_errno
452	addl	$4, %esp		/ discard the argument
453	xorl	%eax, %eax		/ fake syscall_err()
454	xorl	%edx, %edx
455	jmp	_syslcall_done		/ rejoin the normal post-syscall checks
456	SET_SIZE(sys_call)
457	SET_SIZE(brand_sys_call)
458
459#endif	/* __lint */
460
461/*
462 * System call handler via the sysenter instruction
463 *
464 * Here's how syscall entry usually works (see sys_call for details).
465 *
466 * There, the caller (lcall or int) in userland has arranged that:
467 *
468 * -	%eax contains the syscall number
469 * -	the user stack contains the args to the syscall
470 *
471 * Normally the lcall instruction into the call gate causes the processor
472 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
473 * The sys_call handler then leaves space for r_trapno and r_err, and
474 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
475 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
476 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
477 * extracts %efl and puts it into r_efl (which happens to live at the offset
478 * that <top-of-stack> was copied into). Note that the value in r_efl has
479 * the IF (interrupt enable) flag turned on. (The int instruction into the
480 * interrupt gate does essentially the same thing, only instead of
481 * <top-of-stack> we get eflags - see comment above.)
482 *
483 * In the sysenter case, things are a lot more primitive.
484 *
485 * The caller in userland has arranged that:
486 *
487 * -	%eax contains the syscall number
488 * -	%ecx contains the user %esp
489 * -	%edx contains the return %eip
490 * -	the user stack contains the args to the syscall
491 *
492 * e.g.
493 *	<args on the stack>
494 *	mov	$SYS_callnum, %eax
495 *	mov	$1f, %edx	/ return %eip
496 *	mov	%esp, %ecx	/ return %esp
497 *	sysenter
498 * 1:
499 *
500 * Hardware and (privileged) initialization code have arranged that by
501 * the time the sysenter instruction completes:
502 *
503 * - %eip is pointing to sys_sysenter (below).
504 * - %cs and %ss are set to kernel text and stack (data) selectors.
505 * - %esp is pointing at the lwp's stack
506 * - Interrupts have been disabled.
507 *
508 * The task for the sysenter handler is:
509 *
510 * -	recreate the same regs structure on the stack and the same
511 *	kernel state as if we'd come in on an lcall
512 * -	do the normal work of a syscall
513 * -	execute the system call epilogue, use sysexit to return to userland.
514 *
515 * Note that we are unable to return both "rvals" to userland with this
516 * call, as %edx is used by the sysexit instruction.
517 *
518 * One final complication in this routine is its interaction with
519 * single-stepping in a debugger.  For most of the system call mechanisms,
520 * the CPU automatically clears the single-step flag before we enter the
521 * kernel.  The sysenter mechanism does not clear the flag, so a user
522 * single-stepping through a libc routine may suddenly find him/herself
523 * single-stepping through the kernel.  To detect this, kmdb compares the
524 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
525 * If it finds that we have single-stepped to a sysenter entry point, it
526 * explicitly clears the flag and executes the sys_sysenter routine.
527 *
528 * One final complication in this final complication is the fact that we
529 * have two different entry points for sysenter: brand_sys_sysenter and
530 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
531 * through the kernel with kmdb, we will eventually hit the instruction at
532 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
533 * and the undesirable one mentioned above.  To avoid this situation, we
534 * simply add a jump over the instruction at sys_sysenter to make it
535 * impossible to single-step to it.
536 */
537#if defined(__lint)
538
539void
540sys_sysenter()
541{}	/* lint stub; the handler itself is the assembly in the other arm */
542
543#else	/* __lint */
544
545	ENTRY_NP(brand_sys_sysenter)
546	pushl	%edx			/ user return %eip, completing the callback frame
547	BRAND_CALLBACK(BRAND_CB_SYSENTER)
548	popl	%edx			/ reload %edx from that slot
549	/*
550	 * Jump over sys_sysenter to allow single-stepping as described
551	 * above.  The jump is unconditional so it never depends on flags.
552	 */
553	jmp	1f
554
555	ALTENTRY(sys_sysenter)
556	nop				/ the only instruction at sys_sysenter itself
5571:
558	/
559	/ do what the call gate would've done to the stack ..
560	/
561	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
562	pushl	%ecx		/ userland makes this a copy of %esp
563	pushfl			/ flags slot, built from the current flags
564	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
565	pushl	$UCS_SEL
566	pushl	%edx		/ userland makes this a copy of %eip
567	/
568	/ done.  finish building the stack frame
569	/
570	subl	$8, %esp	/ leave space for ERR and TRAPNO
571
572	SYSENTER_PUSH
573
574#ifdef TRAPTRACE
575	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
576	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
577	pushl	%eax
578	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
579	popl	%eax
580	movl	%eax, TTR_SYSNUM(%edi)
581#endif
582	movl	%esp, %ebp		/ %ebp = base of the saved register frame
583
584	CPU_STATS_SYS_SYSCALL_INC	/ nothing above has turned interrupts back on
585
586	ENABLE_INTR_FLAGS
587
588	pushl	%eax				/ preserve across mstate call
589	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
590	popl	%eax
591
592	movl	%gs:CPU_THREAD, %ebx		/ %ebx = current thread
593
594	ASSERT_LWPTOREGS(%ebx, %esp)		/ debug builds only; smashes %esi, %edi
595
596	CHECK_PRESYS_NE(%ebx, %eax)		/ smashes %ecx, %edi
597	jne	_full_syscall_presys		/ slow path, shared with sys_call
598	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)	/ %eax becomes the sysent pointer
599
600_sysenter_call:
601	call	*SY_CALLC(%eax)			/ call the handler named by the sysent entry
602
603_sysenter_done:
604	CHECK_POSTSYS_NE(%ebx, %ecx)		/ leaves %ecx zero on the fast path
605	jne	_full_syscall_postsys		/ slow path, shared with sys_call
606	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)	/ %cx supplies the zero word
607	/
608	/ sysexit uses %edx to restore %eip, so we can't use it
609	/ to return a value, sigh.
610	/
611	movl	%eax, REGOFF_EAX(%esp)
612	/ movl	%edx, REGOFF_EDX(%esp)
613
614	/ Interrupts will be turned on by the 'sti' executed just before
615	/ sysexit. The following ensures that restoring the user's EFLAGS
616	/ doesn't enable interrupts too soon.
617	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)
618
619	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
620
621	cli				/ interrupts stay off until the sti below
622
623	SYSCALL_POP
624
625	popl	%edx			/ sysexit: %edx -> %eip
626	addl	$4, %esp		/ get CS off the stack
627	popfl				/ EFL
628	popl	%ecx			/ sysexit: %ecx -> %esp
629	sti
630	sysexit
631	SET_SIZE(sys_sysenter)
632	SET_SIZE(brand_sys_sysenter)
633
634#endif	/* __lint */
635
636#if defined(__lint)
637/*
638 * System call via an int80.  This entry point is only used by the Linux
639 * application environment.  Unlike the sysenter path, there is no default
640 * action to take if no callback is registered for this process.
641 */
642void
643sys_int80()
644{}	/* lint stub; the handler itself is the assembly in the other arm */
645
646#else	/* __lint */
647
648	ENTRY_NP(brand_sys_int80)
649	BRAND_CALLBACK(BRAND_CB_INT80)	/ run the brand callback, if one is set
650
651	ALTENTRY(sys_int80)
652	/*
653	 * We hit an int80, but this process isn't of a brand with an int80
654	 * handler.  Bad process!  Make it look as if the INT failed.
655	 * Modify %eip to point before the INT, push the expected error
656	 * code and fake a GP fault.
657	 * The saved %eip is the word at the top of the stack, hence (%esp).
658	 */
659	subl	$2, (%esp)	/* int insn 2-bytes */
660	pushl	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)	/ gate offset, with bit 1 set
661	jmp	gptrap			/ GP fault
662	SET_SIZE(sys_int80)
663	SET_SIZE(brand_sys_int80)
664
665/*
666 * Declare a uintptr_t which covers the entire pc range of syscall
667 * handlers for the stack walkers that need this.
668 */
669	.align	CPTRSIZE
670	.globl	_allsyscalls_size
671	.type	_allsyscalls_size, @object
672_allsyscalls_size:
673	.NWORD	. - _allsyscalls	/ distance from the first handler label to here
674	SET_SIZE(_allsyscalls_size)
675
676#endif	/* __lint */
677
678/*
679 * These are the thread context handlers for lwps using sysenter/sysexit.
680 */
681
682#if defined(__lint)
683
684/*ARGSUSED*/
685void
686sep_save(void *ksp)
687{}
688
689/*ARGSUSED*/
690void
691sep_restore(void *ksp)
692{}
693
694#else	/* __lint */
695
696	/*
697	 * Context-save handler: write zero to the sysenter stack-pointer
698	 * MSR as we switch away.  A sysenter taken by an lwp that (somehow)
699	 * was not sep_restore'd then starts with a NULL stack pointer
700	 * instead of silently corrupting another (preempted) thread stack.
701	 */
702	ENTRY_NP(sep_save)
703	movl	$MSR_INTC_SEP_ESP, %ecx		/* MSR to write */
704	xorl	%eax, %eax			/* low half of the value = 0 */
705	xorl	%edx, %edx			/* high half of the value = 0 */
706	wrmsr
707	ret
708	SET_SIZE(sep_save)
709
710	/*
711	 * Context-restore handler: load this lwp's kernel sp into the MSR.
712	 */
713	ENTRY_NP(sep_restore)
714	movl	$MSR_INTC_SEP_ESP, %ecx		/* MSR to write */
715	movl	4(%esp), %eax			/* per-lwp kernel sp */
716	xorl	%edx, %edx			/* high half of the value = 0 */
717	wrmsr
718	ret
719	SET_SIZE(sep_restore)
720
721#endif	/* __lint */
722
723/*
724 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
725 */
726
727#if defined(__lint)
728
729void
730watch_syscall(void)
731{}	/* lint stub; the routine itself is the assembly in the other arm */
732
733#else	/* __lint */
734
735	ENTRY_NP(watch_syscall)
736	CLI(%eax)
737	movl	%gs:CPU_THREAD, %ebx		/ %ebx = current thread
738	movl	T_STACK(%ebx), %esp		/ switch to the thread stack
739	movl	REGOFF_EAX(%esp), %eax		/ recover original syscall#
740	jmp	_watch_do_syscall		/ continue inside sys_call, after the frame is built
741	SET_SIZE(watch_syscall)
742
743#endif	/* __lint */
744