/* xref: /titanic_51/usr/src/uts/intel/ia32/ml/swtch.s (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Process switching routines.
 */

/*
 * lint sees the C prototypes of the routines below and therefore needs the
 * C headers; the real assembly build instead pulls in assym.h, which supplies
 * the structure-offset symbols (T_SP, T_PC, CPU_THREAD, ...) used throughout
 * this file.
 */
#if defined(__lint)
#include <sys/thread.h>
#include <sys/systm.h>
#include <sys/time.h>
#else	/* __lint */
#include "assym.h"
#endif	/* __lint */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>

/*
 * resume(thread_id_t t);
 *
 * A thread can only run on one processor at a time.  There
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * Some overlap between outgoing and incoming threads can happen
 * when they are the same thread.  In this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process.  Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted).  This means that the
 * resume lock is already held and that a restore context is not needed.
 * Also, the MMU context is not changed on the resume in this case.
 *
 * resume_from_zombie() is the same as resume except the calling thread
 * is a zombie and must be put on the deathrow list after the CPU is
 * off the stack.
 */

#if !defined(__lint)

/*
 * Build-time check: the code below addresses PCB_FPU_FLAGS directly off the
 * lwp pointer, so the build is refused unless LWP_PCB_FPU is 0.
 */
#if LWP_PCB_FPU != 0
#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
#endif	/* LWP_PCB_FPU != 0 */

#endif	/* !__lint */

#if defined(__amd64)

/*
 * SAVE_REGS(thread_t, retaddr)
 *
 *	thread_t	register holding the thread whose state is saved
 *	retaddr		register holding the address to record in T_PC
 *
 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * The stack frame must be created before the save of %rsp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 *
 * Side effects:
 *	- one word (the old %rbp) is pushed; the *_return labels in this file
 *	  drop it again with "addq $CLONGSIZE, %rsp"
 *	- %rbp is left equal to %rsp
 *	- %rdi (the caller's first argument) is copied into %r12
 *	- __dtrace_probe___sched_off__cpu is called with %rdi unchanged, so
 *	  volatile registers must be assumed clobbered afterwards
 */
#define SAVE_REGS(thread_t, retaddr)			\
	movq	%rbp, T_RBP(thread_t);			\
	movq	%rbx, T_RBX(thread_t);			\
	movq	%r12, T_R12(thread_t);			\
	movq	%r13, T_R13(thread_t);			\
	movq	%r14, T_R14(thread_t);			\
	movq	%r15, T_R15(thread_t);			\
	pushq	%rbp;					\
	movq	%rsp, %rbp;				\
	movq	%rsp, T_SP(thread_t);			\
	movq	retaddr, T_PC(thread_t);		\
	movq	%rdi, %r12;				\
	call	__dtrace_probe___sched_off__cpu

/*
 * RESTORE_REGS(scratch_reg)
 *
 *	scratch_reg	register that is overwritten with %gs:CPU_THREAD
 *
 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 * from the thread that is current on this CPU.
 *
 * We load up %rsp from the label_t as part of the context switch, so
 * we don't repeat that here.
 *
 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define RESTORE_REGS(scratch_reg)			\
	movq	%gs:CPU_THREAD, scratch_reg;		\
	movq	T_RBP(scratch_reg), %rbp;		\
	movq	T_RBX(scratch_reg), %rbx;		\
	movq	T_R12(scratch_reg), %r12;		\
	movq	T_R13(scratch_reg), %r13;		\
	movq	T_R14(scratch_reg), %r14;		\
	movq	T_R15(scratch_reg), %r15

/*
 * GET_THREAD_HATP(hatp, thread_t, scratch_reg)
 *
 * Get pointer to a thread's hat structure by following
 * thread -> T_PROCP -> P_AS -> A_HAT.
 *
 *	hatp		register that receives the hat pointer (also used as
 *			a temporary for the proc pointer)
 *	thread_t	register holding the thread pointer (not modified)
 *	scratch_reg	register used to hold the address-space pointer
 */
#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
	movq	T_PROCP(thread_t), hatp;		\
	movq	P_AS(hatp), scratch_reg;		\
	movq	A_HAT(scratch_reg), hatp

#elif defined (__i386)

/*
 * SAVE_REGS(thread_t, retaddr)
 *
 *	thread_t	register holding the thread whose state is saved
 *	retaddr		register holding the address to record in T_PC
 *
 * Save non-volatile registers (%ebp, %esi, %edi and %ebx)
 *
 * The stack frame must be created before the save of %esp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 *
 * Side effects:
 *	- one word (the old %ebp) is pushed; the *_return labels in this file
 *	  drop it again with "addl $CLONGSIZE, %esp"
 *	- %ebp is left equal to %esp
 *	- %edi is loaded from 8(%ebp), i.e. the caller's first stack argument
 *	- __dtrace_probe___sched_off__cpu is called with that value as its
 *	  stack argument, so volatile registers must be assumed clobbered
 */
#define SAVE_REGS(thread_t, retaddr)			\
	movl	%ebp, T_EBP(thread_t);			\
	movl	%ebx, T_EBX(thread_t);			\
	movl	%esi, T_ESI(thread_t);			\
	movl	%edi, T_EDI(thread_t);			\
	pushl	%ebp;					\
	movl	%esp, %ebp;				\
	movl	%esp, T_SP(thread_t);			\
	movl	retaddr, T_PC(thread_t);		\
	movl	8(%ebp), %edi;				\
	pushl	%edi;					\
	call	__dtrace_probe___sched_off__cpu;	\
	addl	$CLONGSIZE, %esp

/*
 * RESTORE_REGS(scratch_reg)
 *
 *	scratch_reg	register that is overwritten with %gs:CPU_THREAD
 *
 * Restore non-volatile registers (%ebp, %esi, %edi and %ebx)
 * from the thread that is current on this CPU.
 *
 * We don't do a 'leave,' because reloading %esp/%ebp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define RESTORE_REGS(scratch_reg)			\
	movl	%gs:CPU_THREAD, scratch_reg;		\
	movl	T_EBP(scratch_reg), %ebp;		\
	movl	T_EBX(scratch_reg), %ebx;		\
	movl	T_ESI(scratch_reg), %esi;		\
	movl	T_EDI(scratch_reg), %edi

/*
 * GET_THREAD_HATP(hatp, thread_t, scratch_reg)
 *
 * Get pointer to a thread's hat structure by following
 * thread -> T_PROCP -> P_AS -> A_HAT.
 *
 *	hatp		register that receives the hat pointer (also used as
 *			a temporary for the proc pointer)
 *	thread_t	register holding the thread pointer (not modified)
 *	scratch_reg	register used to hold the address-space pointer; the
 *			callers in this file pass the same register as hatp
 */
#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
	movl	T_PROCP(thread_t), hatp;		\
	movl	P_AS(hatp), scratch_reg;		\
	movl	A_HAT(scratch_reg), hatp

#endif	/* __amd64 */

#if defined(__lint)

/* ARGSUSED */
void
resume(kthread_t *t)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void resume(kthread_t *t)				(amd64)
	 *
	 * In:	%rdi = t, the thread to switch to.
	 *
	 * Register roles:
	 *	%r12 = t (new thread); set by SAVE_REGS and kept throughout
	 *	%r13 = outgoing thread until its t_lock has been dropped, then
	 *	       the CPU pointer, and finally t's saved return address
	 *	%r14 = outgoing thread's lwp, later the TSS pointer, later the
	 *	       new timestamp for an interrupt thread
	 *	%r15 = CPU pointer during the first half
	 *
	 * _resume_from_idle is an alternate entry that expects only
	 * %r12 = new thread; resume_from_zombie() jumps to it.
	 *
	 * The routine leaves by pushing t's saved PC as the return address
	 * and jumping to spl0().
	 */
	ENTRY(resume)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	LOADCPU(%r15)				/* %r15 = CPU */
	movq	CPU_THREAD(%r15), %r13		/* %r13 = curthread */

	/*
	 * Call savectx if thread has installed context ops.
	 *
	 * Note that if we have floating point context, the save op
	 * (either fpsave_begin or fpxsave_begin) will issue the
	 * async save instruction (fnsave or fxsave respectively)
	 * that we fwait for below.
	 */
	cmpq	$0, T_CTX(%r13)		/* should current thread savectx? */
	je	.nosavectx		/* skip call when zero */

	movq	%r13, %rdi		/* arg = thread pointer */
	call	savectx			/* call ctx ops */

.nosavectx:
	/*
	 * Setup LDT register
	 */
	movq	T_PROCP(%r12), %rax	/* load new thread proc */

	/* make sure GDT contains the right LDT desc */
	movq	%gs:CPU_GDT, %r11

	/* copy the 16-byte descriptor as two quadwords */
	movq	P_LDT_DESC(%rax), %r10
	movq	_CONST(P_LDT_DESC+8)(%rax), %rax
	movq	%r10, ULDT_SEL(%r11)
	movq	%rax, _CONST(ULDT_SEL+8)(%r11)
	movl	$ULDT_SEL, %edx
	lldt	%dx

	/*
	 * Temporarily switch to the idle thread's stack
	 */
	movq	CPU_IDLE_THREAD(%r15), %rax	/* idle thread pointer */

	/*
	 * Set the idle thread as the current thread
	 */
	movq	T_SP(%rax), %rsp	/* It is safe to set rsp */
	movq	%rax, CPU_THREAD(%r15)

	/*
	 * Switch in the hat context for the new thread
	 *
	 */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	movq	T_LWP(%r13), %r14	/* %r14 = outgoing thread's lwp */
	testq	%r14, %r14
	jz	.disabled_fpu2		/* no lwp, so no fpu state to wait on */

	cmpl	$FPU_EN, PCB_FPU_FLAGS(%r14)
	je	.wait_for_fpusave

.disabled_fpu2:
	/*
	 * Clear and unlock previous thread's t_lock
	 * to allow it to be dispatched by another processor.
	 */
	movb	$0, T_LOCK(%r13)

	/*
	 * IMPORTANT: Registers at this point must be:
	 *       %r12 = new thread
	 *
	 * Here we are in the idle thread, have dropped the old thread.
	 */
	ALTENTRY(_resume_from_idle)
	/*
	 * spin until dispatched thread's mutex has
	 * been unlocked. this mutex is unlocked when
	 * it becomes safe for the thread to run.
	 */
.lock_thread_mutex:
	lock
	btsl	$0, T_LOCK(%r12)	/* attempt to lock new thread's mutex */
	jnc	.thread_mutex_locked	/* got it */

.spin_thread_mutex:
	pause
	cmpb	$0, T_LOCK(%r12)	/* check mutex status */
	jz	.lock_thread_mutex	/* clear, retry lock */
	jmp	.spin_thread_mutex	/* still locked, spin... */

.thread_mutex_locked:
	/*
	 * Fix CPU structure to indicate new running thread.
	 * Set pointer in new thread to the CPU structure.
	 */
	LOADCPU(%r13)			/* load current CPU pointer */
	cmpq	%r13, T_CPU(%r12)
	je	.setup_cpu

	/* cp->cpu_stats.sys.cpumigrate++ */
	incq	CPU_STATS_SYS_CPUMIGRATE(%r13)
	movq	%r13, T_CPU(%r12)	/* set new thread's CPU pointer */

.setup_cpu:
	/*
	 * Setup rsp0 (kernel stack) in TSS to curthread's stack.
	 * (Note: Since we don't have saved 'regs' structure for all
	 *	  the threads we can't easily determine if we need to
	 *	  change rsp0. So, we simply change the rsp0 to bottom
	 *	  of the thread stack and it will work for all cases.)
	 *
	 * XX64 - Is this correct?
	 */
	movq	CPU_TSS(%r13), %r14
	movq	T_STACK(%r12), %rax
	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
	movq	%rax, TSS_RSP0(%r14)

	movq	%r12, CPU_THREAD(%r13)	/* set CPU's thread pointer */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */
	movq	T_LWP(%r12), %rax	/* set associated lwp to  */
	movq	%rax, CPU_LWP(%r13)	/* CPU's lwp ptr */

	movq	T_SP(%r12), %rsp	/* switch to resumed thread's stack */
	movq	T_PC(%r12), %r13	/* saved return addr */

	/*
	 * Call restorectx if context ops have been installed.
	 */
	cmpq	$0, T_CTX(%r12)		/* should resumed thread restorectx? */
	jz	.norestorectx		/* skip call when zero */
	movq	%r12, %rdi		/* arg = thread pointer */
	call	restorectx		/* call ctx ops */

.norestorectx:

	/*
	 * If we are resuming an interrupt thread, store a timestamp
	 * in the thread structure.
	 */
	testw	$T_INTR_THREAD, T_FLAGS(%r12)
	jz	1f

0:
	/*
	 * If an interrupt occurs between the rdtsc instruction and its
	 * subsequent store, the timestamp will be stale by the time it is
	 * stored. We can detect this by doing a compare-and-swap on the
	 * thread's timestamp, since any interrupt occurring in this window
	 * will put a new timestamp in the thread's t_intr_start field.
	 */
	movq	T_INTR_START(%r12), %rcx	/* expected old value */
	rdtsc

	/*
	 * After rdtsc:
	 *     High 32 bits of TC are in %edx
	 *     Low 32 bits of TC are in %eax
	 */
	shlq	$32, %rdx
	movl	%eax, %r14d
	orq	%rdx, %r14		/* %r14 = full 64-bit timestamp */
	movq	%rcx, %rax		/* cmpxchg compares against %rax */
	cmpxchgq %r14, T_INTR_START(%r12)
	jnz	0b			/* field changed under us, retry */
1:
	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	%r13, %rax	/* save return address */
	RESTORE_REGS(%r11)
	pushq	%rax		/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret

.wait_for_fpusave:
	/* mark copy in pcb as valid */
	movq	%cr0, %rax
	movl	$_CONST(FPU_VALID|FPU_EN), PCB_FPU_FLAGS(%r14)
	orl	$CR0_TS, %eax	/* set to trap on next switch */
	fwait			/* ensure save is done before we unlock */
	finit			/* (ensure x87 tags cleared for fxsave case) */
	movq	%rax, %cr0
	jmp	.disabled_fpu2
	SET_SIZE(_resume_from_idle)
	SET_SIZE(resume)

#elif defined (__i386)

	/*
	 * void resume(kthread_t *t)				(i386)
	 *
	 * In:	t is the first stack argument; SAVE_REGS loads it into %edi.
	 *
	 * Register roles:
	 *	%edi = t (new thread); set by SAVE_REGS and kept throughout
	 *	%esi = outgoing thread until its t_lock has been dropped, then
	 *	       the CPU pointer, and finally t's saved return address
	 *	%ebx = CPU pointer, then the outgoing thread's lwp pointer
	 *	       (used for the fpu check), later a cmpxchg8b operand
	 *
	 * _resume_from_idle is an alternate entry that expects only
	 * %edi = new thread; resume_from_zombie() jumps to it.
	 *
	 * The routine leaves by pushing t's saved PC as the return address
	 * and jumping to spl0().
	 */
	ENTRY(resume)
	movl	%gs:CPU_THREAD, %eax
	movl	$resume_return, %ecx

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_return.
	 *
	 * %edi = t (new thread) when done.
	 */
	SAVE_REGS(%eax,  %ecx)

	LOADCPU(%ebx)			/* %ebx = CPU */
	movl	CPU_THREAD(%ebx), %esi	/* %esi = curthread */

	/*
	 * Call savectx if thread has installed context ops.
	 *
	 * Note that if we have floating point context, the save op
	 * (either fpsave_begin or fpxsave_begin) will issue the
	 * async save instruction (fnsave or fxsave respectively)
	 * that we fwait for below.
	 */
	movl	T_CTX(%esi), %eax	/* should current thread savectx? */
	testl	%eax, %eax
	jz	.nosavectx		/* skip call when zero */
	pushl	%esi			/* arg = thread pointer */
	call	savectx			/* call ctx ops */
	addl	$4, %esp		/* restore stack pointer */

.nosavectx:
	movl	T_LWP(%esi), %ecx
	pushl	%ecx			/* save fp address for later check */

	/*
	 * Setup LDT register
	 */
	movl	T_PROCP(%edi), %eax	/* load new proc */

	/* make sure GDT contains the right LDT desc */
	movl	%gs:CPU_GDT, %ecx

	/* copy the 8-byte descriptor as two longwords */
	movl	P_LDT_DESC(%eax), %edx
	movl	_CONST(P_LDT_DESC+4)(%eax), %eax
	movl	%edx, ULDT_SEL(%ecx)
	movl	%eax, _CONST(ULDT_SEL+4)(%ecx)
	movl	$ULDT_SEL, %edx
	lldt	%dx

	/*
	 * Temporarily switch to the idle thread's stack
	 */
	movl	CPU_IDLE_THREAD(%ebx), %eax	/* idle thread pointer */
	popl	%ecx			/* restore pointer to fp structure. */

	/*
	 * Set the idle thread as the current thread
	 */
	movl	T_SP(%eax), %esp	/* It is safe to set esp */
	movl	%eax, CPU_THREAD(%ebx)
	movl	%ecx, %ebx		/* save pcb_fpu pointer in %ebx */

	/* switch in the hat context for the new thread */
	GET_THREAD_HATP(%ecx, %edi, %ecx)
	pushl	%ecx
	call	hat_switch
	addl	$4, %esp

	xorl	%ecx, %ecx			/* %cl = 0, stored to t_lock below */
	testl	%ebx, %ebx			/* check pcb_fpu pointer */
	jz	.disabled_fpu2
	cmpl	$FPU_EN, PCB_FPU_FLAGS(%ebx)	/* is PCB_FPU_FLAGS FPU_EN? */
	je	.wait_for_fpusave
.disabled_fpu2:
	/*
	 * Clear and unlock previous thread's t_lock
	 * to allow it to be dispatched by another processor.
	 */
	movb	%cl, T_LOCK(%esi)

	/*
	 * IMPORTANT: Registers at this point must be:
	 *       %edi = new thread
	 *
	 * Here we are in the idle thread, have dropped the old thread.
	 */
	ALTENTRY(_resume_from_idle)
	/*
	 * spin until dispatched thread's mutex has
	 * been unlocked. this mutex is unlocked when
	 * it becomes safe for the thread to run.
	 */
.L4:
	lock
	btsl	$0, T_LOCK(%edi) /* lock new thread's mutex */
	jc	.L4_2			/* lock did not succeed */

	/*
	 * Fix CPU structure to indicate new running thread.
	 * Set pointer in new thread to the CPU structure.
	 */
	LOADCPU(%esi)			/* load current CPU pointer */
	movl	T_STACK(%edi), %eax	/* here to use v pipeline of */
					/* Pentium. Used few lines below */
	cmpl	%esi, T_CPU(%edi)
	jne	.L5_2			/* thread migrated: count it */
.L5_1:
	/*
	 * Setup esp0 (kernel stack) in TSS to curthread's stack.
	 * (Note: Since we don't have saved 'regs' structure for all
	 *	  the threads we can't easily determine if we need to
	 *	  change esp0. So, we simply change the esp0 to bottom
	 *	  of the thread stack and it will work for all cases.)
	 */
	movl	CPU_TSS(%esi), %ecx
	addl	$REGSIZE+MINFRAME, %eax	/* to the bottom of thread stack */
	movl	%eax, TSS_ESP0(%ecx)

	movl	%edi, CPU_THREAD(%esi)	/* set CPU's thread pointer */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */
	movl	T_LWP(%edi), %eax	/* set associated lwp to  */
	movl	%eax, CPU_LWP(%esi)	/* CPU's lwp ptr */

	movl	T_SP(%edi), %esp	/* switch to resumed thread's stack */
	movl	T_PC(%edi), %esi	/* saved return addr */

	/*
	 * Call restorectx if context ops have been installed.
	 */
	movl	T_CTX(%edi), %eax	/* should resumed thread restorectx? */
	testl	%eax, %eax
	jz	.norestorectx		/* skip call when zero */
	pushl	%edi			/* arg = thread pointer */
	call	restorectx		/* call ctx ops */
	addl	$4, %esp		/* restore stack pointer */
.norestorectx:

	/*
	 * If we are resuming an interrupt thread, store a timestamp
	 * in the thread structure.
	 */
	testw	$T_INTR_THREAD, T_FLAGS(%edi)
	jz	1f
	pushl	%ecx
0:
	/*
	 * If an interrupt occurs between the rdtsc instruction and its
	 * subsequent store, the timestamp will be stale by the time it is
	 * stored. We can detect this by doing a compare-and-swap on the
	 * thread's timestamp, since any interrupt occurring in this window
	 * will put a new timestamp in the thread's t_intr_start field.
	 */
	pushl	T_INTR_START(%edi)	/* expected old value, low word */
	pushl	T_INTR_START+4(%edi)	/* expected old value, high word */
	.globl	_tsc_patch15
_tsc_patch15:
	nop; nop			/* patched to rdtsc if available */
	movl	%eax, %ebx		/* %ecx:%ebx = new value */
	movl	%edx, %ecx
	popl	%edx			/* %edx:%eax = expected old value */
	popl	%eax
	cmpxchg8b T_INTR_START(%edi)
	jnz	0b			/* field changed under us, retry */
	popl	%ecx
1:
	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movl	%esi, %eax		/* save return address */
	RESTORE_REGS(%ecx)
	pushl	%eax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addl	$CLONGSIZE, %esp
	ret

.wait_for_fpusave:
	mov	%cr0, %eax

	/* mark copy in pcb as valid */
	movl	$_CONST(FPU_VALID|FPU_EN), PCB_FPU_FLAGS(%ebx)

	orl	$CR0_TS, %eax			/* set to trap on next switch */
	fwait			/* ensure save is done before we unlock */
	finit			/* (ensure x87 tags cleared for fxsave case) */
	movl	%eax, %cr0
	jmp	.disabled_fpu2

	/* out-of-line spin loop for the new thread's t_lock */
.L4_2:
	pause
	cmpb	$0, T_LOCK(%edi)
	je	.L4
	jmp	.L4_2

.L5_2:
	/* cp->cpu_stats.sys.cpumigrate++ */
	addl	$1, CPU_STATS_SYS_CPUMIGRATE(%esi)
	adcl	$0, CPU_STATS_SYS_CPUMIGRATE+4(%esi)
	movl	%esi, T_CPU(%edi)	/* set new thread's CPU pointer */
	jmp	.L5_1

	SET_SIZE(_resume_from_idle)
	SET_SIZE(resume)

#endif	/* __amd64 */
#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
resume_from_zombie(kthread_t *t)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void resume_from_zombie(kthread_t *t)		(amd64)
	 *
	 * In:	%rdi = t, the thread to switch to.
	 *
	 * Register roles:
	 *	%r12 = t (new thread); set by SAVE_REGS
	 *	%r13 = the calling (zombie) thread, handed to reapq_add()
	 *
	 * After moving onto the idle thread's stack and queueing the zombie,
	 * control transfers to _resume_from_idle with %r12 = new thread.
	 */
	ENTRY(resume_from_zombie)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_zombie_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_zombie_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */

	/*
	 * Setup LDT register
	 */
	movq	T_PROCP(%r12), %rax	/* load new thread proc */

	/* make sure GDT contains the right LDT desc */
	movq	%gs:CPU_GDT, %r11

	/* copy the 16-byte descriptor as two quadwords */
	movq	P_LDT_DESC(%rax), %r10
	movq	_CONST(P_LDT_DESC+8)(%rax), %rax
	movq	%r10, ULDT_SEL(%r11)
	movq	%rax, _CONST(ULDT_SEL+8)(%r11)
	movl	$ULDT_SEL, %edx
	lldt	%dx

	/* clean up the fp unit. It might be left enabled */
	movq	%cr0, %rax
	testq	$CR0_TS, %rax
	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
	fninit				/* init fpu & discard pending error */

	/*
	 * Store a zero word into the mxcsr register to disable any sse
	 * floating point exceptions
	 */
	pushq	$0
	movq	%rsp, %rdi
	ldmxcsr	(%rdi)
	addq	$CLONGSIZE, %rsp
	orq	$CR0_TS, %rax		/* set TS again before leaving */
	movq	%rax, %cr0
.zfpu_disabled:

	/*
	 * Temporarily switch to the idle thread's stack so that the zombie
	 * thread's stack can be reclaimed by the reaper.
	 */
	movq	%gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
	movq	T_SP(%rax), %rsp	/* get onto idle thread stack */

	/*
	 * Sigh. If the idle thread has never run thread_start()
	 * then t_sp is mis-aligned by thread_load().
	 */
	andq	$_BITNOT(STACK_ALIGN-1), %rsp

	/*
	 * Set the idle thread as the current thread.
	 */
	movq	%rax, %gs:CPU_THREAD

	/* switch in the hat context for the new thread */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	/*
	 * Put the zombie on death-row.
	 */
	movq	%r13, %rdi
	call	reapq_add

	jmp	_resume_from_idle	/* finish job of resume */

resume_from_zombie_return:
	RESTORE_REGS(%r11)		/* restore non-volatile registers */
	call	__dtrace_probe___sched_on__cpu

	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_zombie)

#elif defined (__i386)

	/*
	 * void resume_from_zombie(kthread_t *t)		(i386)
	 *
	 * In:	t is the first stack argument; SAVE_REGS loads it into %edi.
	 *
	 * Register roles:
	 *	%edi = t (new thread); set by SAVE_REGS
	 *	%esi = the calling (zombie) thread, handed to reapq_add()
	 *
	 * After moving onto the idle thread's stack and queueing the zombie,
	 * control transfers to _resume_from_idle with %edi = new thread.
	 */
	ENTRY(resume_from_zombie)
	movl	%gs:CPU_THREAD, %eax
	movl	$resume_from_zombie_return, %ecx

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_zombie_return.
	 *
	 * %edi = t (new thread) when done.
	 */
	SAVE_REGS(%eax, %ecx)

	movl	%gs:CPU_THREAD, %esi	/* %esi = curthread */

	/*
	 * Setup LDT register
	 */
	movl	T_PROCP(%edi), %ecx	/* load new proc  */

	/* make sure GDT contains the right LDT desc */
	movl	%gs:CPU_GDT, %eax

	/* copy the 8-byte descriptor as two longwords */
	movl	P_LDT_DESC(%ecx), %edx
	movl	_CONST(P_LDT_DESC+4)(%ecx), %ecx
	movl	%edx, ULDT_SEL(%eax)
	movl	%ecx, _CONST(ULDT_SEL+4)(%eax)
	movl	$ULDT_SEL, %edx
	lldt	%dx

	/* clean up the fp unit. It might be left enabled */
	movl	%cr0, %eax
	testl	$CR0_TS, %eax
	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
	fninit				/* init fpu & discard pending error */

	/*
	 * If this machine supports fxsave/fxrstor, the next string of
	 * nops may be patched to store a zero word off the stack into
	 * the mxcsr register to disable any sse floating point exceptions
	 */
	pushl	$0
	mov	%esp, %ebx
	.globl	_patch_ldmxcsr_ebx
_patch_ldmxcsr_ebx:
	nop; nop; nop			/* ldmxcsr (%ebx) */
	addl	$4, %esp
	orl	$CR0_TS, %eax		/* set TS again before leaving */
	movl	%eax, %cr0
.zfpu_disabled:
	/*
	 * Temporarily switch to the idle thread's stack so that the zombie
	 * thread's stack can be reclaimed by the reaper.
	 */
	movl	%gs:CPU_IDLE_THREAD, %eax /* idle thread pointer */
	movl	T_SP(%eax), %esp	/* get onto idle thread stack */

	/*
	 * Set the idle thread as the current thread.
	 */
	movl	%eax, %gs:CPU_THREAD

	/* switch in the hat context for the new thread */
	GET_THREAD_HATP(%ecx, %edi, %ecx)
	pushl	%ecx
	call	hat_switch
	addl	$4, %esp
	/*
	 * Put the zombie on death-row.
	 */
	pushl	%esi
	call	reapq_add
	addl	$4, %esp
	jmp	_resume_from_idle	/* finish job of resume */

resume_from_zombie_return:
	RESTORE_REGS(%ecx)		/* restore non-volatile registers */
	call	__dtrace_probe___sched_on__cpu

	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addl	$CLONGSIZE, %esp
	ret
	SET_SIZE(resume_from_zombie)

#endif	/* __amd64 */
#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
void
resume_from_intr(kthread_t *t)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void resume_from_intr(kthread_t *t)			(amd64)
	 *
	 * In:	%rdi = t, the thread to switch to.
	 *
	 * Register roles:
	 *	%r12 = t (new thread); set by SAVE_REGS
	 *	%r13 = outgoing thread, whose t_lock is released here
	 *	%r14 = new timestamp when t is an interrupt thread
	 *
	 * Unlike resume(), this path does not touch the LDT, the hat
	 * context, the TSS or t's t_lock, and it does not pass through
	 * the idle thread's stack.
	 *
	 * The routine leaves by pushing t's saved PC as the return address
	 * and jumping to spl0().
	 */
	ENTRY(resume_from_intr)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_intr_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_intr_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */
	movq	%r12, %gs:CPU_THREAD	/* set CPU's thread pointer */
	movq	T_SP(%r12), %rsp	/* restore resuming thread's sp */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */

	/*
	 * Unlock outgoing thread's mutex so that it can be dispatched
	 * by another processor.
	 */
	xorl	%eax, %eax
	xchgb	%al, T_LOCK(%r13)

	/*
	 * If we are resuming an interrupt thread, store a timestamp in
	 * the thread structure.
	 */
	testw	$T_INTR_THREAD, T_FLAGS(%r12)
	jz	1f
0:
	/*
	 * If an interrupt occurs between the rdtsc instruction and its
	 * subsequent store, the timestamp will be stale by the time it is
	 * stored. We can detect this by doing a compare-and-swap on the
	 * thread's timestamp, since any interrupt occurring in this window
	 * will put a new timestamp in the thread's t_intr_start field.
	 */
	movq	T_INTR_START(%r12), %rcx	/* expected old value */
	rdtsc

	/*
	 * After rdtsc:
	 *     High 32 bits of TC are in %edx
	 *     Low 32 bits of TC are in %eax
	 */
	shlq	$32, %rdx
	movl	%eax, %r14d
	orq	%rdx, %r14		/* %r14 = full 64-bit timestamp */
	movq	%rcx, %rax		/* cmpxchg compares against %rax */
	cmpxchgq %r14, T_INTR_START(%r12)
	jnz	0b			/* field changed under us, retry */
1:
	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	T_PC(%r12), %rax	/* saved return addr */
	RESTORE_REGS(%r11)
	pushq	%rax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_from_intr_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_intr)

#elif defined (__i386)

	/*
	 * void resume_from_intr(kthread_t *t)			(i386)
	 *
	 * In:	t is the first stack argument; SAVE_REGS loads it into %edi.
	 *
	 * Register roles:
	 *	%edi = t (new thread); set by SAVE_REGS
	 *	%esi = outgoing thread, whose t_lock is released here
	 *	%ebx, %ecx = cmpxchg8b operands when t is an interrupt thread
	 *
	 * Unlike resume(), this path does not touch the LDT, the hat
	 * context, the TSS or t's t_lock, and it does not pass through
	 * the idle thread's stack.
	 *
	 * The routine leaves by pushing t's saved PC as the return address
	 * and jumping to spl0().
	 */
	ENTRY(resume_from_intr)
	movl	%gs:CPU_THREAD, %eax
	movl	$resume_from_intr_return, %ecx

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_intr_return.
	 *
	 * %edi = t (new thread) when done.
	 */
	SAVE_REGS(%eax, %ecx)

	movl	%gs:CPU_THREAD, %esi	/* %esi = curthread */
	movl	%edi, %gs:CPU_THREAD	/* set CPU's thread pointer */
	movl	T_SP(%edi), %esp	/* restore resuming thread's sp */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */

	/*
	 * Unlock outgoing thread's mutex so that it can be dispatched
	 * by another processor.
	 */
	xorl	%eax,%eax
	xchgb	%al, T_LOCK(%esi)

	/*
	 * If we are resuming an interrupt thread, store a timestamp in
	 * the thread structure.
	 */
	testw	$T_INTR_THREAD, T_FLAGS(%edi)
	jz	1f
0:
	/*
	 * If an interrupt occurs between the rdtsc instruction and its
	 * subsequent store, the timestamp will be stale by the time it is
	 * stored. We can detect this by doing a compare-and-swap on the
	 * thread's timestamp, since any interrupt occurring in this window
	 * will put a new timestamp in the thread's t_intr_start field.
	 */
	pushl	T_INTR_START(%edi)	/* expected old value, low word */
	pushl	T_INTR_START+4(%edi)	/* expected old value, high word */
	.globl	_tsc_patch16
_tsc_patch16:
	nop; nop			/* patched to rdtsc if available */
	movl	%eax, %ebx		/* %ecx:%ebx = new value */
	movl	%edx, %ecx
	popl	%edx			/* %edx:%eax = expected old value */
	popl	%eax
	cmpxchg8b T_INTR_START(%edi)
	jnz	0b			/* field changed under us, retry */
1:
	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movl	T_PC(%edi), %eax	/* saved return addr */
	RESTORE_REGS(%ecx)
	pushl	%eax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_from_intr_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addl	$CLONGSIZE, %esp
	ret
	SET_SIZE(resume_from_intr)

#endif	/* __amd64 */
#endif /* __lint */

#if defined(__lint)

void
thread_start(void)
{}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void thread_start(void)				(amd64)
	 *
	 * Expects three words on top of the stack: the start routine, its
	 * argument and a length.  They are popped into %rax, %rdi and %rsi
	 * and the start routine is called as start(arg, len).
	 * NOTE(review): the initial stack is presumably laid out by
	 * thread_load(); confirm against that routine.
	 */
	ENTRY(thread_start)
	popq	%rax		/* start() */
	popq	%rdi		/* arg */
	popq	%rsi		/* len */
	movq	%rsp, %rbp
	call	*%rax
	call	thread_exit	/* destroy thread if it returns. */
	/*NOTREACHED*/
	SET_SIZE(thread_start)

#elif defined(__i386)

	/*
	 * void thread_start(void)				(i386)
	 *
	 * Expects the start routine on top of the stack, followed by two
	 * words that stay on the stack as the start routine's arguments
	 * (presumably arg and len, by analogy with the amd64 variant).
	 * %ebp is set just past those two words before the call, and they
	 * are dropped again once the start routine returns.
	 */
	ENTRY(thread_start)
	popl	%eax		/* start() */
	movl	%esp, %ebp
	addl	$8, %ebp
	call	*%eax
	addl	$8, %esp	/* drop the two argument words */
	call	thread_exit	/* destroy thread if it returns. */
	/*NOTREACHED*/
	SET_SIZE(thread_start)

#endif	/* __amd64 */

#endif  /* __lint */