xref: /titanic_52/usr/src/uts/intel/ia32/ml/swtch.s (revision f6c0ee12afa096973e277ee50c7e9a04030b129b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Process switching routines.
31 */
32
33#if defined(__lint)
34#include <sys/thread.h>
35#include <sys/systm.h>
36#include <sys/time.h>
37#else	/* __lint */
38#include "assym.h"
39#endif	/* __lint */
40
41#include <sys/asm_linkage.h>
42#include <sys/asm_misc.h>
43#include <sys/regset.h>
44#include <sys/privregs.h>
45#include <sys/stack.h>
46#include <sys/segments.h>
47
48/*
49 * resume(kthread_t *t);
50 *
51 * a thread can only run on one processor at a time. there
52 * exists a window on MPs where the current thread on one
53 * processor is capable of being dispatched by another processor.
54 * some overlap between outgoing and incoming threads can happen
55 * when they are the same thread. in this case where the threads
56 * are the same, resume() on one processor will spin on the incoming
57 * thread until resume() on the other processor has finished with
58 * the outgoing thread.
59 *
60 * The MMU context changes when the resuming thread resides in a different
61 * process.  Kernel threads are known by resume to reside in process 0.
62 * The MMU context, therefore, only changes when resuming a thread in
63 * a process different from curproc.
64 *
65 * resume_from_intr() is called when the thread being resumed was not
66 * passivated by resume (e.g. was interrupted).  This means that the
67 * resume lock is already held and that a restore context is not needed.
68 * Also, the MMU context is not changed on the resume in this case.
69 *
70 * resume_from_zombie() is the same as resume except the calling thread
71 * is a zombie and must be put on the deathrow list after the CPU is
72 * off the stack.
73 */
74
75#if !defined(__lint)
76
77#if LWP_PCB_FPU != 0
78#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
79#endif	/* LWP_PCB_FPU != 0 */
80
81#endif	/* !__lint */
82
83#if defined(__amd64)
84
85/*
86 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
87 *
88 * The stack frame must be created before the save of %rsp so that tracebacks
89 * of swtch()ed-out processes show the process as having last called swtch().
 *
 * Arguments:
 *	thread_t - register holding the thread whose T_* save slots are written
 *	retaddr  - register holding the address to record in T_PC
 *
 * Side effects beyond the register saves: %rbp is pushed and left on the
 * stack (the *_return labels in this file discard it with
 * addq $CLONGSIZE, %rsp), %rbp is set to the new %rsp, T_SP records that
 * %rsp, %rdi is copied into %r12, and __dtrace_probe___sched_off__cpu is
 * called.
90 */
91#define SAVE_REGS(thread_t, retaddr)			\
92	movq	%rbp, T_RBP(thread_t);			\
93	movq	%rbx, T_RBX(thread_t);			\
94	movq	%r12, T_R12(thread_t);			\
95	movq	%r13, T_R13(thread_t);			\
96	movq	%r14, T_R14(thread_t);			\
97	movq	%r15, T_R15(thread_t);			\
98	pushq	%rbp;					\
99	movq	%rsp, %rbp;				\
100	movq	%rsp, T_SP(thread_t);			\
101	movq	retaddr, T_PC(thread_t);		\
102	movq	%rdi, %r12;				\
103	call	__dtrace_probe___sched_off__cpu
104
105/*
106 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
107 *
108 * We load up %rsp from the label_t as part of the context switch, so
109 * we don't repeat that here.
110 *
111 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
112 * already has the effect of putting the stack back the way it was when
113 * we came in.
 *
 * scratch_reg is overwritten with %gs:CPU_THREAD, and the registers are
 * reloaded from that thread's T_* slots, so the CPU's thread pointer must
 * already name the thread being resumed when this macro is used.
114 */
115#define RESTORE_REGS(scratch_reg)			\
116	movq	%gs:CPU_THREAD, scratch_reg;		\
117	movq	T_RBP(scratch_reg), %rbp;		\
118	movq	T_RBX(scratch_reg), %rbx;		\
119	movq	T_R12(scratch_reg), %r12;		\
120	movq	T_R13(scratch_reg), %r13;		\
121	movq	T_R14(scratch_reg), %r14;		\
122	movq	T_R15(scratch_reg), %r15
123
124/*
125 * Get pointer to a thread's hat structure
 *
 * Follows three pointers starting from thread_t: T_PROCP, then P_AS, then
 * A_HAT.  The result is left in hatp; hatp also holds the intermediate
 * T_PROCP value and scratch_reg is overwritten with the P_AS value.
 * thread_t itself is not modified.
126 */
127#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
128	movq	T_PROCP(thread_t), hatp;		\
129	movq	P_AS(hatp), scratch_reg;		\
130	movq	A_HAT(scratch_reg), hatp
131
132#elif defined (__i386)
133
134/*
135 * Save non-volatile registers (%ebp, %esi, %edi and %ebx)
136 *
137 * The stack frame must be created before the save of %esp so that tracebacks
138 * of swtch()ed-out processes show the process as having last called swtch().
 *
 * Arguments:
 *	thread_t - register holding the thread whose T_* save slots are written
 *	retaddr  - register holding the address to record in T_PC
 *
 * Side effects beyond the register saves: %ebp is pushed and left on the
 * stack (the *_return labels in this file discard it with
 * addl $CLONGSIZE, %esp), %ebp is set to the new %esp, and T_SP records
 * that %esp.  %edi is then loaded from 8(%ebp), which is the caller's first
 * stack argument given the frame just built, and is passed on the stack to
 * __dtrace_probe___sched_off__cpu; the argument is popped again afterwards.
139 */
140#define SAVE_REGS(thread_t, retaddr)			\
141	movl	%ebp, T_EBP(thread_t);			\
142	movl	%ebx, T_EBX(thread_t);			\
143	movl	%esi, T_ESI(thread_t);			\
144	movl	%edi, T_EDI(thread_t);			\
145	pushl	%ebp;					\
146	movl	%esp, %ebp;				\
147	movl	%esp, T_SP(thread_t);			\
148	movl	retaddr, T_PC(thread_t);		\
149	movl	8(%ebp), %edi;				\
150	pushl	%edi;					\
151	call	__dtrace_probe___sched_off__cpu;	\
152	addl	$CLONGSIZE, %esp
153
154/*
155 * Restore non-volatile registers (%ebp, %esi, %edi and %ebx)
156 *
157 * We don't do a 'leave,' because reloading %esp/%ebp from the label_t
158 * already has the effect of putting the stack back the way it was when
159 * we came in.
 *
 * scratch_reg is overwritten with %gs:CPU_THREAD, and the registers are
 * reloaded from that thread's T_* slots, so the CPU's thread pointer must
 * already name the thread being resumed when this macro is used.
160 */
161#define RESTORE_REGS(scratch_reg)			\
162	movl	%gs:CPU_THREAD, scratch_reg;		\
163	movl	T_EBP(scratch_reg), %ebp;		\
164	movl	T_EBX(scratch_reg), %ebx;		\
165	movl	T_ESI(scratch_reg), %esi;		\
166	movl	T_EDI(scratch_reg), %edi
167
168/*
169 * Get pointer to a thread's hat structure
 *
 * Follows three pointers starting from thread_t: T_PROCP, then P_AS, then
 * A_HAT.  The result is left in hatp; hatp also holds the intermediate
 * T_PROCP value and scratch_reg is overwritten with the P_AS value.
 * Because each step consumes the previous value before overwriting it,
 * hatp and scratch_reg may name the same register, as the i386 callers
 * in this file do.
170 */
171#define GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
172	movl	T_PROCP(thread_t), hatp;		\
173	movl	P_AS(hatp), scratch_reg;		\
174	movl	A_HAT(scratch_reg), hatp
175
176#endif	/* __amd64 */
177
178#if defined(__lint)
179
180/* ARGSUSED */
181void
182resume(kthread_t *t)
183{}	/* empty stub compiled only under __lint; the assembly version is used otherwise */
184
185#else	/* __lint */
186
187#if defined(__amd64)
188
189	ENTRY(resume)
	/*
	 * Register usage in this routine (amd64):
	 *	%r12 = t, the thread being resumed (copied from %rdi by
	 *	       SAVE_REGS)
	 *	%r13 = outgoing thread until its t_lock is cleared, then the
	 *	       CPU pointer, and finally the resumed thread's saved T_PC
	 *	%r14 = outgoing proc, then outgoing lwp, then the TSS pointer,
	 *	       and the new timestamp in the interrupt-thread path
	 *	%r15 = CPU pointer while the outgoing thread is being put away
	 */
190	movq	%gs:CPU_THREAD, %rax
191	leaq	resume_return(%rip), %r11
192
193	/*
194	 * Save non-volatile registers, and set return address for current
195	 * thread to resume_return.
196	 *
197	 * %r12 = t (new thread) when done
198	 */
199	SAVE_REGS(%rax, %r11)
200
201	LOADCPU(%r15)				/* %r15 = CPU */
202	movq	CPU_THREAD(%r15), %r13		/* %r13 = curthread */
203
204	/*
205	 * Call savectx if thread has installed context ops.
206	 *
207	 * Note that if we have floating point context, the save op
208	 * (either fpsave_begin or fpxsave_begin) will issue the
209	 * async save instruction (fnsave or fxsave respectively)
210	 * that we fwait for below.
211	 */
212	cmpq	$0, T_CTX(%r13)		/* should current thread savectx? */
213	je	.nosavectx		/* skip call when zero */
214
215	movq	%r13, %rdi		/* arg = thread pointer */
216	call	savectx			/* call ctx ops */
217
218.nosavectx:
219        /*
220         * Call savepctx if process has installed context ops.
221         */
222	movq	T_PROCP(%r13), %r14	/* %r14 = proc */
223        cmpq    $0, P_PCTX(%r14)         /* should current process savepctx? */
224        je      .nosavepctx              /* skip call when zero */
225
226        movq    %r14, %rdi              /* arg = proc pointer */
227        call    savepctx                 /* call ctx ops */
228.nosavepctx:
229
230	/*
231	 * Temporarily switch to the idle thread's stack
232	 */
233	movq	CPU_IDLE_THREAD(%r15), %rax 	/* idle thread pointer */
234
235	/*
236	 * Set the idle thread as the current thread
237	 */
238	movq	T_SP(%rax), %rsp	/* It is safe to set rsp */
239	movq	%rax, CPU_THREAD(%r15)
240
241	/*
242	 * Switch in the hat context for the new thread
243	 * (%rdi = hat pointer reached from %r12; %r11 is clobbered)
244	 */
245	GET_THREAD_HATP(%rdi, %r12, %r11)
246	call	hat_switch
247
	/*
	 * If the outgoing thread has an lwp whose PCB_FPU_FLAGS is exactly
	 * FPU_EN, finish the FPU save at .wait_for_fpusave before the
	 * thread is unlocked.  T_LWP is used directly as the base for
	 * PCB_FPU_FLAGS; see the LWP_PCB_FPU check near the top of the file.
	 */
248	movq	T_LWP(%r13), %r14
249	testq	%r14, %r14
250	jz	.disabled_fpu2
251
252	cmpl	$FPU_EN, PCB_FPU_FLAGS(%r14)
253	je	.wait_for_fpusave
254
255.disabled_fpu2:
256	/*
257	 * Clear and unlock previous thread's t_lock
258	 * to allow it to be dispatched by another processor.
259	 */
260	movb	$0, T_LOCK(%r13)
261
262	/*
263	 * IMPORTANT: Registers at this point must be:
264	 *       %r12 = new thread
265	 *
266	 * Here we are in the idle thread, have dropped the old thread.
	 * resume_from_zombie() also jumps to this entry point.
267	 */
268	ALTENTRY(_resume_from_idle)
269	/*
270	 * spin until dispatched thread's mutex has
271	 * been unlocked. this mutex is unlocked when
272	 * it becomes safe for the thread to run.
273	 */
274.lock_thread_mutex:
275	lock
276	btsl	$0, T_LOCK(%r12) 	/* attempt to lock new thread's mutex */
277	jnc	.thread_mutex_locked	/* got it */
278
279.spin_thread_mutex:
280	pause
281	cmpb	$0, T_LOCK(%r12)	/* check mutex status */
282	jz	.lock_thread_mutex	/* clear, retry lock */
283	jmp	.spin_thread_mutex	/* still locked, spin... */
284
285.thread_mutex_locked:
286	/*
287	 * Fix CPU structure to indicate new running thread.
288	 * Set pointer in new thread to the CPU structure.
289	 */
290	LOADCPU(%r13)			/* load current CPU pointer */
291	cmpq	%r13, T_CPU(%r12)
292	je	.setup_cpu
293
294	/* cp->cpu_stats.sys.cpumigrate++ */
295	incq    CPU_STATS_SYS_CPUMIGRATE(%r13)
296	movq	%r13, T_CPU(%r12)	/* set new thread's CPU pointer */
297
298.setup_cpu:
299	/*
300	 * Setup rsp0 (kernel stack) in TSS to curthread's stack.
301	 * (Note: Since we don't have saved 'regs' structure for all
302	 *	  the threads we can't easily determine if we need to
303	 *	  change rsp0. So, we simply change the rsp0 to bottom
304	 *	  of the thread stack and it will work for all cases.)
305	 *
306	 * XX64 - Is this correct?
307	 */
308	movq	CPU_TSS(%r13), %r14
309	movq	T_STACK(%r12), %rax
310	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
311	movq	%rax, TSS_RSP0(%r14)
312
313	movq	%r12, CPU_THREAD(%r13)	/* set CPU's thread pointer */
314	xorl	%ebp, %ebp		/* make $<threadlist behave better */
315	movq	T_LWP(%r12), %rax 	/* set associated lwp to  */
316	movq	%rax, CPU_LWP(%r13) 	/* CPU's lwp ptr */
317
318	movq	T_SP(%r12), %rsp	/* switch to resumed thread's stack */
319	movq	T_PC(%r12), %r13	/* saved return addr */
320
321	/*
322	 * Call restorectx if context ops have been installed.
323	 */
324	cmpq	$0, T_CTX(%r12)		/* should resumed thread restorectx? */
325	jz	.norestorectx		/* skip call when zero */
326	movq	%r12, %rdi		/* arg = thread pointer */
327	call	restorectx		/* call ctx ops */
328.norestorectx:
329
330	/*
331	 * Call restorepctx if context ops have been installed for the proc.
332	 */
333	movq	T_PROCP(%r12), %rcx
334	cmpq	$0, P_PCTX(%rcx)
335	jz	.norestorepctx
336	movq	%rcx, %rdi
337	call	restorepctx
338.norestorepctx:
339
340	/*
341	 * If we are resuming an interrupt thread, store a timestamp
342	 * in the thread structure.
343	 */
344	testw	$T_INTR_THREAD, T_FLAGS(%r12)
345	jz	1f
346
3470:
348	/*
349	 * If an interrupt occurs between the rdtsc instruction and its
350	 * subsequent store, the timestamp will be stale by the time it is
351	 * stored. We can detect this by doing a compare-and-swap on the
352	 * thread's timestamp, since any interrupt occurring in this window
353	 * will put a new timestamp in the thread's t_intr_start field.
354	 */
355	movq	T_INTR_START(%r12), %rcx
356	rdtsc
357
358	/*
359	 * After rdtsc:
360	 *     High 32 bits of TC are in %edx
361	 *     Low 32 bits of TC are in %eax
	 *
	 * Combine them into %r14 as the new value, put the previously read
	 * t_intr_start in %rax as the expected value, and retry from 0: if
	 * the field changed in the meantime.
362	 */
363	shlq	$32, %rdx
364	movl	%eax, %r14d
365	orq	%rdx, %r14
366	movq	%rcx, %rax
367	cmpxchgq %r14, T_INTR_START(%r12)
368	jnz	0b
3691:
370	/*
371	 * Restore non-volatile registers, then have spl0 return to the
372	 * resuming thread's PC after first setting the priority as low as
373	 * possible and blocking all interrupt threads that may be active.
374	 */
375	movq	%r13, %rax	/* save return address */
376	RESTORE_REGS(%r11)
377	pushq	%rax		/* push return address for spl0() */
378	call	__dtrace_probe___sched_on__cpu
379	jmp	spl0
380
381resume_return:
382	/*
383	 * Remove stack frame created in SAVE_REGS()
	 *
	 * SAVE_REGS stored this label in T_PC, and the resume path above
	 * pushes the saved T_PC as the return address for spl0, so a thread
	 * that was switched out in resume() continues here.
384	 */
385	addq	$CLONGSIZE, %rsp
386	ret
387
388.wait_for_fpusave:
	/*
	 * Reached from the PCB_FPU_FLAGS test above with %r14 = outgoing
	 * thread's lwp.  Jumps back to .disabled_fpu2 to unlock the thread.
	 */
389	/* mark copy in pcb as valid */
390	movq	%cr0, %rax
391	movl	$_CONST(FPU_VALID|FPU_EN), PCB_FPU_FLAGS(%r14)
392	orl	$CR0_TS, %eax	/* set to trap on next switch */
393	fwait			/* ensure save is done before we unlock */
394	finit			/* (ensure x87 tags cleared for fxsave case) */
395	movq	%rax, %cr0
396	jmp	.disabled_fpu2
397	SET_SIZE(_resume_from_idle)
398	SET_SIZE(resume)
399
400#elif defined (__i386)
401
402	ENTRY(resume)
	/*
	 * Register usage in this routine (i386):
	 *	%edi = t, the thread being resumed (loaded by SAVE_REGS)
	 *	%esi = outgoing thread until its t_lock is cleared, then the
	 *	       CPU pointer, and finally the resumed thread's saved T_PC
	 *	%ebx = CPU pointer, then the outgoing thread's lwp pointer
	 */
403	movl	%gs:CPU_THREAD, %eax
404	movl	$resume_return, %ecx
405
406	/*
407	 * Save non-volatile registers, and set return address for current
408	 * thread to resume_return.
409	 *
410	 * %edi = t (new thread) when done.
411	 */
412	SAVE_REGS(%eax,  %ecx)
413
414	LOADCPU(%ebx)			/* %ebx = CPU */
415	movl	CPU_THREAD(%ebx), %esi	/* %esi = curthread */
416
417	/*
418	 * Call savectx if thread has installed context ops.
419	 *
420	 * Note that if we have floating point context, the save op
421	 * (either fpsave_begin or fpxsave_begin) will issue the
422	 * async save instruction (fnsave or fxsave respectively)
423	 * that we fwait for below.
424	 */
425	movl	T_CTX(%esi), %eax	/* should current thread savectx? */
426	testl	%eax, %eax
427	jz	.nosavectx		/* skip call when zero */
428	pushl	%esi			/* arg = thread pointer */
429	call	savectx			/* call ctx ops */
430	addl	$4, %esp		/* restore stack pointer */
431
432.nosavectx:
433        /*
434         * Call savepctx if process has installed context ops.
435         */
436	movl	T_PROCP(%esi), %eax	/* %eax = proc */
437	cmpl	$0, P_PCTX(%eax)	/* should current process savepctx? */
438	je	.nosavepctx		/* skip call when zero */
439	pushl	%eax			/* arg = proc pointer */
440	call	savepctx		/* call ctx ops */
441	addl	$4, %esp
442.nosavepctx:
443
444	/*
445	 * Temporarily switch to the idle thread's stack
446	 */
447	movl	CPU_IDLE_THREAD(%ebx), %eax 	/* idle thread pointer */
448
449	/*
450	 * Set the idle thread as the current thread
	 *
	 * %ebx stops being the CPU pointer here: it is reused to hold the
	 * outgoing thread's T_LWP value, which doubles as the pcb_fpu
	 * pointer (see the LWP_PCB_FPU check near the top of the file).
451	 */
452	movl	T_SP(%eax), %esp	/* It is safe to set esp */
453	movl	%eax, CPU_THREAD(%ebx)
454	movl	T_LWP(%esi), %ecx	/* load pointer to pcb_fpu */
455	movl	%ecx, %ebx		/* save pcb_fpu pointer in %ebx */
456
457	/* switch in the hat context for the new thread */
458	GET_THREAD_HATP(%ecx, %edi, %ecx)
459	pushl	%ecx
460	call	hat_switch
461	addl	$4, %esp
462
463	xorl	%ecx, %ecx			/* %cl = 0 for the t_lock store below */
464	testl	%ebx, %ebx			/* check pcb_fpu pointer */
465	jz	.disabled_fpu2
466	cmpl	$FPU_EN, PCB_FPU_FLAGS(%ebx)	/* is PCB_FPU_FLAGS FPU_EN? */
467	je	.wait_for_fpusave
468.disabled_fpu2:
469	/*
470	 * Clear and unlock previous thread's t_lock
471	 * to allow it to be dispatched by another processor.
472	 */
473	movb	%cl, T_LOCK(%esi)
474
475	/*
476	 * IMPORTANT: Registers at this point must be:
477	 *       %edi = new thread
478	 *
479	 * Here we are in the idle thread, have dropped the old thread.
	 * resume_from_zombie() also jumps to this entry point.
480	 */
481	ALTENTRY(_resume_from_idle)
482	/*
483	 * spin until dispatched thread's mutex has
484	 * been unlocked. this mutex is unlocked when
485	 * it becomes safe for the thread to run.
486	 */
487.L4:
488	lock
489	btsl	$0, T_LOCK(%edi) /* lock new thread's mutex */
490	jc	.L4_2			/* lock did not succeed */
491
492	/*
493	 * Fix CPU structure to indicate new running thread.
494	 * Set pointer in new thread to the CPU structure.
495	 */
496	LOADCPU(%esi)			/* load current CPU pointer */
497	movl	T_STACK(%edi), %eax	/* here to use v pipeline of */
498					/* Pentium. Used few lines below */
499	cmpl	%esi, T_CPU(%edi)
500	jne	.L5_2			/* CPU changed: count migration, then return to .L5_1 */
501.L5_1:
502	/*
503	 * Setup esp0 (kernel stack) in TSS to curthread's stack.
504	 * (Note: Since we don't have saved 'regs' structure for all
505	 *	  the threads we can't easily determine if we need to
506	 *	  change esp0. So, we simply change the esp0 to bottom
507	 *	  of the thread stack and it will work for all cases.)
508	 */
509	movl	CPU_TSS(%esi), %ecx
510	addl	$REGSIZE+MINFRAME, %eax	/* to the bottom of thread stack */
511	movl	%eax, TSS_ESP0(%ecx)
512
513	movl	%edi, CPU_THREAD(%esi)	/* set CPU's thread pointer */
514	xorl	%ebp, %ebp		/* make $<threadlist behave better */
515	movl	T_LWP(%edi), %eax 	/* set associated lwp to  */
516	movl	%eax, CPU_LWP(%esi) 	/* CPU's lwp ptr */
517
518	movl	T_SP(%edi), %esp	/* switch to resumed thread's stack */
519	movl	T_PC(%edi), %esi	/* saved return addr */
520
521	/*
522	 * Call restorectx if context ops have been installed.
523	 */
524	movl	T_CTX(%edi), %eax	/* should resumed thread restorectx? */
525	testl	%eax, %eax
526	jz	.norestorectx		/* skip call when zero */
527	pushl	%edi			/* arg = thread pointer */
528	call	restorectx		/* call ctx ops */
529	addl	$4, %esp		/* restore stack pointer */
530.norestorectx:
531
532	/*
533	 * Call restorepctx if context ops have been installed for the proc.
534	 */
535	movl	T_PROCP(%edi), %eax
536	cmpl	$0, P_PCTX(%eax)
537	je	.norestorepctx
538	pushl	%eax			/* arg = proc pointer */
539	call	restorepctx
540	addl	$4, %esp		/* restore stack pointer */
541.norestorepctx:
542
543	/*
544	 * If we are resuming an interrupt thread, store a timestamp
545	 * in the thread structure.
546	 */
547	testw	$T_INTR_THREAD, T_FLAGS(%edi)
548	jz	1f
549	pushl	%ecx			/* saved around the loop, which overwrites %ecx */
5500:
551	/*
552	 * If an interrupt occurs between the rdtsc instruction and its
553	 * subsequent store, the timestamp will be stale by the time it is
554	 * stored. We can detect this by doing a compare-and-swap on the
555	 * thread's timestamp, since any interrupt occurring in this window
556	 * will put a new timestamp in the thread's t_intr_start field.
	 *
	 * The current t_intr_start is pushed (low word, then high word) and
	 * later popped into %edx:%eax as the expected value for cmpxchg8b;
	 * the value left in %edx:%eax at _tsc_patch15 is moved to %ecx:%ebx
	 * as the replacement value.
557	 */
558	pushl	T_INTR_START(%edi)
559	pushl	T_INTR_START+4(%edi)
560	.globl	_tsc_patch15
561_tsc_patch15:
562	nop; nop			/* patched to rdtsc if available */
563	movl	%eax, %ebx
564	movl	%edx, %ecx
565	popl	%edx
566	popl	%eax
567	cmpxchg8b T_INTR_START(%edi)
568	jnz	0b
569	popl	%ecx
5701:
571	/*
572	 * Restore non-volatile registers, then have spl0 return to the
573	 * resuming thread's PC after first setting the priority as low as
574	 * possible and blocking all interrupt threads that may be active.
575	 */
576	movl	%esi, %eax		/* save return address */
577	RESTORE_REGS(%ecx)
578	pushl	%eax			/* push return address for spl0() */
579	call	__dtrace_probe___sched_on__cpu
580	jmp	spl0
581
582resume_return:
583	/*
584	 * Remove stack frame created in SAVE_REGS()
	 *
	 * SAVE_REGS stored this label in T_PC, and the resume path above
	 * pushes the saved T_PC as the return address for spl0, so a thread
	 * that was switched out in resume() continues here.
585	 */
586	addl	$CLONGSIZE, %esp
587	ret
588
589.wait_for_fpusave:
	/*
	 * Reached from the PCB_FPU_FLAGS test above with %ebx = outgoing
	 * thread's lwp.  Only %eax is used here, so %ecx is still zero when
	 * control returns to .disabled_fpu2.
	 */
590	mov	%cr0, %eax
591
592	/* mark copy in pcb as valid */
593	movl	$_CONST(FPU_VALID|FPU_EN), PCB_FPU_FLAGS(%ebx)
594
595	orl	$CR0_TS, %eax			/* set to trap on next switch */
596	fwait			/* ensure save is done before we unlock */
597	finit			/* (ensure x87 tags cleared for fxsave case) */
598	movl	%eax, %cr0
599	jmp	.disabled_fpu2
600
601.L4_2:
	/* new thread's t_lock is held: spin until it reads zero, then retry at .L4 */
602	pause
603	cmpb	$0, T_LOCK(%edi)
604	je	.L4
605	jmp	.L4_2
606
607.L5_2:
608	/* cp->cpu_stats.sys.cpumigrate++ */
609	addl    $1, CPU_STATS_SYS_CPUMIGRATE(%esi)
610	adcl    $0, CPU_STATS_SYS_CPUMIGRATE+4(%esi)
611	movl	%esi, T_CPU(%edi)	/* set new thread's CPU pointer */
612	jmp	.L5_1
613
614	SET_SIZE(_resume_from_idle)
615	SET_SIZE(resume)
616
617#endif	/* __amd64 */
618#endif	/* __lint */
619
620#if defined(__lint)
621
622/* ARGSUSED */
623void
624resume_from_zombie(kthread_t *t)
625{}	/* empty stub compiled only under __lint; the assembly version is used otherwise */
626
627#else	/* __lint */
628
629#if defined(__amd64)
630
631	ENTRY(resume_from_zombie)
	/*
	 * Register usage in this routine (amd64):
	 *	%r12 = t, the thread being resumed (copied from %rdi by
	 *	       SAVE_REGS)
	 *	%r13 = the calling (zombie) thread, handed to reapq_add
	 */
632	movq	%gs:CPU_THREAD, %rax
633	leaq	resume_from_zombie_return(%rip), %r11
634
635	/*
636	 * Save non-volatile registers, and set return address for current
637	 * thread to resume_from_zombie_return.
638	 *
639	 * %r12 = t (new thread) when done
640	 */
641	SAVE_REGS(%rax, %r11)
642
643	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */
644
645	/* clean up the fp unit. It might be left enabled */
646	movq	%cr0, %rax
647	testq	$CR0_TS, %rax
648	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
649	fninit				/* init fpu & discard pending error */
650
651	/*
652	 * Store a zero word into the mxcsr register to disable any sse
653	 * floating point exceptions
	 * (the zero is pushed on the stack, loaded through %rdi, and popped;
	 * CR0_TS is then set in %cr0)
654	 */
655	pushq	$0
656	movq	%rsp, %rdi
657	ldmxcsr	(%rdi)
658	addq	$CLONGSIZE, %rsp
659	orq	$CR0_TS, %rax
660	movq	%rax, %cr0
661.zfpu_disabled:
662
663	/*
664	 * Temporarily switch to the idle thread's stack so that the zombie
665	 * thread's stack can be reclaimed by the reaper.
666	 */
667	movq	%gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
668	movq	T_SP(%rax), %rsp	/* get onto idle thread stack */
669
670	/*
671	 * Sigh. If the idle thread has never run thread_start()
672	 * then t_sp is mis-aligned by thread_load().
	 * Round %rsp down to a STACK_ALIGN boundary.
673	 */
674	andq	$_BITNOT(STACK_ALIGN-1), %rsp
675
676	/*
677	 * Set the idle thread as the current thread.
678	 */
679	movq	%rax, %gs:CPU_THREAD
680
681	/* switch in the hat context for the new thread */
682	GET_THREAD_HATP(%rdi, %r12, %r11)
683	call	hat_switch
684
685	/*
686	 * Put the zombie on death-row.
687	 */
688	movq	%r13, %rdi
689	call	reapq_add
690
	/* _resume_from_idle requires %r12 = new thread, which still holds */
691	jmp	_resume_from_idle	/* finish job of resume */
692
693resume_from_zombie_return:
694	RESTORE_REGS(%r11)		/* restore non-volatile registers */
695	call	__dtrace_probe___sched_on__cpu
696
697	/*
698	 * Remove stack frame created in SAVE_REGS()
699	 */
700	addq	$CLONGSIZE, %rsp
701	ret
702	SET_SIZE(resume_from_zombie)
703
704#elif defined (__i386)
705
706	ENTRY(resume_from_zombie)
	/*
	 * Register usage in this routine (i386):
	 *	%edi = t, the thread being resumed (loaded by SAVE_REGS)
	 *	%esi = the calling (zombie) thread, handed to reapq_add
	 */
707	movl	%gs:CPU_THREAD, %eax
708	movl	$resume_from_zombie_return, %ecx
709
710	/*
711	 * Save non-volatile registers, and set return address for current
712	 * thread to resume_from_zombie_return.
713	 *
714	 * %edi = t (new thread) when done.
715	 */
716	SAVE_REGS(%eax, %ecx)
717
718	movl	%gs:CPU_THREAD, %esi	/* %esi = curthread */
719
720	/* clean up the fp unit. It might be left enabled */
721	movl	%cr0, %eax
722	testl	$CR0_TS, %eax
723	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
724	fninit				/* init fpu & discard pending error */
725
726	/*
727	 * If this machine supports fxsave/fxrstor, the next string of
728	 * nops may be patched to store a zero word off the stack into
729	 * the mxcsr register to disable any sse floating point exceptions
	 * (%ebx points at the zero word pushed just before the patch site;
	 * the word is popped afterwards and CR0_TS is then set in %cr0)
730	 */
731	pushl	$0
732	mov	%esp, %ebx
733	.globl	_patch_ldmxcsr_ebx
734_patch_ldmxcsr_ebx:
735	nop; nop; nop			/* ldmxcsr (%ebx) */
736	addl	$4, %esp
737	orl	$CR0_TS, %eax
738	movl	%eax, %cr0
739.zfpu_disabled:
740	/*
741	 * Temporarily switch to the idle thread's stack so that the zombie
742	 * thread's stack can be reclaimed by the reaper.
743	 */
744	movl	%gs:CPU_IDLE_THREAD, %eax /* idle thread pointer */
745	movl	T_SP(%eax), %esp	/* get onto idle thread stack */
746
747	/*
748	 * Set the idle thread as the current thread.
749	 */
750	movl	%eax, %gs:CPU_THREAD
751
752	/* switch in the hat context for the new thread */
753	GET_THREAD_HATP(%ecx, %edi, %ecx)
754	pushl	%ecx
755	call	hat_switch
756	addl	$4, %esp
757	/*
758	 * Put the zombie on death-row.
759	 */
760	pushl	%esi
761	call	reapq_add
762	addl	$4, %esp
	/* _resume_from_idle requires %edi = new thread, which still holds */
763	jmp	_resume_from_idle	/* finish job of resume */
764
765resume_from_zombie_return:
766	RESTORE_REGS(%ecx)		/* restore non-volatile registers */
767	call	__dtrace_probe___sched_on__cpu
768
769	/*
770	 * Remove stack frame created in SAVE_REGS()
771	 */
772	addl	$CLONGSIZE, %esp
773	ret
774	SET_SIZE(resume_from_zombie)
775
776#endif	/* __amd64 */
777#endif	/* __lint */
778
779#if defined(__lint)
780
781/* ARGSUSED */
782void
783resume_from_intr(kthread_t *t)
784{}	/* empty stub compiled only under __lint; the assembly version is used otherwise */
785
786#else	/* __lint */
787
788#if defined(__amd64)
789
790	ENTRY(resume_from_intr)
	/*
	 * Register usage in this routine (amd64):
	 *	%r12 = t, the thread being resumed (copied from %rdi by
	 *	       SAVE_REGS)
	 *	%r13 = outgoing thread
	 *	%r14 = new timestamp in the interrupt-thread path
	 *
	 * Unlike resume(), this path does not take t's t_lock, does not
	 * call hat_switch or the ctx ops, and switches directly to t's
	 * stack without going through the idle thread.
	 */
791	movq	%gs:CPU_THREAD, %rax
792	leaq	resume_from_intr_return(%rip), %r11
793
794	/*
795	 * Save non-volatile registers, and set return address for current
796	 * thread to resume_from_intr_return.
797	 *
798	 * %r12 = t (new thread) when done
799	 */
800	SAVE_REGS(%rax, %r11)
801
802	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */
803	movq	%r12, %gs:CPU_THREAD	/* set CPU's thread pointer */
804	movq	T_SP(%r12), %rsp	/* restore resuming thread's sp */
805	xorl	%ebp, %ebp		/* make $<threadlist behave better */
806
807	/*
808	 * Unlock outgoing thread's mutex so that it can be dispatched by
	 * another processor.
809	 */
810	xorl	%eax, %eax
811	xchgb	%al, T_LOCK(%r13)
812
813	/*
814	 * If we are resuming an interrupt thread, store a timestamp in
815	 * the thread structure.
816	 */
817	testw	$T_INTR_THREAD, T_FLAGS(%r12)
818	jz	1f
8190:
820	/*
821	 * If an interrupt occurs between the rdtsc instruction and its
822	 * subsequent store, the timestamp will be stale by the time it is
823	 * stored. We can detect this by doing a compare-and-swap on the
824	 * thread's timestamp, since any interrupt occurring in this window
825	 * will put a new timestamp in the thread's t_intr_start field.
826	 */
827	movq	T_INTR_START(%r12), %rcx
828	rdtsc
829
830	/*
831	 * After rdtsc:
832	 *     High 32 bits of TC are in %edx
833	 *     Low 32 bits of TC are in %eax
	 *
	 * Combine them into %r14 as the new value, put the previously read
	 * t_intr_start in %rax as the expected value, and retry from 0: if
	 * the field changed in the meantime.
834	 */
835	shlq	$32, %rdx
836	movl	%eax, %r14d
837	orq	%rdx, %r14
838	movq	%rcx, %rax
839	cmpxchgq %r14, T_INTR_START(%r12)
840	jnz	0b
8411:
842	/*
843	 * Restore non-volatile registers, then have spl0 return to the
844	 * resuming thread's PC after first setting the priority as low as
845	 * possible and blocking all interrupt threads that may be active.
846	 */
847	movq	T_PC(%r12), %rax	/* saved return addr */
848	RESTORE_REGS(%r11);
849	pushq	%rax			/* push return address for spl0() */
850	call	__dtrace_probe___sched_on__cpu
851	jmp	spl0
852
853resume_from_intr_return:
854	/*
855	 * Remove stack frame created in SAVE_REGS()
856	 */
857	addq 	$CLONGSIZE, %rsp
858	ret
859	SET_SIZE(resume_from_intr)
860
861#elif defined (__i386)
862
863	ENTRY(resume_from_intr)
	/*
	 * Register usage in this routine (i386):
	 *	%edi = t, the thread being resumed (loaded by SAVE_REGS)
	 *	%esi = outgoing thread
	 *
	 * Unlike resume(), this path does not take t's t_lock, does not
	 * call hat_switch or the ctx ops, and switches directly to t's
	 * stack without going through the idle thread.
	 */
864	movl	%gs:CPU_THREAD, %eax
865	movl	$resume_from_intr_return, %ecx
866
867	/*
868	 * Save non-volatile registers, and set return address for current
869	 * thread to resume_from_intr_return.
870	 *
871	 * %edi = t (new thread) when done.
872	 */
873	SAVE_REGS(%eax, %ecx)
874
875	movl	%gs:CPU_THREAD, %esi	/* %esi = curthread */
876	movl	%edi, %gs:CPU_THREAD	/* set CPU's thread pointer */
877	movl	T_SP(%edi), %esp	/* restore resuming thread's sp */
878	xorl	%ebp, %ebp		/* make $<threadlist behave better */
879
880	/*
881	 * Unlock outgoing thread's mutex so that it can be dispatched by
	 * another processor.
882	 */
883	xorl	%eax,%eax
884	xchgb	%al, T_LOCK(%esi)
885
886	/*
887	 * If we are resuming an interrupt thread, store a timestamp in
888	 * the thread structure.
889	 */
890	testw	$T_INTR_THREAD, T_FLAGS(%edi)
891	jz	1f
8920:
893	/*
894	 * If an interrupt occurs between the rdtsc instruction and its
895	 * subsequent store, the timestamp will be stale by the time it is
896	 * stored. We can detect this by doing a compare-and-swap on the
897	 * thread's timestamp, since any interrupt occurring in this window
898	 * will put a new timestamp in the thread's t_intr_start field.
	 *
	 * The current t_intr_start is pushed (low word, then high word) and
	 * later popped into %edx:%eax as the expected value for cmpxchg8b;
	 * the value left in %edx:%eax at _tsc_patch16 is moved to %ecx:%ebx
	 * as the replacement value.  %ebx is reloaded by RESTORE_REGS below.
899	 */
900	pushl	T_INTR_START(%edi)
901	pushl	T_INTR_START+4(%edi)
902	.globl	_tsc_patch16
903_tsc_patch16:
904	nop; nop			/* patched to rdtsc if available */
905	movl	%eax, %ebx
906	movl	%edx, %ecx
907	popl	%edx
908	popl	%eax
909	cmpxchg8b T_INTR_START(%edi)
910	jnz	0b
9111:
912	/*
913	 * Restore non-volatile registers, then have spl0 return to the
914	 * resuming thread's PC after first setting the priority as low as
915	 * possible and blocking all interrupt threads that may be active.
916	 */
917	movl	T_PC(%edi), %eax	/* saved return addr */
918	RESTORE_REGS(%ecx)
919	pushl	%eax			/* push return address for spl0() */
920	call	__dtrace_probe___sched_on__cpu
921	jmp	spl0
922
923resume_from_intr_return:
924	/*
925	 * Remove stack frame created in SAVE_REGS()
926	 */
927	addl	$CLONGSIZE, %esp
928	ret
929	SET_SIZE(resume_from_intr)
930
931#endif	/* __amd64 */
932#endif /* __lint */
933
934#if defined(__lint)
935
936void
937thread_start(void)
938{}	/* empty stub compiled only under __lint; the assembly version is used otherwise */
939
940#else   /* __lint */
941
942#if defined(__amd64)
943
944	ENTRY(thread_start)
	/*
	 * Entered with three words on top of the stack: the start routine,
	 * its argument, and a length.  They are popped into %rax, %rdi and
	 * %rsi, the start routine is called with %rbp set to the resulting
	 * %rsp, and thread_exit is called if the start routine returns.
	 * NOTE(review): the stack is presumably laid out by thread_load()
	 * (mentioned in resume_from_zombie) -- confirm against that code.
	 */
945	popq	%rax		/* start() */
946	popq	%rdi		/* arg */
947	popq	%rsi		/* len */
948	movq	%rsp, %rbp
949	call	*%rax
950	call	thread_exit	/* destroy thread if it returns. */
951	/*NOTREACHED*/
952	SET_SIZE(thread_start)
953
954#elif defined(__i386)
955
956	ENTRY(thread_start)
	/*
	 * Entered with the start routine's address on top of the stack.
	 * It is popped into %eax and called with the remaining stack
	 * contents in place; %ebp is set 8 bytes above the resulting %esp.
	 * After the call returns, 8 bytes are dropped from the stack and
	 * thread_exit is called.
	 * NOTE(review): the 8 bytes are presumably the arg and len words that
	 * the amd64 version pops into registers -- confirm against the code
	 * that builds the initial stack.
	 */
957	popl	%eax		/* start() */
958	movl	%esp, %ebp
959	addl	$8, %ebp
960	call	*%eax
961	addl	$8, %esp
962	call	thread_exit	/* destroy thread if it returns. */
963	/*NOTREACHED*/
964	SET_SIZE(thread_start)
965
966#endif	/* __i386 */
967
968#endif  /* __lint */
969