xref: /titanic_44/usr/src/uts/i86pc/ml/cpr_wakecode.s (revision 7257d1b4d25bfac0c802847390e98a464fd787ac)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/asm_linkage.h>
29#include <sys/asm_misc.h>
30#include <sys/regset.h>
31#include <sys/privregs.h>
32#include <sys/x86_archext.h>
33#include <sys/cpr_wakecode.h>
34
35#if !defined(__lint)
36#include <sys/segments.h>
37#include "assym.h"
38#endif
39
40/*
41 *	This file contains the low level routines involved in getting
42 *	into and out of ACPI S3, including those needed for restarting
43 *	the non-boot cpus.
44 *
45 *	Our assumptions:
46 *
47 *	Our actions:
48 *
49 */
50
51#if defined(lint) || defined(__lint)
52
/*
 * Lint-only stand-in for the assembly routine wc_save_context(); it gives
 * lint a C prototype to check callers against and simply returns 0.
 */
53/*ARGSUSED*/
54int
55wc_save_context(wc_cpu_t *pcpu)
56{ return 0; }
57
58#else	/* lint */
59
60#if defined(__GNU_AS__)
61
62	NOTHING AT ALL YET!
63
64#else	/* !defined(__GNU_AS__) */
65
66#if defined(__amd64)
67
68	ENTRY_NP(wc_save_context)
69
/*
 * wc_save_context(wc_cpu_t *pcpu) -- amd64 version.
 *
 * Records the caller's return address and the current CPU state in the
 * wc_cpu_t addressed by %rdi: descriptor-table registers, %cr0, %cr3, %cr4
 * and %cr8, the general and segment registers, the FS/GS/kernel-GS base
 * MSRs and the flags.  It then calls through the save_stack_func pointer,
 * flushes the caches with wbinvd and returns 1 in %rax.  The wakeup path
 * (kernel_wc_code in this file) reloads this state and returns 0 to the
 * recorded return address, a la setjmp.
 */
70	movq	(%rsp), %rdx		/ return address
71	movq	%rdx, WC_RETADDR(%rdi)
72
73/*
74 * C calling convention with no local variables, just use 1st arg ie %rdi
75 * and the registers. Therefore push caller's fp, set our fp to be sp and
76 * push %r12, %r13, %r14. At function end unwind this by: popping %r14, %r13,
77 * %r12, restore the sp from fp and pop caller's fp.
78 */
79
80	pushq	%rbp
81	movq	%rsp,%rbp
82	pushq	%r12
83	pushq	%r13
84	pushq	%r14
85
/* Both WC_VIRTADDR and WC_RDI receive the wc_cpu_t pointer itself. */
86	movq    %rdi, WC_VIRTADDR(%rdi)
87	movq    %rdi, WC_RDI(%rdi)
88
/*
 * %rdx still holds the return address loaded at the top of the function,
 * so that value (not the caller's %rdx) is what WC_RDX records.
 */
89	movq    %rdx, WC_RDX(%rdi)
90
91/ stash everything else we need
92	sgdt	WC_GDT(%rdi)
93	sidt	WC_IDT(%rdi)
94	sldt	WC_LDT(%rdi)
95	str	WC_TR(%rdi)
96
/* Control registers are copied through %rdx, which is scratch from here on. */
97	movq	%cr0, %rdx
98	movq	%rdx, WC_CR0(%rdi)
99	movq	%cr3, %rdx
100	movq	%rdx, WC_CR3(%rdi)
101	movq	%cr4, %rdx
102	movq	%rdx, WC_CR4(%rdi)
103	movq	%cr8, %rdx
104	movq	%rdx, WC_CR8(%rdi)
105
/*
 * %rbp and %rsp are stored after the prologue above, so they describe this
 * function's own frame (with %r12-%r14 already pushed), which is what the
 * epilogue in the wakeup path expects to unwind.
 */
106	movq    %r8, WC_R8(%rdi)
107	movq    %r9, WC_R9(%rdi)
108	movq    %r10, WC_R10(%rdi)
109	movq    %r11, WC_R11(%rdi)
110	movq    %r12, WC_R12(%rdi)
111	movq    %r13, WC_R13(%rdi)
112	movq    %r14, WC_R14(%rdi)
113	movq    %r15, WC_R15(%rdi)
114	movq    %rax, WC_RAX(%rdi)
115	movq    %rbp, WC_RBP(%rdi)
116	movq    %rbx, WC_RBX(%rdi)
117	movq    %rcx, WC_RCX(%rdi)
118	movq    %rsi, WC_RSI(%rdi)
119	movq    %rsp, WC_RSP(%rdi)
120
121	movw	%ss, WC_SS(%rdi)
122	movw	%cs, WC_CS(%rdi)
123	movw	%ds, WC_DS(%rdi)
124	movw	%es, WC_ES(%rdi)
125
/*
 * %fs and %gs selectors are widened to 64 bits via a zeroed %rcx before
 * being stored.  Each rdmsr below returns the MSR selected by %ecx in
 * %edx:%eax, which is stored as low dword / high dword.
 */
126	movq	$0, %rcx		/ save %fs register
127	movw    %fs, %cx
128	movq    %rcx, WC_FS(%rdi)
129
130	movl    $MSR_AMD_FSBASE, %ecx
131	rdmsr
132	movl    %eax, WC_FSBASE(%rdi)
133	movl    %edx, WC_FSBASE+4(%rdi)
134
135	movq	$0, %rcx		/ save %gs register
136	movw    %gs, %cx
137	movq    %rcx, WC_GS(%rdi)
138
139	movl    $MSR_AMD_GSBASE, %ecx	/ save gsbase msr
140	rdmsr
141	movl    %eax, WC_GSBASE(%rdi)
142	movl    %edx, WC_GSBASE+4(%rdi)
143
144	movl    $MSR_AMD_KGSBASE, %ecx	/ save kgsbase msr
145	rdmsr
146	movl    %eax, WC_KGSBASE(%rdi)
147	movl    %edx, WC_KGSBASE+4(%rdi)
148
/* Capture the flags register by pushing it and popping into the save area. */
149	pushfq
150	popq	WC_EFLAGS(%rdi)
151
152/*
153 * call save_stack(cpup)
154 * NB %rdi is the first argument to both wc_save_context() and save_stack()
155 * so it must not be modified during either of these calls.
156 * The pushq will decrement the value of %rsp
157 * we need to push the %rbp because it is the frame pointer and we need
158 * to use the C calling convention
159 */
160
161	pushq   %rbp
162	call	*save_stack_func
163	popq   %rbp
164
165	wbinvd				/ flush the cache
166
167	movq	$1, %rax		/ at suspend return 1
168
169/ see comment at function enter
170	popq	%r14
171	popq	%r13
172	popq	%r12
173	leave
174
175	ret
176
177	SET_SIZE(wc_save_context)
178
179#elif defined(__i386)
180
181	ENTRY_NP(wc_save_context)
182
/*
 * wc_save_context(wc_cpu_t *pcpu) -- i386 version.
 *
 * Takes the wc_cpu_t pointer from the stack argument into %eax and records
 * in it: the pointer itself (WC_VIRTADDR), the caller's return address, the
 * task/descriptor-table registers, %cr0/%cr3/%cr4, %ebx/%edi/%esi/%ebp/%esp,
 * the six segment registers and the flags.  No frame is built, so the saved
 * %esp is the entry value (pointing at the return address).  Flushes the
 * caches and returns 1; the wakeup path (kernel_wc_code) returns 0 to the
 * same caller.
 */
183	movl	4(%esp), %eax		/ wc_cpu_t *
184	movl	%eax, WC_VIRTADDR(%eax)
185
186	movl	(%esp), %edx		/ return address
187	movl	%edx, WC_RETADDR(%eax)
188
189	str	WC_TR(%eax)		/ stash everything else we need
190	sgdt	WC_GDT(%eax)
191	sldt	WC_LDT(%eax)
192	sidt	WC_IDT(%eax)
193
/* Control registers are copied through %edx as scratch. */
194	movl	%cr0, %edx
195	movl	%edx, WC_CR0(%eax)
196	movl	%cr3, %edx
197	movl	%edx, WC_CR3(%eax)
198	movl	%cr4, %edx
199	movl	%edx, WC_CR4(%eax)
200
201	movl	%ebx, WC_EBX(%eax)
202	movl	%edi, WC_EDI(%eax)
203	movl	%esi, WC_ESI(%eax)
204	movl	%ebp, WC_EBP(%eax)
205	movl	%esp, WC_ESP(%eax)
206
207	movw	%ss, WC_SS(%eax)
208	movw	%cs, WC_CS(%eax)
209	movw	%ds, WC_DS(%eax)
210	movw	%es, WC_ES(%eax)
211	movw	%fs, WC_FS(%eax)
212	movw	%gs, WC_GS(%eax)
213
/* Capture the flags by pushing them and popping into the save area. */
214	pushfl
215	popl	WC_EFLAGS(%eax)
216
217	wbinvd				/ flush the cache
218
219	movl	$1, %eax		/ at suspend return 1
220	ret
221
222	SET_SIZE(wc_save_context)
223
224#endif	/* __amd64 */
225
226#endif	/* __GNU_AS__ */
227
228#endif /* lint */
229
230
231/*
232 *	Our assumptions:
233 *		- We are running in real mode.
234 *		- Interrupts are disabled.
235 *
236 *	Our actions:
237 *		- We start using our GDT by loading correct values in the
238 *		  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
239 *		  gs=KGS_SEL).
240 *		- We change over to using our IDT.
241 *		- We load the default LDT into the hardware LDT register.
242 *		- We load the default TSS into the hardware task register.
243 *		- We restore registers
244 *		- We return to original caller (a la setjmp)
245 */
246
247#if defined(lint) || defined(__lint)
248
/* Lint-only empty stand-in for the assembly entry point wc_rm_start. */
249void
250wc_rm_start(void)
251{}
252
/* Lint-only empty stand-in for the assembly end marker wc_rm_end. */
253void
254wc_rm_end(void)
255{}
256
257#else	/* lint */
258
259#if defined(__GNU_AS__)
260
261	NOTHING AT ALL YET!
262
263#else	/* __GNU_AS__ */
264
265#if defined(__amd64)
266
267	ENTRY_NP(wc_rm_start)
268
/*
 * wc_rm_start -- amd64 wakeup entry, first stage.
 *
 * Sequence performed below:
 *   1. cli; point %ds and %ss at %cs and set %esp to WC_STKSTART.
 *   2. call cominit.
 *   3. set CR0.PE|WP|AM.
 *   4. OR the value stored at CR4OFF plus CR4_PAE into %cr4.
 *   5. load %cr3 from CR3OFF.
 *   6. set EFER.LME, then CR0.PG.
 *   7. load the temporary GDT/IDT (TEMPGDTOFF/TEMPIDTOFF).
 *   8. far-return to TEMP_CS64_SEL : (value stored at LM64OFF).
 *
 * LED and SERIAL are both defined as 0 here, so every "#if LED" and
 * "#if SERIAL" progress-marker block in this amd64 path is compiled out.
 */
269	/*
270	 * For vulcan as we need to do a .code32 and mentally invert the
271	 * meaning of the addr16 and data16 prefixes to get 32-bit access when
272	 * generating code to be executed in 16-bit mode (sigh...)
273	 */
274
275	.code32
276
277	cli
278	movw		%cs, %ax
279	movw		%ax, %ds		/ establish ds ...
280	movw		%ax, %ss		/ ... and ss:esp
281	D16 movl	$WC_STKSTART, %esp
282/ using the following value blows up machines! - DO NOT USE
283/	D16 movl	0xffc, %esp
284
285#define LED     0
286#define SERIAL  0
287
288#if     LED
289	D16 movl        $0x80, %edx
290	D16 movb        $0xd1, %al
291	outb    (%dx)
292#endif
293
294#if     SERIAL
295	D16 movl        $0x3f8, %edx
296	D16 movb        $0x61, %al
297	outb    (%dx)
298#endif
299
300	D16 call	cominit
301
302	/*
303	 * Enable protected-mode, write protect, and alignment mask
304	 * %cr0 has already been initialised to zero
305	 */
306	movl		%cr0, %eax
307	D16 orl		$[CR0_PE|CR0_WP|CR0_AM], %eax
308	movl		%eax, %cr0
309
310	/*
311	 * Do a jmp immediately after writing to cr0 when enabling protected
312	 * mode to clear the real mode prefetch queue (per Intel's docs)
313	 */
314	jmp		pestart
315pestart:
316
317#if     LED
318	D16 movl        $0x80, %edx
319	D16 movb        $0xd2, %al
320	outb    (%dx)
321#endif
322
323#if     SERIAL
324	D16 movl        $0x3f8, %edx
325	D16 movb        $0x62, %al
326	outb    (%dx)
327#endif
328
329	/*
330	 * 16-bit protected mode is now active, so prepare to turn on long
331	 * mode
332	 */
333
334#if     LED
335	D16 movl        $0x80, %edx
336	D16 movb        $0xd3, %al
337	outb    (%dx)
338#endif
339
340#if     SERIAL
341	D16 movl        $0x3f8, %edx
342	D16 movb        $0x63, %al
343	outb    (%dx)
344#endif
345
346	/*
347 	 * Add any initial cr4 bits
348	 */
349	movl		%cr4, %eax
350	A16 D16 orl	CR4OFF, %eax
351
352	/*
353	 * Enable PAE mode (CR4.PAE)
354	 */
355	D16 orl		$CR4_PAE, %eax
356	movl		%eax, %cr4
357
358#if     LED
359	D16 movl        $0x80, %edx
360	D16 movb        $0xd4, %al
361	outb    (%dx)
362#endif
363
364#if     SERIAL
365	D16 movl        $0x3f8, %edx
366	D16 movb        $0x64, %al
367	outb    (%dx)
368#endif
369
370	/*
371	 * Point cr3 to the 64-bit long mode page tables.
372	 *
373	 * Note that these MUST exist in 32-bit space, as we don't have
374	 * a way to load %cr3 with a 64-bit base address for the page tables
375	 * until the CPU is actually executing in 64-bit long mode.
376	 */
377	A16 D16 movl	CR3OFF, %eax
378	movl		%eax, %cr3
379
380	/*
381	 * Set long mode enable in EFER (EFER.LME = 1)
382	 */
383	D16 movl	$MSR_AMD_EFER, %ecx
384	rdmsr
385
386	D16 orl		$AMD_EFER_LME, %eax
387	wrmsr
388
389#if     LED
390	D16 movl        $0x80, %edx
391	D16 movb        $0xd5, %al
392	outb    (%dx)
393#endif
394
395#if     SERIAL
396	D16 movl        $0x3f8, %edx
397	D16 movb        $0x65, %al
398	outb    (%dx)
399#endif
400
401	/*
402	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
403	 */
404	movl		%cr0, %eax
405	D16 orl		$CR0_PG, %eax
406	movl		%eax, %cr0
407
408	/*
409	 * The instruction after enabling paging in CR0 MUST be a branch.
410	 */
411	jmp		long_mode_active
412
413long_mode_active:
414
415#if     LED
416	D16 movl        $0x80, %edx
417	D16 movb        $0xd6, %al
418	outb    (%dx)
419#endif
420
421#if     SERIAL
422	D16 movl        $0x3f8, %edx
423	D16 movb        $0x66, %al
424	outb    (%dx)
425#endif
426
427	/*
428	 * Long mode is now active but since we're still running with the
429	 * original 16-bit CS we're actually in 16-bit compatibility mode.
430	 *
431	 * We have to load an intermediate GDT and IDT here that we know are
432	 * in 32-bit space before we can use the kernel's GDT and IDT, which
433	 * may be in the 64-bit address space, and since we're in compatibility
434	 * mode, we only have access to 16 and 32-bit instructions at the
435	 * moment.
436	 */
437	A16 D16 lgdt	TEMPGDTOFF	/* load temporary GDT */
438	A16 D16 lidt	TEMPIDTOFF	/* load temporary IDT */
439
440
441	/*
442 	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
443	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
444	 * to the real mode platter address of wc_long_mode_64 as until the
445	 * 64-bit CS is in place we don't have access to 64-bit instructions
446	 * and thus can't reference a 64-bit %rip.
447	 */
448
449#if     LED
450	D16 movl        $0x80, %edx
451	D16 movb        $0xd7, %al
452	outb    (%dx)
453#endif
454
455#if     SERIAL
456	D16 movl        $0x3f8, %edx
457	D16 movb        $0x67, %al
458	outb    (%dx)
459#endif
460
	/*
	 * Build the far-return frame: selector first, then the offset read
	 * from LM64OFF; lret pops them into %cs:%eip.
	 */
461	D16 	pushl 	$TEMP_CS64_SEL
462	A16 D16 pushl	LM64OFF
463
464	D16 lret
465
466
467/*
468 * Support routine to re-initialize VGA subsystem
 *
 * Currently a stub that returns immediately.  The amd64 wakeup path in
 * this file never calls it.
469 */
470vgainit:
471	D16 ret
472
473/*
474 * Support routine to re-initialize keyboard (which is USB - help!)
 *
 * Currently a stub that returns immediately.  The amd64 wakeup path in
 * this file never calls it.
475 */
476kbdinit:
477	D16 ret
478
479/*
480 * Support routine to re-initialize COM ports to something sane
 *
 * Issues software interrupt 0x14 twice with %eax = 0xe3, first with
 * %edx = 0 (COM1) and then with %edx = 1 (COM2).  Called from wc_rm_start
 * before protected mode is enabled.  Overwrites %eax and %edx.
481 */
482cominit:
483	/ init COM1 & COM2
484	xorl		%edx, %edx		/ select COM1
485	D16 movl	$0xe3, %eax		/ ah=0; al=(9600bd|8_bit|nopar)
486	int		$0x14
487	D16 movl	$1, %edx		/ select COM2
488	D16 movl	$0xe3, %eax		/ ah=0; al=(9600bd|8_bit|nopar)
489	int		$0x14
490	D16 ret
491
492	.code64
493/*
494 * Support routine to re-initialize COM ports to something sane
 *
 * 64-bit-encoded twin of cominit: same int $0x14 sequence with %rax = 0xe3
 * and %rdx = 0, then 1.  No call to this label is visible in this file.
 * NOTE(review): int $0x14 here would vector through whatever IDT is
 * loaded at the time, not a BIOS service -- confirm before using.
 *
 * The .code64 directive above also switches the assembler to 64-bit
 * encoding for all the code that follows (wc_long_mode_64 onwards).
495 */
496cominit64:
497	/ init COM1 & COM2
498	xorq	%rdx, %rdx		/ select COM1
499	movq	$0xe3, %rax		/ ah=0; al=(9600bd|8_bit|nopar)
500	int	$0x14
501	movq	$1, %rdx		/ select COM2
502	movq	$0xe3, %rax		/ ah=0; al=(9600bd|8_bit|nopar)
503	int	$0x14
504	ret
505
506	.globl wc_long_mode_64
507wc_long_mode_64:
/*
 * Second stage of the amd64 wakeup path, reached by the lret at the end
 * of the wc_rm_start real-mode code.  Loads the GDT descriptor found at
 * GDTROFF in the real mode platter (addressed via rm_platter_pa), rebases
 * %rsp by the platter's physical address, then far-returns to
 * KCS_SEL:kernel_wc_code.
 */
508
509#if     LED
510	movw        $0x80, %dx
511	movb        $0xd8, %al
512	outb    (%dx)
513#endif
514
515#if     SERIAL
516	movw        $0x3f8, %dx
517	movb        $0x68, %al
518	outb    (%dx)
519#endif
520
521	/*
522	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
523	 * CS.L=1) so we now have access to 64-bit instructions.
524	 *
525	 * First, set the 64-bit GDT base.
526	 */
527	.globl	rm_platter_pa
	/* 32-bit load; writing %eax zero-extends into %rax, used as a base below */
528	movl	rm_platter_pa, %eax
529
530	lgdtq	GDTROFF(%rax)		/* load 64-bit GDT */
531
532	/*
533	 * Save the CPU number in %r11; get the value here since it's saved in
534	 * the real mode platter.
535	 */
536/ JAN
537/ the following is wrong! need to figure out MP systems
538/	movl	CPUNOFF(%rax), %r11d
539
540	/*
541	 * Add rm_platter_pa to %rsp to point it to the same location as seen
542	 * from 64-bit mode.
543	 */
544	addq	%rax, %rsp
545
546	/*
547	 * Now do an lretq to load CS with the appropriate selector for the
548	 * kernel's 64-bit GDT and to start executing 64-bit setup code at the
549	 * virtual address where boot originally loaded this code rather than
550	 * the copy in the real mode platter's rm_code array as we've been
551	 * doing so far.
552	 */
553
554#if     LED
555	movw        $0x80, %dx
556	movb        $0xd9, %al
557	outb    (%dx)
558#endif
559
560/ JAN this should produce 'i' but we get 'g' instead ???
561#if     SERIAL
562	movw        $0x3f8, %dx
563	movb        $0x69, %al
564	outb    (%dx)
565#endif
566
	/* far-return frame: selector pushed first, then the target address */
567	pushq	$KCS_SEL
568	pushq	$kernel_wc_code
569	lretq
570
571	.globl kernel_wc_code
572kernel_wc_code:
/*
 * Final stage of the amd64 wakeup path, reached by the lretq in
 * wc_long_mode_64.  With %rbx = rm_platter_va + WC_CPU it reloads, in
 * order: IDT; %ds/%es/%ss; EFER.NXE (if X86_NX is set in x86_feature);
 * %cr4; LDT; task register; %fs/%gs and their base MSRs; %cr0/%cr3/%cr8.
 * It then switches to the saved stack, calls *ap_mlsetup (if non-NULL)
 * and *cpr_start_cpu_func, restores the general registers and flags,
 * unwinds the frame built by wc_save_context, and returns 0 to the
 * address recorded in WC_RETADDR.
 */
573
574#if     LED
575	movw        $0x80, %dx
576	movb        $0xda, %al
577	outb    (%dx)
578#endif
579
580/ JAN this should produce 'j' but we get 'g' instead ???
581#if     SERIAL
582	movw        $0x3f8, %dx
583	movb        $0x6a, %al
584	outb    (%dx)
585#endif
586
587	/*
588	 * Complete the balance of the setup we need to before executing
589	 * 64-bit kernel code (namely init rsp, TSS, LGDT, FS and GS).
590	 */
591	.globl  rm_platter_va
592	movq    rm_platter_va, %rbx
593	addq	$WC_CPU, %rbx
594
595#if     LED
596	movw        $0x80, %dx
597	movb        $0xdb, %al
598	outb    (%dx)
599#endif
600
	/*
	 * NOTE(review): this and the next two SERIAL blocks use
	 * "movw $.., %ax" where every other marker uses "movb $.., %al".
	 * They are compiled out while SERIAL is 0.
	 */
601#if     SERIAL
602	movw        $0x3f8, %dx
603	movw        $0x6b, %ax
604	outb    (%dx)
605#endif
606
607	/*
608	 * restore the rest of the registers
609	 */
610
611	lidtq	WC_IDT(%rbx)
612
613#if     LED
614	movw        $0x80, %dx
615	movb        $0xdc, %al
616	outb    (%dx)
617#endif
618
619#if     SERIAL
620	movw        $0x3f8, %dx
621	movw        $0x6c, %ax
622	outb    (%dx)
623#endif
624
625	/*
626	 * restore the rest of the registers
627	 */
628
629	movw    $KDS_SEL, %ax
630	movw    %ax, %ds
631	movw    %ax, %es
632	movw    %ax, %ss
633
634	/*
635	 * Before proceeding, enable usage of the page table NX bit if
636	 * that's how the page tables are set up.
637	 */
638	movl    x86_feature, %ecx
639	andl   	 $X86_NX, %ecx
640	jz      1f
641	movl    $MSR_AMD_EFER, %ecx
642	rdmsr
643	orl     $AMD_EFER_NXE, %eax
644	wrmsr
6451:
646
647	movq	WC_CR4(%rbx), %rax	/ restore full cr4 (with Global Enable)
648	movq	%rax, %cr4
649
	/*
	 * %rax = saved GDT base + saved task-register selector, i.e. the
	 * address of the TSS descriptor; bit 9 of its second dword is
	 * cleared before ltr reloads the selector.
	 */
650	lldt	WC_LDT(%rbx)
651	movzwq	WC_TR(%rbx), %rax	/ clear TSS busy bit
652	addq	WC_GDT+2(%rbx), %rax
653	andl	$0xfffffdff, 4(%rax)
	/*
	 * NOTE(review): the value loaded into %rcx here is never used; %rcx
	 * is overwritten by the WC_FS load below.  Looks like debug residue.
	 */
654	movq	4(%rax), %rcx
655	ltr	WC_TR(%rbx)
656
657#if     LED
658	movw        $0x80, %dx
659	movb        $0xdd, %al
660	outb    (%dx)
661#endif
662
663#if     SERIAL
664	movw        $0x3f8, %dx
665	movw        $0x6d, %ax
666	outb    (%dx)
667#endif
668
669/ restore %fsbase %gsbase %kgbase registers using wrmsr instruction
670
	/*
	 * Each selector is reloaded before its base MSR is written.  wrmsr
	 * takes the MSR index in %ecx and the value in %edx:%eax.
	 */
671	movq    WC_FS(%rbx), %rcx	/ restore fs register
672	movw    %cx, %fs
673
674	movl    $MSR_AMD_FSBASE, %ecx
675	movl    WC_FSBASE(%rbx), %eax
676	movl    WC_FSBASE+4(%rbx), %edx
677	wrmsr
678
679	movq    WC_GS(%rbx), %rcx	/ restore gs register
680	movw    %cx, %gs
681
682	movl    $MSR_AMD_GSBASE, %ecx	/ restore gsbase msr
683	movl    WC_GSBASE(%rbx), %eax
684	movl    WC_GSBASE+4(%rbx), %edx
685	wrmsr
686
687	movl    $MSR_AMD_KGSBASE, %ecx	/ restore kgsbase msr
688	movl    WC_KGSBASE(%rbx), %eax
689	movl    WC_KGSBASE+4(%rbx), %edx
690	wrmsr
691
692	movq	WC_CR0(%rbx), %rdx
693	movq	%rdx, %cr0
694	movq	WC_CR3(%rbx), %rdx
695	movq	%rdx, %cr3
696	movq	WC_CR8(%rbx), %rdx
697	movq	%rdx, %cr8
698
699#if     LED
700	movw        $0x80, %dx
701	movb        $0xde, %al
702	outb    (%dx)
703#endif
704
705#if     SERIAL
706	movw        $0x3f8, %dx
707	movb        $0x6e, %al
708	outb    (%dx)
709#endif
710
711/ dummy up a stck so we can make C function calls
712	movq    WC_RSP(%rbx), %rsp
713
714	/*
715	 * APIC initialization (we dummy up a stack so we can make this call)
716	 */
717	pushq   $0              /* null frame pointer terminates stack trace */
718	movq    %rsp, %rbp      /* stack aligned on 16-byte boundary */
719
720	/*
721	 * skip iff function pointer is NULL
722	 */
723	cmpq	$0, ap_mlsetup
724	je	2f
725	call	*ap_mlsetup
7262:
727
728	call    *cpr_start_cpu_func
729
730/ restore %rbx to the value it ahd before we called the functions above
731	movq    rm_platter_va, %rbx
732	addq	$WC_CPU, %rbx
733
	/*
	 * %rax is not restored (it carries the return value) and %rdx is
	 * restored later, just before the epilogue.
	 */
734	movq    WC_R8(%rbx), %r8
735	movq    WC_R9(%rbx), %r9
736	movq    WC_R10(%rbx), %r10
737	movq    WC_R11(%rbx), %r11
738	movq    WC_R12(%rbx), %r12
739	movq    WC_R13(%rbx), %r13
740	movq    WC_R14(%rbx), %r14
741	movq    WC_R15(%rbx), %r15
742/	movq    WC_RAX(%rbx), %rax
743	movq    WC_RBP(%rbx), %rbp
744	movq    WC_RCX(%rbx), %rcx
745/	movq    WC_RDX(%rbx), %rdx
746	movq    WC_RDI(%rbx), %rdi
747	movq    WC_RSI(%rbx), %rsi
748
749
750/ assume that %cs does not need to be restored
751/ %ds, %es & %ss are ignored in 64bit mode
752	movw	WC_SS(%rbx), %ss
753	movw	WC_DS(%rbx), %ds
754	movw	WC_ES(%rbx), %es
755
756#if     LED
757	movw        $0x80, %dx
758	movb        $0xdf, %al
759	outb    (%dx)
760#endif
761
762#if     SERIAL
763	movw        $0x3f8, %dx
764	movb        $0x6f, %al
765	outb    (%dx)
766#endif
767
768
	/*
	 * Switch back to the exact %rsp recorded by wc_save_context (taken
	 * after its pushes of %rbp, %r12, %r13, %r14).  WC_RBP was already
	 * loaded above; this second load is redundant but harmless.
	 */
769	movq    WC_RBP(%rbx), %rbp
770	movq    WC_RSP(%rbx), %rsp
771
772#if     LED
773	movw        $0x80, %dx
774	movb        $0xe0, %al
775	outb    (%dx)
776#endif
777
778#if     SERIAL
779	movw        $0x3f8, %dx
780	movb        $0x70, %al
781	outb    (%dx)
782#endif
783
784
	/* second load of WC_RCX; %rcx was already restored above */
785	movq    WC_RCX(%rbx), %rcx
786
787	pushq	WC_EFLAGS(%rbx)			/ restore flags
788	popfq
789
790#if     LED
791	movw        $0x80, %dx
792	movb        $0xe1, %al
793	outb    (%dx)
794#endif
795
796#if     SERIAL
797	movw        $0x3f8, %dx
798	movb        $0x71, %al
799	outb    (%dx)
800#endif
801
802/*
803 * can not use outb after this point, because doing so would mean using
804 * %dx which would modify %rdx which is restored here
805 */
806
	/* move the wc_cpu_t pointer to %rax so %rbx itself can be restored */
807	movq	%rbx, %rax
808	movq    WC_RDX(%rax), %rdx
809	movq    WC_RBX(%rax), %rbx
810
	/*
	 * Same epilogue as wc_save_context.  After leave, %rsp points at the
	 * return-address slot, which is overwritten with the saved
	 * WC_RETADDR before ret.
	 */
811	popq	%r14
812	popq	%r13
813	popq	%r12
814	leave
815
816	movq	WC_RETADDR(%rax), %rax
817	movq	%rax, (%rsp)		/ return to caller of wc_save_context
818
819	xorl	%eax, %eax			/ at wakeup return 0
820	ret
821
822
823	SET_SIZE(wc_rm_start)
824
825	ENTRY_NP(asmspin)
826
/*
 * asmspin(count) -- busy-wait for `count` iterations of the loop
 * instruction.
 *
 * In:    %edi = iteration count (first integer argument)
 * Out:   nothing
 * Clobb: %rcx, flags
 *
 * A zero count returns immediately.  Without that check, loop would
 * decrement %rcx from 0 and spin for roughly 2^64 iterations.  The routine
 * now ends in ret; previously it fell through into wc_rm_end and whatever
 * follows it.  NOTE(review): this adds a few bytes between wc_rm_start and
 * wc_rm_end -- confirm the consumer of that range has room.
 */
827	movl	%edi, %ecx
	testl	%ecx, %ecx		/ nothing to do for a zero count
	jz	1f
828A1:
829	loop	A1
1:
	ret
830
831	SET_SIZE(asmspin)
832
/*
 * End marker for the amd64 code that begins at wc_rm_start; the single nop
 * gives the label an instruction to sit on.
 */
833	.globl wc_rm_end
834wc_rm_end:
835	nop
836
837#elif defined(__i386)
838
839	ENTRY_NP(wc_rm_start)
840
/*
 * wc_rm_start -- i386 wakeup entry.
 *
 * Sequence: cli; %ds = %ss = %cs and %esp = WC_STKSTART; call the
 * vgainit/kbdinit/cominit helpers; then, using %cs-relative accesses to
 * the wc_cpu_t at offset WC_CPU, reload the GDT and IDT, %cr4 (with
 * CR4_PGE masked off), %cr3 and %cr0, fetch the saved %ds selector into
 * %edx and the wc_cpu_t virtual address into %ebx, and far-return to
 * KCS_SEL:kernel_wc_code.
 *
 * Unlike the amd64 path, LED and SERIAL are both defined as 1 here, so
 * the progress markers to port 0x80 and port 0x3f8 are assembled in; the
 * later "outb $0x80" markers are not under #if at all.
 */
841/entry:	jmp		entry			/ stop here for HDT
842
843	cli
844	movw		%cs, %ax
845	movw		%ax, %ds		/ establish ds ...
846	movw		%ax, %ss		/ ... and ss:esp
847	D16 movl	$WC_STKSTART, %esp
848
849#define LED     1
850#define SERIAL  1
851
852#if     LED
853	D16 movl        $0x80, %edx
854	D16 movb        $0xd1, %al
855	outb    (%dx)
856#endif
857
858#if     SERIAL
859	D16 movl        $0x3f8, %edx
860	D16 movb        $0x61, %al
861	outb    (%dx)
862#endif
863
864
865	D16 call	vgainit
866	D16 call	kbdinit
867	D16 call	cominit
868
869#if     LED
870	D16 movl        $0x80, %edx
871	D16 movb        $0xd2, %al
872	outb    (%dx)
873#endif
874
875#if     SERIAL
876	D16 movl        $0x3f8, %edx
877	D16 movb        $0x62, %al
878	outb    (%dx)
879#endif
880
881	D16 A16 movl	$WC_CPU, %ebx		/ base add of wc_cpu_t
882
883#if     LED
884	D16 movb        $0xd3, %al
885	outb    $0x80
886#endif
887
888#if     SERIAL
889	D16 movl        $0x3f8, %edx
890	D16 movb        $0x63, %al
891	outb    (%dx)
892#endif
893
	/*
	 * %edx must stay intact from here on: kernel_wc_code loads %ds from
	 * %dx.  That is why the remaining markers use the immediate-port form
	 * "outb $0x80" rather than going through %dx.
	 */
894	D16 A16 movl	%cs:WC_DS(%ebx), %edx	/ %ds post prot/paging transit
895
896	D16 movb        $0xd4, %al
897	outb    $0x80
898
899	D16 A16 lgdt	%cs:WC_GDT(%ebx)	/ restore gdt and idtr
900	D16 A16 lidt	%cs:WC_IDT(%ebx)
901
902	D16 movb        $0xd5, %al
903	outb    $0x80
904
905	D16 A16 movl	%cs:WC_CR4(%ebx), %eax	/ restore cr4
906	D16 andl	$-1!CR4_PGE, %eax	/ don't set Global Enable yet
907	movl		%eax, %cr4
908
909	D16 movb        $0xd6, %al
910	outb    $0x80
911
912	D16 A16 movl	%cs:WC_CR3(%ebx), %eax	/ set PDPT
913	movl		%eax, %cr3
914
915	D16 movb        $0xd7, %al
916	outb    $0x80
917
918	D16 A16 movl	%cs:WC_CR0(%ebx), %eax	/ enable prot/paging, etc.
919	movl		%eax, %cr0
920
921	D16 movb        $0xd8, %al
922	outb    $0x80
923
924	D16 A16 movl	%cs:WC_VIRTADDR(%ebx), %ebx	/ virtaddr of wc_cpu_t
925
926	D16 movb        $0xd9, %al
927	outb    $0x80
928
929	D16 movb        $0xda, %al
930	outb    $0x80
931	jmp		flush			/ flush prefetch queue
932flush:
933	D16 pushl	$KCS_SEL
934	D16 pushl	$kernel_wc_code
935	D16 lret				/ re-appear at kernel_wc_code
936
937
938/*
939 * Support routine to re-initialize VGA subsystem
 *
 * Currently a stub that returns immediately; called from the i386
 * wc_rm_start.
940 */
941vgainit:
942	D16 ret
943
944/*
945 * Support routine to re-initialize keyboard (which is USB - help!)
 *
 * Currently a stub that returns immediately; called from the i386
 * wc_rm_start.
946 */
947kbdinit:
948	D16 ret
949
950/*
951 * Support routine to re-initialize COM ports to something sane for debug output
 *
 * Issues software interrupt 0x14 twice with %eax = 0xe3, first with
 * %edx = 0 (COM1) and then with %edx = 1 (COM2).  Called from the i386
 * wc_rm_start while still in real mode.  Overwrites %eax and %edx.
952 */
953cominit:
954	/ init COM1 & COM2
955	xorl		%edx, %edx		/ select COM1
956	D16 movl	$0xe3, %eax		/ ah=0; al=(9600bd|8_bit|nopar)
957	int		$0x14
958	D16 movl	$1, %edx		/ select COM2
959	D16 movl	$0xe3, %eax		/ ah=0; al=(9600bd|8_bit|nopar)
960	int		$0x14
961	D16 ret
962
/*
 * End marker for the i386 real-mode code that begins at wc_rm_start.  In
 * this variant kernel_wc_code is placed after the marker.
 */
963	.globl wc_rm_end
964wc_rm_end:
965	nop
966
967	.globl	kernel_wc_code
968kernel_wc_code:
/*
 * Final stage of the i386 wakeup path, reached by the lret at the end of
 * wc_rm_start with %ebx = virtual address of the wc_cpu_t and %dx = saved
 * %ds selector.  Reloads %ds, EFER.NXE (if X86_NX is set in x86_feature),
 * the full %cr4, LDT, task register, %ss:%esp and %es/%fs/%gs; calls
 * *ap_mlsetup (if non-NULL) and *cpr_start_cpu_func; then restores the
 * flags and %edi/%esi/%ebp/%ebx and returns 0 to the address recorded in
 * WC_RETADDR.  The port-0x80 writes are unconditional progress markers.
 */
969	/ At this point we are with kernel's cs and proper eip.
970	/ We will be executing not from the copy in real mode platter,
971	/ but from the original code where boot loaded us.
972	/ By this time GDT and IDT are loaded as is cr0, cr3 and cr4.
973	/ %ebx is wc_cpu
974	/ %dx is our ds
975
976	D16 movb        $0xdb, %al
977	outb	$0x80
978
979/ got here OK
980
981	movw	%dx, %ds		/ $KDS_SEL
982	movb	$0xdc, %al
983	outb	$0x80
984
985	/*
986	 * Before proceeding, enable usage of the page table NX bit if
987	 * that's how the page tables are set up.
988	 */
989	movl    x86_feature, %ecx
990	andl   	 $X86_NX, %ecx
991	jz      1f
992	movl    $MSR_AMD_EFER, %ecx
993	rdmsr
994	orl     $AMD_EFER_NXE, %eax
995	wrmsr
9961:
997
998	movl	WC_CR4(%ebx), %eax	/ restore full cr4 (with Global Enable)
999	movl	%eax, %cr4
1000
1001
1002	lldt	WC_LDT(%ebx)		/ $LDT_SEL
1003
	/*
	 * %eax = saved GDT base + saved task-register selector, i.e. the
	 * address of the TSS descriptor; bit 9 (0x200) of its second dword is
	 * cleared before ltr reloads the selector.
	 */
1004	movzwl	WC_TR(%ebx), %eax	/ clear TSS busy bit
1005	addl	WC_GDT+2(%ebx), %eax
1006	andl	$-1!0x200, 4(%eax)
1007	ltr	WC_TR(%ebx)		/ $UTSS_SEL
1008
1009	movw	WC_SS(%ebx), %ss	/ lssl	WC_ESP(%ebx), %esp
1010	movl	WC_ESP(%ebx), %esp	/ ^ don't use, asm busted!
1011
	/*
	 * The saved %esp points at wc_save_context's return-address slot;
	 * rewrite it so the final ret goes back to that caller.  The calls
	 * below push beneath this slot.
	 */
1012	movl	WC_RETADDR(%ebx), %eax	/ return to caller of wc_save_context
1013	movl	%eax, (%esp)
1014
1015	movw	WC_ES(%ebx), %es	/ restore segment registers
1016	movw	WC_FS(%ebx), %fs
1017	movw	WC_GS(%ebx), %gs
1018
1019	/*
1020	 * APIC initialization, skip iff function pointer is NULL
1021	 */
1022	cmpl	$0, ap_mlsetup
1023	je	2f
1024	call	*ap_mlsetup
10252:
1026
1027	call    *cpr_start_cpu_func
1028
	/*
	 * %ebx is still used as the wc_cpu_t pointer after the two calls
	 * above (presumably relying on the callees preserving it), so it is
	 * restored last.
	 */
1029	pushl	WC_EFLAGS(%ebx)		/ restore flags
1030	popfl
1031
1032	movl	WC_EDI(%ebx), %edi	/ restore general registers
1033	movl	WC_ESI(%ebx), %esi
1034	movl	WC_EBP(%ebx), %ebp
1035	movl	WC_EBX(%ebx), %ebx
1036
1037/exit:	jmp	exit			/ stop here for HDT
1038
1039	xorl	%eax, %eax		/ at wakeup return 0
1040	ret
1041
1042	SET_SIZE(wc_rm_start)
1043
1044
1045#endif	/* defined(__amd64) */
1046
1047#endif	/* !defined(__GNU_AS__) */
1048
1049#endif /* lint */
1050
1051