xref: /illumos-gate/usr/src/uts/i86pc/ml/locore.S (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25/*
26 * Copyright 2020 Joyent, Inc.
27 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
28 */
29
30/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
31/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
32/*	  All Rights Reserved					*/
33
34/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
35/*	  All Rights Reserved					*/
36
37
38#include <sys/asm_linkage.h>
39#include <sys/asm_misc.h>
40#include <sys/regset.h>
41#include <sys/privregs.h>
42#include <sys/psw.h>
43#include <sys/reboot.h>
44#include <sys/machparam.h>
45
46#include <sys/segments.h>
47#include <sys/pcb.h>
48#include <sys/trap.h>
49#include <sys/ftrace.h>
50#include <sys/traptrace.h>
51#include <sys/clock.h>
52#include <sys/cmn_err.h>
53#include <sys/pit.h>
54#include <sys/panic.h>
55
56#if defined(__xpv)
57#include <sys/hypervisor.h>
58#endif
59
60#include "assym.h"
61
62/*
63 * Our assumptions:
64 *	- We are running in protected-paged mode.
65 *	- Interrupts are disabled.
66 *	- The GDT and IDT are the callers; we need our copies.
67 *	- The kernel's text, initialized data and bss are mapped.
68 *
69 * Our actions:
70 *	- Save arguments
71 *	- Initialize our stack pointer to the thread 0 stack (t0stack)
72 *	  and leave room for a phony "struct regs".
73 *	- Our GDT and IDT need to get munged.
74 *	- Since we are using the boot's GDT descriptors, we need
75 *	  to copy them into our GDT before we switch to ours.
76 *	- We start using our GDT by loading correct values in the
77 *	  selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
78 *	  gs=KGS_SEL).
79 *	- The default LDT entry for syscall is set.
80 *	- We load the default LDT into the hardware LDT register.
81 *	- We load the default TSS into the hardware task register.
82 *	- Check for cpu type, i.e. 486 vs. P5 vs. P6 etc.
83 *	- mlsetup(%esp) gets called.
84 *	- We change our appearance to look like the real thread 0.
85 *	  (NOTE: making ourselves to be a real thread may be a noop)
86 *	- main() gets called.  (NOTE: main() never returns).
87 *
88 * NOW, the real code!
89 */
90	/*
91	 * The very first thing in the kernel's text segment must be a jump
92	 * to the os/fakebop.c startup code.
93	 */
94	.text
	/*
	 * _start is not defined in the part of the file shown here (the
	 * comment above places the startup code in os/fakebop.c).  This file's
	 * own boot path begins at _locore_start.
	 */
95	jmp     _start
96
97	/*
98	 * Globals:
99	 */
100	.globl	_locore_start
101	.globl	mlsetup
102	.globl	main
103	.globl	panic
104	.globl	t0stack
105	.globl	t0
106	.globl	sysp
107	.globl	edata
108
109	/*
110	 * call back into boot - sysp (bootsvcs.h) and bootops (bootconf.h)
111	 */
112	.globl	bootops
113	.globl	bootopsp
114
115	/*
116	 * NOTE: t0stack should be the first thing in the data section so that
117	 * if it ever overflows, it will fault on the last kernel text page.
118	 */
119	.data
120	.comm	t0stack, DEFAULTSTKSZ, 32
	/*
	 * t0 gets a round 4096 bytes, which (unlike an odd size such as 4094)
	 * is a whole multiple of the 32-byte alignment requested for it.
	 */
121	.comm	t0, 4096, 32
122
123
124	/*
125	 * kobj_init() vectors us back to here with (note) a slightly different
126	 * set of arguments than _start is given (see the register list just
	 * inside the entry point below).
127	 *
128	 * XXX	Make this less vile, please.
129	 */
130	ENTRY_NP(_locore_start)
	/*
	 * Kernel entry reached from kobj_init().  Records the boot arguments,
	 * moves onto the thread 0 stack with room for a struct regs at its
	 * top, adjusts %cr0 (not under __xpv), then calls mlsetup(rp) followed
	 * by main().  If main() comes back, panic("main() returned").
	 */
131
132	/*
133	 * %rdi = boot services (should die someday)
134	 * %rdx = bootops
135	 * end
136	 */
137
138	leaq	edata(%rip), %rbp	/* reference edata for ksyms */
139	movq	$0, (%rbp)		/* limit stack back trace */
	/*
	 * NOTE(review): the store above goes through %rbp, so it writes a
	 * zero quadword at the address of edata itself.
	 */
140
141	/*
142	 * Initialize our stack pointer to the thread 0 stack (t0stack)
143	 * and leave room for a "struct regs" for lwp0.  Note that the
144	 * stack doesn't actually align to a 16-byte boundary until just
145	 * before we call mlsetup because we want to use %rsp to point at
146	 * our regs structure.
147	 */
148	leaq	t0stack(%rip), %rsp
149	addq	$_CONST(DEFAULTSTKSZ - REGSIZE), %rsp
	/*
	 * When REGSIZE is a multiple of 16, drop %rsp by a further 8 bytes so
	 * that the single pushq %rbp further down is what brings %rsp onto the
	 * 16-byte boundary noted there (assumes t0stack + DEFAULTSTKSZ is
	 * itself 16-byte aligned -- TODO confirm).
	 */
150#if (REGSIZE & 15) == 0
151	subq	$8, %rsp
152#endif
153	/*
154	 * Save call back for special x86 boot services vector
155	 */
156	movq	%rdi, sysp(%rip)
157
158	movq	%rdx, bootops(%rip)		/* save bootops */
	/*
	 * bootopsp = &bootops.  The address is an absolute immediate here,
	 * unlike the %rip-relative references around it.
	 */
159	movq	$bootops, bootopsp(%rip)
160
161	/*
162	 * Save arguments and flags, if only for debugging ..
163	 */
164	movq	%rdi, REGOFF_RDI(%rsp)
165	movq	%rsi, REGOFF_RSI(%rsp)
166	movq	%rdx, REGOFF_RDX(%rsp)
167	movq	%rcx, REGOFF_RCX(%rsp)
168	movq	%r8, REGOFF_R8(%rsp)
169	movq	%r9, REGOFF_R9(%rsp)
	/*
	 * Fetch the flags register through the stack.  The pushf/popq pair
	 * leaves %rsp where it was, so REGOFF_RFL still indexes the same
	 * struct regs as the stores above.
	 */
170	pushf
171	popq	%r11
172	movq	%r11, REGOFF_RFL(%rsp)
173
174#if !defined(__xpv)
175	/*
176	 * Enable write protect and alignment check faults.
177	 */
178	movq	%cr0, %rax
179	orq	$_CONST(CR0_WP|CR0_AM), %rax
	/* ... and clear CR0_WT and CR0_CE in the same update */
180	andq	$_BITNOT(CR0_WT|CR0_CE), %rax
181	movq	%rax, %cr0
182#endif	/* __xpv */
183
184	/*
185	 * mlsetup() gets called with a struct regs as argument, while
186	 * main takes no args and should never return.
187	 */
	/*
	 * %rdi takes the struct regs address before the push moves %rsp.
	 * The pushed value is zero and %rbp is left pointing at it,
	 * presumably so that frame-pointer walks stop at this frame.
	 */
188	xorl	%ebp, %ebp
189	movq	%rsp, %rdi
190	pushq	%rbp
191	/* (stack pointer now aligned on 16-byte boundary right here) */
192	movq	%rsp, %rbp
193	call	mlsetup
194	call	main
195	/* NOTREACHED */
	/*
	 * Only reached if main() returns.  %eax is zeroed before the call;
	 * panic() is presumably variadic, in which case %al = 0 says that no
	 * vector registers carry arguments.
	 */
196	leaq	__return_from_main(%rip), %rdi
197	xorl	%eax, %eax
198	call	panic
199	SET_SIZE(_locore_start)
200
/*
 * Message handed to panic() by _locore_start when main() returns.
 */
201__return_from_main:
202	.string	"main() returned"
/*
 * Not referenced anywhere in the portion of the file visible here.
 */
203__unsupported_cpu:
204	.string	"486 style cpu detected - no longer supported!"
205
206#if defined(DEBUG)
/*
 * panic() format for the DEBUG-only check in lwp_rtt; its two conversions
 * are fed the __LINE__ value and the lwp pointer.
 */
207_no_pending_updates:
208	.string	"locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
209#endif
210
211/*
212 *  For stack layout, see privregs.h
213 *  When cmntrap gets called, the error code and trap number have been pushed.
214 *  When cmntrap_pushed gets called, the entire struct regs has been pushed.
215 */
216
217	.globl	trap		/* C handler called below */
218
219	ENTRY_NP2(cmntrap, _cmntrap)
	/*
	 * Common trap handler entry.  cmntrap (alias _cmntrap) is entered with
	 * only the error code and trap number pushed and completes the frame
	 * with INTR_PUSH; cmntrap_pushed is entered with the whole struct regs
	 * already on the stack.
	 *
	 * Normal path: trap(rp, addr, cpuid) is called with %r15 as addr, then
	 * we leave through _sys_rtt.  If this CPU's DTrace flags have
	 * CPU_DTRACE_NOFAULT set, trap() is not called: the fault is recorded
	 * in those flags, the saved %rip is stepped over the faulting
	 * instruction and we return through tr_iret_auto.
	 */
220
221	INTR_PUSH
222
223	ALTENTRY(cmntrap_pushed)
224
	/* %rbp = rp; every REGOFF_* access below is relative to it */
225	movq	%rsp, %rbp
226
227	/*
228	 * - if this is a #pf i.e. T_PGFLT, %r15 is live
229	 *   and contains the faulting address i.e. a copy of %cr2
230	 *
231	 * - if this is a #db i.e. T_SGLSTP, %r15 is live
232	 *   and contains the value of %db6
233	 */
234
235	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
236	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
237	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
238
239	/*
240	 * We must first check if DTrace has set its NOFAULT bit.  This
241	 * regrettably must happen before the trap stack is recorded, because
242	 * this requires a call to getpcstack() and may induce recursion if an
243	 * fbt::getpcstack: enabling is inducing the bad load.
244	 */
	/*
	 * %rax = cpu_core + (CPU_ID << CPU_CORE_SHIFT), %cx = the DTrace
	 * flags found there.  Both stay live into .dtrace_induced.
	 */
245	movl	%gs:CPU_ID, %eax
246	shlq	$CPU_CORE_SHIFT, %rax
247	leaq	cpu_core(%rip), %r8
248	addq	%r8, %rax
249	movw	CPUC_DTRACE_FLAGS(%rax), %cx
250	testw	$CPU_DTRACE_NOFAULT, %cx
251	jnz	.dtrace_induced
252
253	TRACE_STACK(%rdi)
254
	/* trap() arguments: %rdi = rp, %rsi = addr (from %r15), %edx = cpuid */
255	movq	%rbp, %rdi
256	movq	%r15, %rsi
257	movl	%gs:CPU_ID, %edx
258
259	/*
260	 * We know that this isn't a DTrace non-faulting load; we can now safely
261	 * reenable interrupts.  (In the case of pagefaults, we enter through an
262	 * interrupt gate.)
263	 */
264	ENABLE_INTR_FLAGS
265
266	call	trap		/* trap(rp, addr, cpuid) handles all traps */
267	jmp	_sys_rtt
268
	/*
	 * NOFAULT was set.  The saved %cs must be KCS_SEL and the trap must be
	 * one of #PF, #GP, #UD or #DE; anything else panics.  %rax and %cx are
	 * still as computed above, and the updated flags are written back.
	 */
269.dtrace_induced:
270	cmpw	$KCS_SEL, REGOFF_CS(%rbp)	/* test CS for user-mode trap */
271	jne	3f				/* if from user, panic */
272
273	cmpl	$T_PGFLT, REGOFF_TRAPNO(%rbp)
274	je	1f
275
276	cmpl	$T_GPFLT, REGOFF_TRAPNO(%rbp)
277	je	0f
278
279	cmpl	$T_ILLINST, REGOFF_TRAPNO(%rbp)
280	je	0f
281
282	cmpl	$T_ZERODIV, REGOFF_TRAPNO(%rbp)
283	jne	4f				/* if not PF/GP/UD/DE, panic */
284
285	orw	$CPU_DTRACE_DIVZERO, %cx
286	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
287	jmp	2f
288
289	/*
290	 * If we've taken a GPF, we don't (unfortunately) have the address that
291	 * induced the fault.  So instead of setting the fault to BADADDR,
292	 * we'll set the fault to ILLOP.
293	 */
	/* (T_ILLINST is sent to this same label and is also flagged ILLOP.) */
2940:
295	orw	$CPU_DTRACE_ILLOP, %cx
296	movw	%cx, CPUC_DTRACE_FLAGS(%rax)
297	jmp	2f
2981:
299	orw	$CPU_DTRACE_BADADDR, %cx
300	movw	%cx, CPUC_DTRACE_FLAGS(%rax)	/* set fault to bad addr */
301	movq	%r15, CPUC_DTRACE_ILLVAL(%rax)
302					    /* fault addr is illegal value */
	/*
	 * Step the saved %rip past the faulting instruction, using the value
	 * dtrace_instr_size(rip) returns in %rax as its length.  The old %rip
	 * is parked in %r12 across the call.  INTR_POP then restores the
	 * saved registers before the return through tr_iret_auto.
	 */
3032:
304	movq	REGOFF_RIP(%rbp), %rdi
305	movq	%rdi, %r12
306	call	dtrace_instr_size
307	addq	%rax, %r12
308	movq	%r12, REGOFF_RIP(%rbp)
309	INTR_POP
310	call	x86_md_clear
311	jmp	tr_iret_auto
312	/*NOTREACHED*/
	/* 3: NOFAULT was set but the saved %cs was not KCS_SEL */
3133:
314	leaq	dtrace_badflags(%rip), %rdi
315	xorl	%eax, %eax
316	call	panic
	/* 4: NOFAULT was set but the trap was none of the four handled above */
3174:
318	leaq	dtrace_badtrap(%rip), %rdi
319	xorl	%eax, %eax
320	call	panic
321	SET_SIZE(cmntrap_pushed)
322	SET_SIZE(cmntrap)
323	SET_SIZE(_cmntrap)
324
325/*
326 * Declare a uintptr_t which has the size of _cmntrap to enable stack
327 * traceback code to know when a regs structure is on the stack.
328 */
329	.globl	_cmntrap_size
330	.align	CLONGSIZE
331_cmntrap_size:
	/*
	 * The stored value is the distance from _cmntrap to this word.  It is
	 * taken after the .align above, so any padding that directive inserts
	 * is counted as well.
	 */
332	.NWORD	. - _cmntrap
333	.type	_cmntrap_size, @object
334
/* panic() message used by cmntrap when the saved %cs is not KCS_SEL */
335dtrace_badflags:
336	.string "bad DTrace flags"
337
/* panic() message used by cmntrap for a trap type it does not handle */
338dtrace_badtrap:
339	.string "bad DTrace trap"
340
341	.globl	trap		/* C handler called below */
342
343	ENTRY_NP(cmninttrap)
	/*
	 * Trap entry that calls trap(rp, 0, cpuid): the addr argument is
	 * always zero.  Unlike cmntrap there is no DTrace NOFAULT check, no
	 * TRACE_STACK and no ENABLE_INTR_FLAGS before the call.  Leaves
	 * through _sys_rtt.
	 */
344
345	INTR_PUSH
346	INTGATE_INIT_KERNEL_FLAGS
347
348	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
349	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
350	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
351
352	movq	%rsp, %rbp
353
	/* %edx = cpuid, %esi = 0 (addr), %rdi = rp; set after the TRACE_* use */
354	movl	%gs:CPU_ID, %edx
355	xorl	%esi, %esi
356	movq	%rsp, %rdi
357	call	trap		/* trap(rp, addr, cpuid) handles all traps */
358	jmp	_sys_rtt
359	SET_SIZE(cmninttrap)
360
361#if !defined(__xpv)
362	/*
363	 * Handle traps early in boot. Just revectors into C quickly as
364	 * these are always fatal errors.
365	 *
366	 * Adjust %rsp to get same stack layout as in 32bit mode for bop_trap().
367	 */
368	ENTRY(bop_trap_handler)
	/*
	 * %rdi = %rsp as it was on entry, passed as the argument to
	 * bop_trap().  The extra 8 bytes are the stack layout adjustment
	 * described in the comment above.  No instruction follows the call,
	 * so bop_trap() is evidently not expected to return.
	 */
369	movq	%rsp, %rdi
370	sub	$8, %rsp
371	call	bop_trap
372	SET_SIZE(bop_trap_handler)
373#endif
374
375	.globl	dtrace_user_probe
376
377	ENTRY_NP(dtrace_trap)
	/*
	 * Builds a struct regs with INTR_PUSH and calls
	 * dtrace_user_probe(rp, addr, cpuid), then leaves through _sys_rtt.
	 * addr is read from %cr2, or under __xpv from the VCPU_INFO_ARCH_CR2
	 * field of the structure that %gs:CPU_VCPU_INFO points to.
	 */
378
379	INTR_PUSH
380
381	TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) /* Uses labels 8 and 9 */
382	TRACE_REGS(%rdi, %rsp, %rbx, %rcx)	/* Uses label 9 */
383	TRACE_STAMP(%rdi)		/* Clobbers %eax, %edx, uses 9 */
384
385	movq	%rsp, %rbp
386
	/* argument registers are loaded before ENABLE_INTR_FLAGS below */
387	movl	%gs:CPU_ID, %edx
388#if defined(__xpv)
389	movq	%gs:CPU_VCPU_INFO, %rsi
390	movq	VCPU_INFO_ARCH_CR2(%rsi), %rsi
391#else
392	movq	%cr2, %rsi
393#endif
394	movq	%rsp, %rdi
395
396	ENABLE_INTR_FLAGS
397
398	call	dtrace_user_probe /* dtrace_user_probe(rp, addr, cpuid) */
399	jmp	_sys_rtt
400
401	SET_SIZE(dtrace_trap)
402
403/*
404 * Return from _sys_trap routine.
405 */
406
407	ENTRY_NP(lwp_rtt_initial)
	/*
	 * lwp_rtt_initial and lwp_rtt both load %rsp from the current
	 * thread's T_STACK and share everything from _lwp_rtt onward; the
	 * only difference is that lwp_rtt_initial calls
	 * __dtrace_probe___proc_start first.  From here on %rsp is used as
	 * the struct regs pointer (see the REGOFF_* accesses below).
	 *
	 * The code then falls through a chain of entry points:
	 *   _sys_rtt                disables interrupts, then
	 *   _sys_rtt_ints_disabled  calls sys_rtt_common(rp) and picks one of
	 *   sys_rtt_syscall32       return to 32-bit userland
	 *   sys_rtt_syscall         return to 64-bit userland
	 *   sr_sup                  return to supervisor (kernel) mode
	 */
408	movq	%gs:CPU_THREAD, %r15
409	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
410	movq	%rsp, %rbp
411	call	__dtrace_probe___proc_start
412	jmp	_lwp_rtt
413
414	ENTRY_NP(lwp_rtt)
415
416	/*
417	 * r14	lwp
418	 * rdx	lwp->lwp_procp
419	 * r15	curthread
420	 */
421
422	movq	%gs:CPU_THREAD, %r15
423	movq	T_STACK(%r15), %rsp	/* switch to the thread stack */
424	movq	%rsp, %rbp
425_lwp_rtt:
426	call	__dtrace_probe___proc_lwp__start
	/*
	 * %r14 and %rdx are loaded after the probe call.  %rdx is consumed by
	 * the agent-lwp test below; the only call in between is the DEBUG
	 * panic.
	 */
427	movq	%gs:CPU_LWP, %r14
428	movq	LWP_PROCP(%r14), %rdx
429
430	/*
431	 * XX64	Is the stack misaligned correctly at this point?
432	 *	If not, we need to do a push before calling anything ..
433	 */
434
435#if defined(DEBUG)
436	/*
437	 * If we were to run lwp_savectx at this point -without-
438	 * pcb_rupdate being set to 1, we'd end up sampling the hardware
439	 * state left by the previous running lwp, rather than setting
440	 * the values requested by the lwp creator.  Bad.
441	 */
	/* bit 0 set: skip to 1f; otherwise panic with the line and the lwp */
442	testb	$0x1, PCB_RUPDATE(%r14)
443	jne	1f
444	leaq	_no_pending_updates(%rip), %rdi
445	movl	$__LINE__, %esi
446	movq	%r14, %rdx
447	xorl	%eax, %eax
448	call	panic
4491:
450#endif
451
452	/*
453	 * If agent lwp, clear %fs and %gs
454	 */
	/*
	 * "Agent lwp" means curthread (%r15) equals the process's P_AGENTTP.
	 * Both the saved register slots and the pcb copies are zeroed.
	 */
455	cmpq	%r15, P_AGENTTP(%rdx)
456	jne	1f
457	xorl	%ecx, %ecx
458	movq	%rcx, REGOFF_FS(%rsp)
459	movq	%rcx, REGOFF_GS(%rsp)
460	movw	%cx, LWP_PCB_FS(%r14)
461	movw	%cx, LWP_PCB_GS(%r14)
4621:
463	call	dtrace_systrace_rtt
	/* rval1 = saved %rax, rval2 = saved %rdx */
464	movq	REGOFF_RDX(%rsp), %rsi
465	movq	REGOFF_RAX(%rsp), %rdi
466	call	post_syscall		/* post_syscall(rval1, rval2) */
467
468	/*
469	 * XXX - may want a fast path that avoids sys_rtt_common in the
470	 * most common case.
471	 */
472	ALTENTRY(_sys_rtt)
473	CLI(%rax)			/* disable interrupts */
474	ALTENTRY(_sys_rtt_ints_disabled)
475	movq	%rsp, %rdi		/* pass rp to sys_rtt_common */
476	call	sys_rtt_common		/* do common sys_rtt tasks */
	/* a zero return selects the kernel-mode return at sr_sup */
477	testq	%rax, %rax		/* returning to userland? */
478	jz	sr_sup
479
480	/*
481	 * Return to user
482	 */
483	ASSERT_UPCALL_MASK_IS_SET
	/* saved %cs == UCS_SEL: 64-bit return; anything else falls through */
484	cmpw	$UCS_SEL, REGOFF_CS(%rsp) /* test for native (64-bit) lwp? */
485	je	sys_rtt_syscall
486
487	/*
488	 * Return to 32-bit userland
489	 */
490	ALTENTRY(sys_rtt_syscall32)
491	USER32_POP
492	call	x86_md_clear
493	jmp	tr_iret_user
494	/*NOTREACHED*/
495
496	ALTENTRY(sys_rtt_syscall)
497	/*
498	 * Return to 64-bit userland
499	 */
500	USER_POP
	/* entry for callers that skip the USER_POP above */
501	ALTENTRY(nopop_sys_rtt_syscall)
502	call	x86_md_clear
503	jmp	tr_iret_user
504	/*NOTREACHED*/
505	SET_SIZE(nopop_sys_rtt_syscall)
506
507	/*
508	 * Return to supervisor
509	 * NOTE: to make the check in trap() that tests if we are executing
510	 * segment register fixup/restore code work properly, sr_sup MUST be
511	 * after _sys_rtt .
512	 */
513	ALTENTRY(sr_sup)
514	/*
515	 * Restore regs before doing iretq to kernel mode
516	 */
	/* unlike the two user-mode returns, x86_md_clear is not called here */
517	INTR_POP
518	jmp	tr_iret_kernel
	/* _sys_rtt_end labels the first address past this return code */
519	.globl	_sys_rtt_end
520_sys_rtt_end:
521	/*NOTREACHED*/
	/*
	 * All of the fall-through entry points are sized here, so each size
	 * runs from its own label to this point.
	 */
522	SET_SIZE(sr_sup)
523	SET_SIZE(_sys_rtt_end)
524	SET_SIZE(lwp_rtt)
525	SET_SIZE(lwp_rtt_initial)
526	SET_SIZE(_sys_rtt_ints_disabled)
527	SET_SIZE(_sys_rtt)
528	SET_SIZE(sys_rtt_syscall)
529	SET_SIZE(sys_rtt_syscall32)
530
531	/*
532	 * XX64 quick and dirty port from the i386 version. Since we
533	 * believe the amd64 tsc is more reliable, could this code be
534	 * simpler?
535	 */
536	ENTRY_NP(freq_tsc_pit)
	/*
	 * Times a short busy loop with both the TSC and PIT counter 0.
	 *
	 * In:	%rdi = pit_counter, the address of a 32-bit location that
	 *	receives the number of PIT counts the final pass took
	 * Out:	%rax = TSC ticks that elapsed across that pass, as one 64-bit
	 *	value, or 0 when the freq_tsc_too_fast path is taken (in which
	 *	case *pit_counter is not written)
	 *
	 * Register use: %ebx = loop count of the current pass; %ecx = loop
	 * count for the next pass and, after the second rdtsc, the low word
	 * of the ending TSC; %edi:%esi = starting TSC; %r9 = pit_counter.
	 * The carry flag is clear only on the first pass (clc below, stc
	 * before every retry) and is captured with setc to tell whether the
	 * instruction cache has been warmed.
	 *
	 * %rbx is saved and restored.  %rax, %rcx, %rdx, %rsi, %rdi, %r9 and
	 * the flags are clobbered.
	 */
537	pushq	%rbp
538	movq	%rsp, %rbp
539	movq	%rdi, %r9	/* save pit_counter */
540	pushq	%rbx
541
542/ We have a TSC, but we have no way in general to know how reliable it is.
543/ Usually a marginal TSC behaves appropriately unless not enough time
544/ elapses between reads. A reliable TSC can be read as often and as rapidly
545/ as desired. The simplistic approach of reading the TSC counter and
546/ correlating to the PIT counter cannot be naively followed. Instead estimates
547/ have to be taken to successively refine a guess at the speed of the cpu
548/ and then the TSC and PIT counter are correlated. In practice very rarely
549/ is more than one quick loop required for an estimate. Measures have to be
550/ taken to prevent the PIT counter from wrapping beyond its resolution and for
551/ measuring the clock rate of very fast processors.
552/
553/ The following constant can be tuned. It should be such that the loop does
554/ not take too many nor too few PIT counts to execute. If this value is too
555/ large, then on slow machines the loop will take a long time, or the PIT
556/ counter may even wrap. If this value is too small, then on fast machines
557/ the PIT counter may count so few ticks that the resolution of the PIT
558/ itself causes a bad guess. Because this code is used in machines with
559/ marginal TSC's and/or IO, if this value is too small on those, it may
560/ cause the calculated cpu frequency to vary slightly from boot to boot.
561/
562/ In all cases even if this constant is set inappropriately, the algorithm
563/ will still work and the caller should be able to handle variances in the
564/ calculation of cpu frequency, but the calculation will be inefficient and
565/ take a disproportionate amount of time relative to a well selected value.
566/ As the slowest supported cpu becomes faster, this constant should be
567/ carefully increased.
568
	/* initial loop count for the first pass */
569	movl	$0x8000, %ecx
570
571	/ to make sure the instruction cache has been warmed
572	clc
573
574	jmp	freq_tsc_loop
575
576/ The following block of code up to and including the latching of the PIT
577/ counter after freq_tsc_perf_loop is very critical and very carefully
578/ written, it should only be modified with great care. freq_tsc_loop to
579/ freq_tsc_perf_loop fits exactly in 16 bytes as do the instructions in
580/ freq_tsc_perf_loop up to the unlatching of the PIT counter.
581
582	.align	32
583freq_tsc_loop:
584	/ save the loop count in %ebx
585	movl	%ecx, %ebx
586
587	/ initialize the PIT counter and start a count down
588	movb	$PIT_LOADMODE, %al
589	outb	$PITCTL_PORT
	/* both bytes written to counter 0 are 0xff */
590	movb	$0xff, %al
591	outb	$PITCTR0_PORT
592	outb	$PITCTR0_PORT
593
594	/ read the TSC and store the TS in %edi:%esi
595	rdtsc
596	movl	%eax, %esi
597
	/*
	 * The loop body only re-copies the starting TSC into %edi:%esi.  The
	 * loop instruction counts %ecx down to zero.
	 */
598freq_tsc_perf_loop:
599	movl	%edx, %edi
600	movl	%eax, %esi
601	movl	%edx, %edi
602	loop	freq_tsc_perf_loop
603
604	/ read the TSC and store the LSW in %ecx
	/* the high word stays in %edx, untouched until freq_tsc_calculate */
605	rdtsc
606	movl	%eax, %ecx
607
608	/ latch the PIT counter and status
609	movb	$_CONST(PIT_READBACK|PIT_READBACKC0), %al
610	outb	$PITCTL_PORT
611
612	/ remember if the icache has been warmed
	/*
	 * The carry flag still holds what clc/stc left before this pass; no
	 * instruction executed since freq_tsc_loop modifies it.
	 */
613	setc	%ah
614
615	/ read the PIT status
616	inb	$PITCTR0_PORT
617	shll	$8, %eax
618
619	/ read PIT count
620	inb	$PITCTR0_PORT
621	shll	$8, %eax
622	inb	$PITCTR0_PORT
623	bswap	%eax
	/*
	 * %eax now holds, from most to least significant byte: the second
	 * count byte read, the first count byte read, the PIT status, and the
	 * icache-warmed flag.  The status bits therefore sit 8 bits up in
	 * %ax, hence the +8 in the two bit tests below.
	 */
624
625	/ check to see if the PIT count was loaded into the CE
626	btw	$_CONST(PITSTAT_NULLCNT+8), %ax
627	jc	freq_tsc_increase_count
628
629	/ check to see if PIT counter wrapped
630	btw	$_CONST(PITSTAT_OUTPUT+8), %ax
631	jnc	freq_tsc_pit_did_not_wrap
632
633	/ halve count
634	shrl	$1, %ebx
635	movl	%ebx, %ecx
636
637	/ the instruction cache has been warmed
638	stc
639
640	jmp	freq_tsc_loop
641
642freq_tsc_increase_count:
	/* double the loop count; a carry out of bit 31 means give up */
643	shll	$1, %ebx
644	jc	freq_tsc_too_fast
645
646	movl	%ebx, %ecx
647
648	/ the instruction cache has been warmed
649	stc
650
651	jmp	freq_tsc_loop
652
653freq_tsc_pit_did_not_wrap:
	/*
	 * Swap halves: %ax = the 16-bit PIT count, bit 16 = icache-warmed
	 * flag.  cmpw sets the flags that jb consumes; notw leaves the flags
	 * alone and turns the remaining count into 0xffff - count, the counts
	 * elapsed since the 0xff/0xff load.  The duration is sufficient when
	 * fewer than 0x2000 counts remain.
	 */
654	roll	$16, %eax
655
656	cmpw	$0x2000, %ax
657	notw	%ax
658	jb	freq_tsc_sufficient_duration
659
660freq_tsc_calculate:
661	/ in mode 0, the PIT loads the count into the CE on the first CLK pulse,
662	/ then on the second CLK pulse the CE is decremented, therefore mode 0
663	/ is really a (count + 1) counter, ugh
	/* %esi = elapsed PIT counts + 1 (this overwrites the saved TSC low word) */
664	xorl	%esi, %esi
665	movw	%ax, %si
666	incl	%esi
667
	/* %edx:%eax = 0xf000 (the target PIT count) * loop count */
668	movl	$0xf000, %eax
669	mull	%ebx
670
671	/ tuck away (target_pit_count * loop_count)
672	movl	%edx, %ecx
673	movl	%eax, %ebx
674
	/*
	 * %edx:%eax = pit_count * 0xffffffff + pit_count = pit_count << 32.
	 * The divl below is only attempted when the dividend %ecx:%ebx is
	 * smaller than that, i.e. when the quotient fits in 32 bits.
	 */
675	movl	%esi, %eax
676	movl	$0xffffffff, %edx
677	mull	%edx
678
679	addl	%esi, %eax
680	adcl	$0, %edx
681
682	cmpl	%ecx, %edx
683	ja	freq_tsc_div_safe
684	jb	freq_tsc_too_fast
685
686	cmpl	%ebx, %eax
687	jbe	freq_tsc_too_fast
688
689freq_tsc_div_safe:
	/* next loop count = (0xf000 * loop_count) / pit_count */
690	movl	%ecx, %edx
691	movl	%ebx, %eax
692
693	movl	%esi, %ecx
694	divl	%ecx
695
696	movl	%eax, %ecx
697
698	/ the instruction cache has been warmed
699	stc
700
701	jmp	freq_tsc_loop
702
703freq_tsc_sufficient_duration:
704	/ test to see if the icache has been warmed
	/* a first (cold) pass is never accepted; it only refines the count */
705	btl	$16, %eax
706	jnc	freq_tsc_calculate
707
708	/ recall mode 0 is a (count + 1) counter
709	andl	$0xffff, %eax
710	incl	%eax
711
712	/ save the number of PIT counts
713	movl	%eax, (%r9)
714
715	/ calculate the number of TS's that elapsed
	/* %edx:%eax = ending TSC (%edx:%ecx) - starting TSC (%edi:%esi) */
716	movl	%ecx, %eax
717	subl	%esi, %eax
718	sbbl	%edi, %edx
719
720	jmp	freq_tsc_end
721
722freq_tsc_too_fast:
723	/ return 0 as a 64 bit quantity
724	xorl	%eax, %eax
725	xorl	%edx, %edx
726
727freq_tsc_end:
	/* fold %edx:%eax into the single 64-bit return value in %rax */
728	shlq	$32, %rdx
729	orq	%rdx, %rax
730
731	popq	%rbx
732	leaveq
733	ret
734	SET_SIZE(freq_tsc_pit)
735
736