xref: /titanic_44/usr/src/lib/commpage/amd64/cp_subr.s (revision cc401b3700f84e16c3a03f62783088a993d21466)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */
15*cc401b37SPatrick Mooney
16*cc401b37SPatrick Mooney#include <sys/asm_linkage.h>
17*cc401b37SPatrick Mooney#include <sys/segments.h>
18*cc401b37SPatrick Mooney#include <sys/time_impl.h>
19*cc401b37SPatrick Mooney#include <sys/tsc.h>
20*cc401b37SPatrick Mooney#include <cp_offsets.h>
21*cc401b37SPatrick Mooney
22*cc401b37SPatrick Mooney#define	GETCPU_GDT_OFFSET	SEL_GDT(GDT_CPUID, SEL_UPL)
23*cc401b37SPatrick Mooney
24*cc401b37SPatrick Mooney	.file	"cp_subr.s"
25*cc401b37SPatrick Mooney
/*
 * These are cloned from TSC and time related code in the kernel.  They should
 * be kept in sync in the case that the source values are changed.
 * See: uts/i86pc/os/timestamp.c
 */
31*cc401b37SPatrick Mooney#define	NSEC_SHIFT	5
32*cc401b37SPatrick Mooney#define	ADJ_SHIFT	4
33*cc401b37SPatrick Mooney#define	NANOSEC		0x3b9aca00
34*cc401b37SPatrick Mooney
/*
 * hrtime_t
 * __cp_tsc_read(comm_page_t *cp)
 *
 * Read the TSC using the method selected by the comm page's TSC type field
 * (CP_TSC_TYPE).  When cp_tsc_ncpu is non-zero, the cp_tsc_sync_tick_delta
 * entry belonging to the CPU which performed the read is added to the result.
 * An unrecognized TSC type results in an illegal instruction trap (ud2a).
 *
 * Register usage:
 * %esi  - TSC type
 * %r8d  - cp_tsc_ncpu
 * %r9   - address of the cp_tsc_sync_tick_delta array (8-byte entries)
 * %r10d - CPU ID sampled before the TSC read (non-rdtscp methods only)
 * %r11  - holds the caller's %rbx while cpuid is executed
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_tsc_read)
	movl	CP_TSC_TYPE(%rdi), %esi
	movl	CP_TSC_NCPU(%rdi), %r8d
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9

	cmpl	$TSC_TSCP, %esi
	jne	2f
	rdtscp
	/*
	 * When the TSC is read, the low 32 bits are placed in %eax while the
	 * high 32 bits are placed in %edx.  They are shifted and ORed together
	 * to obtain the full 64-bit value.
	 */
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * A zero cp_tsc_ncpu (held in %r8d) means that no per-CPU offsets are
	 * in use and the raw reading is returned as-is.  The TSC type in %esi
	 * must not be used for this test: it always equals TSC_TSCP on this
	 * path and so says nothing about whether offsets are present.
	 */
	testl	%r8d, %r8d
	jnz	1f
	ret
1:
	/*
	 * When cp_tsc_ncpu is non-zero, it indicates the length of the
	 * cp_tsc_sync_tick_delta array, which contains per-CPU offsets for the
	 * TSC.  The CPU ID furnished by the IA32_TSC_AUX register via rdtscp
	 * (delivered in %ecx) is used to look up an offset value in that array
	 * and apply it to the TSC reading.
	 */
	movq	(%r9, %rcx, 8), %rdx
	addq	%rdx, %rax
	ret

2:
	/*
	 * Without rdtscp, there is no way to perform a TSC reading and
	 * simultaneously query the current CPU.  If tsc_ncpu indicates that
	 * per-CPU TSC offsets are present, the ID of the current CPU is
	 * queried before performing a TSC reading.  It will be later compared
	 * to a second CPU ID lookup to catch CPU migrations.
	 *
	 * This method will catch all but the most pathological scheduling.
	 */
	testl	%r8d, %r8d
	jz	3f
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx

3:
	/*
	 * Save the most recently queried CPU ID for later comparison.  When
	 * no per-CPU offsets are in use, %edx was never loaded with a CPU ID;
	 * the saved value is not consulted in that case.
	 */
	movl	%edx, %r10d

	cmpl	$TSC_RDTSC_MFENCE, %esi
	jne	4f
	mfence
	rdtsc
	jmp	7f

4:
	cmpl	$TSC_RDTSC_LFENCE, %esi
	jne	5f
	lfence
	rdtsc
	jmp	7f

5:
	cmpl	$TSC_RDTSC_CPUID, %esi
	jne	6f
	/*
	 * Since the amd64 ABI dictates that %rbx is callee-saved, it must be
	 * preserved here.  Its contents will be overwritten when cpuid is used
	 * as a serializing instruction.
	 */
	movq	%rbx, %r11
	xorl	%eax, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	jmp	7f

6:
	/*
	 * Other protections should have prevented this function from being
	 * called in the first place.  The only sane action is to abort.
	 * The easiest means in this context is via SIGILL.
	 */
	ud2a

7:
	/* Combine the %edx:%eax halves into a 64-bit reading in %rax. */
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * Query the current CPU again if a per-CPU offset is being applied to
	 * the TSC reading.  If the result differs from the earlier reading,
	 * then a migration has occurred and the TSC must be read again.  The
	 * retry resumes at label 3 with the fresh CPU ID already in %edx.
	 */
	testl	%r8d, %r8d
	jz	8f
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx
	cmpl	%edx, %r10d
	jne	3b
	movq	(%r9, %rdx, 8), %rdx
	addq	%rdx, %rax
8:
	ret
	SET_SIZE(__cp_tsc_read)
146*cc401b37SPatrick Mooney
147*cc401b37SPatrick Mooney
/*
 * uint_t
 * __cp_getcpu(comm_page_t *)
 *
 * Return the ID of the CPU on which the caller is currently executing.
 * The lookup method depends on the TSC type recorded in the comm page.
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_getcpu)
	movl	CP_TSC_TYPE(%rdi), %edi
	cmpl	$TSC_TSCP, %edi
	je	1f

	/*
	 * No RDTSCP: load the segment limit of the GETCPU_GDT_OFFSET selector
	 * and return it as the CPU ID.
	 */
	mov	$GETCPU_GDT_OFFSET, %eax
	lsl	%ax, %eax
	ret

1:
	/*
	 * RDTSCP is available.  Besides the TSC itself (ignored here), it
	 * yields the contents of the TSC_AUX MSR in %ecx, which is where the
	 * kernel stores the cpu_id.
	 */
	rdtscp
	movl	%ecx, %eax
	ret
	SET_SIZE(__cp_getcpu)
170*cc401b37SPatrick Mooney
/*
 * hrtime_t
 * __cp_gethrtime(comm_page_t *cp)
 *
 * Compute the current hrtime from the comm page: a snapshot of tsc_last and
 * hrtime_base is taken, the TSC is read via __cp_tsc_read, and the scaled
 * TSC delta is added to hrtime_base.  The snapshot is retried until the
 * hres_lock value observed before and after the TSC read is consistent.
 *
 * Stack usage: 0x20 local + 0x8 call = 0x28 bytes
 *
 * %rsp+0x00 - hrtime_t tsc_last
 * %rsp+0x08 - hrtime_t hrtime_base
 * %rsp+0x10 - commpage_t *cp
 * %rsp+0x18 - int hres_lock
 */
	ENTRY_NP(__cp_gethrtime)
	subq	$0x20, %rsp
	movq	%rdi, 0x10(%rsp)
1:
	/* Snapshot hres_lock, tsc_last and hrtime_base onto the stack. */
	movl	CP_HRES_LOCK(%rdi), %r9d
	movl	%r9d, 0x18(%rsp)

	movq	CP_TSC_LAST(%rdi), %rax
	movq	CP_TSC_HRTIME_BASE(%rdi), %rdx
	movq	%rax, (%rsp)
	movq	%rdx, 0x8(%rsp)

	call	__cp_tsc_read
	movq	0x10(%rsp), %rdi	/* %rdi is not preserved by the call */

	/*
	 * Retry unless the current hres_lock equals the earlier snapshot with
	 * its low bit cleared.
	 */
	movl	0x18(%rsp), %r9d
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %r9d
	cmpl	%r9d, %edx
	jne	1b

	/*
	 * The in-kernel logic for calculating hrtime performs several checks
	 * to protect against edge cases.  That logic is summarized as:
	 * if (tsc >= tsc_last) {
	 *         delta -= tsc_last;
	 * } else if (tsc >= tsc_last - 2*tsc_max_delta) {
	 *         delta = 0;
	 * } else {
	 *         delta = MIN(tsc, tsc_resume_cap);
	 * }
	 *
	 * The below implementation achieves the same result, although it is
	 * structured for speed and optimized for the fast path:
	 *
	 * delta = tsc - tsc_last;
	 * if (delta < 0) {
	 *         delta += (tsc_max_delta << 1);
	 *         if (delta >= 0) {
	 *                 delta = 0;
	 *         } else {
	 *                 delta = MIN(tsc, tsc_resume_cap);
	 *         }
	 * }
	 *
	 * "delta < 0" is detected through the borrow (carry flag) produced by
	 * the subtraction.  Only a borrow may take the slow path: when
	 * tsc == tsc_last the delta is exactly zero and must fall through,
	 * otherwise the carry test at label 3 would misclassify it and select
	 * MIN(tsc, tsc_resume_cap).
	 */
	movq	(%rsp), %rdx
	subq	%rdx, %rax		/* delta = tsc - tsc_last */
	jb	3f			/* if (delta < 0) */

2:
	/*
	 * Optimized TSC_CONVERT_AND_ADD:
	 * hrtime_base += (tsc_delta * nsec_scale) >> (32 - NSEC_SHIFT)
	 *
	 * Since the multiply and shift are done in 128-bit, there is no need
	 * to worry about overflow.
	 */
	movl	CP_NSEC_SCALE(%rdi), %ecx
	mulq	%rcx
	shrdq	$_CONST(32 - NSEC_SHIFT), %rdx, %rax
	movq	0x8(%rsp), %r8
	addq	%r8, %rax

	addq	$0x20, %rsp
	ret

3:
	movq	%rax, %r9		/* save (tsc - tsc_last) in r9 */
	movl	CP_TSC_MAX_DELTA(%rdi), %ecx
	sall	$1, %ecx
	addq	%rcx, %rax		/* delta += (tsc_max_delta << 1) */
	jae	4f			/* no carry: delta is still < 0 */
	xorq	%rax, %rax		/* within tolerance: delta = 0 */
	jmp	2b

4:
	/*
	 * Repopulate %rax with the TSC reading by adding tsc_last to %r9
	 * (which holds tsc - tsc_last)
	 */
	movq	(%rsp), %rax
	addq	%r9, %rax

	/* delta = MIN(tsc, resume_cap) */
	movq	CP_TSC_RESUME_CAP(%rdi), %rcx
	cmpq	%rcx, %rax
	jbe	5f
	movq	%rcx, %rax
5:
	jmp	2b

	SET_SIZE(__cp_gethrtime)
274*cc401b37SPatrick Mooney
/*
 * int
 * __cp_clock_gettime_monotonic(comm_page_t *cp, timespec_t *tsp)
 *
 * Obtain the current hrtime via __cp_gethrtime and store it in *tsp as
 * seconds (offset 0x0) and nanoseconds (offset 0x8).  Always returns 0.
 *
 * Stack usage: 0x8 local + 0x8 call + 0x28 called func. = 0x38 bytes
 *
 * %rsp+0x00 - timespec_t *tsp
 */
	ENTRY_NP(__cp_clock_gettime_monotonic)
	pushq	%rsi			/* keep tsp across the call */

	call	__cp_gethrtime

	/*
	 * Split the hrtime_t (int64_t nanoseconds) into a timespec_t, using
	 * the same approach as hrt2ts but with 64-bit math.  Division by
	 * 1,000,000,000 is replaced by a multiplication with a fixed-point
	 * reciprocal:
	 *
	 * 1 / 1,000,000,000 =
	 * 1000100101110000010111110100000100110110101101001010110110011B-26
	 * = 0x112e0be826d694b3 * 2^-26
	 *
	 * The signed 128-bit product is formed in %rdx:%rax; the upper half
	 * (%rdx) shifted right arithmetically by 26 gives the quotient:
	 *
	 * secs = hi64(nsecs * 0x112e0be826d694b3) >> 26
	 *
	 * To account for the two's complement representation of negative
	 * inputs, one final correction is applied:
	 *
	 * secs -= (nsecs >> 63)
	 */
	movq	%rax, %r8		/* r8 = nsecs */
	movq	$0x112e0be826d694b3, %rcx
	imulq	%rcx			/* rdx:rax = nsecs * reciprocal */
	sarq	$0x1a, %rdx
	movq	%r8, %rcx
	sarq	$0x3f, %rcx		/* rcx = -1 if nsecs < 0, else 0 */
	subq	%rcx, %rdx		/* rdx = secs */
	popq	%rsi			/* rsi = tsp */
	movq	%rdx, (%rsi)

	/*
	 * The nanosecond remainder follows directly:
	 * tv_nsec = nsecs - (secs * NANOSEC)
	 */
	imulq	$NANOSEC, %rdx, %rdx
	subq	%rdx, %r8
	movq	%r8, 0x8(%rsi)

	xorl	%eax, %eax
	ret
	SET_SIZE(__cp_clock_gettime_monotonic)
325*cc401b37SPatrick Mooney
/*
 * int
 * __cp_clock_gettime_realtime(comm_page_t *cp, timespec_t *tsp)
 *
 * Compute the current wall-clock time from the comm page and store it in
 * *tsp.  The time recorded at the last tick (CP_HRESTIME) is advanced by the
 * nanoseconds elapsed since that tick and by a bounded share of any pending
 * clock adjustment.  The whole computation is retried until hres_lock is
 * observed to be consistent.  Always returns 0.
 *
 * Register usage after the call to __cp_gethrtime:
 * %rax  - nslt (nanoseconds since last tick), later the adjustment amount
 * %r9   - now.tv_sec
 * %r10  - now.tv_nsec
 * %r11d - hres_adj
 *
 * Stack usage: 0x18 local + 0x8 call + 0x28 called func. = 0x48 bytes
 *
 * %rsp+0x00 - commpage_t *cp
 * %rsp+0x08 - timespec_t *tsp
 * %rsp+0x10 - int hres_lock
 */
	ENTRY_NP(__cp_clock_gettime_realtime)
	subq	$0x18, %rsp
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)

1:
	movl	CP_HRES_LOCK(%rdi), %eax
	movl	%eax, 0x10(%rsp)

	call	__cp_gethrtime
	movq	(%rsp), %rdi
	movq	CP_HRES_LAST_TICK(%rdi), %rdx
	subq	%rdx, %rax			/* nslt = hrtime - last_tick */
	jb	1b				/* hrtime behind last_tick: retry */
	movq	CP_HRESTIME(%rdi), %r9
	movq	_CONST(CP_HRESTIME + CP_HRESTIME_INCR)(%rdi), %r10
	/*
	 * NOTE(review): only 32 bits of the adjustment field are loaded and
	 * examined here; confirm this matches the width of the field in the
	 * comm page definition.
	 */
	movl	CP_HRESTIME_ADJ(%rdi), %r11d

	addq	%rax, %r10			/* now.tv_nsec += nslt */

	/*
	 * hres_adj is a signed quantity, so its sign must be tested with the
	 * signed condition codes.  (A compare against zero never produces a
	 * borrow, so the unsigned "below" condition can never be true and
	 * "above" is true for every non-zero value.)
	 */
	testl	%r11d, %r11d
	jg	4f				/* hres_adj > 0 */
	jl	6f				/* hres_adj < 0 */

2:
	cmpq	$NANOSEC, %r10
	jae	8f				/* tv_nsec >= NANOSEC */

3:
	/*
	 * Retry unless the current hres_lock, with its low bit cleared,
	 * equals the snapshot taken before the computation.
	 */
	movl	0x10(%rsp), %eax
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %edx
	cmpl	%eax, %edx
	jne	1b

	movq	0x8(%rsp), %rsi
	movq	%r9, (%rsi)
	movq	%r10, 0x8(%rsi)

	xorl	%eax, %eax
	addq	$0x18, %rsp
	ret


4:						/* hres_adj > 0 */
	/* now.tv_nsec += MIN(nslt >> ADJ_SHIFT, hres_adj) */
	sarq	$ADJ_SHIFT, %rax
	cmpl	%r11d, %eax
	jbe	5f
	movl	%r11d, %eax
5:
	addq	%rax, %r10
	jmp	2b

6:						/* hres_adj < 0 */
	/* now.tv_nsec -= MIN(nslt >> ADJ_SHIFT, -hres_adj) */
	sarq	$ADJ_SHIFT, %rax
	negl	%r11d
	cmpl	%r11d, %eax
	jbe	7f
	movl	%r11d, %eax
7:
	subq	%rax, %r10
	jmp	2b

8:						/* tv_nsec >= NANOSEC */
	/* Carry whole seconds out of tv_nsec into tv_sec. */
	subq	$NANOSEC, %r10
	incq	%r9
	cmpq	$NANOSEC, %r10
	jae	8b
	jmp	3b

	SET_SIZE(__cp_clock_gettime_realtime)
407