xref: /titanic_52/usr/src/lib/commpage/amd64/cp_subr.s (revision 263f549e5da8b32c4922f586afb365b8ae388a6c)
1*263f549eSPatrick Mooney/*
2*263f549eSPatrick Mooney * This file and its contents are supplied under the terms of the
3*263f549eSPatrick Mooney * Common Development and Distribution License ("CDDL"), version 1.0.
4*263f549eSPatrick Mooney * You may only use this file in accordance with the terms of version
5*263f549eSPatrick Mooney * 1.0 of the CDDL.
6*263f549eSPatrick Mooney *
7*263f549eSPatrick Mooney * A full copy of the text of the CDDL should have accompanied this
8*263f549eSPatrick Mooney * source.  A copy of the CDDL is also available via the Internet at
9*263f549eSPatrick Mooney * http://www.illumos.org/license/CDDL.
10*263f549eSPatrick Mooney */
11*263f549eSPatrick Mooney
12*263f549eSPatrick Mooney/*
13*263f549eSPatrick Mooney * Copyright 2016 Joyent, Inc.
14*263f549eSPatrick Mooney */
15*263f549eSPatrick Mooney
16*263f549eSPatrick Mooney#include <sys/asm_linkage.h>
17*263f549eSPatrick Mooney#include <sys/segments.h>
18*263f549eSPatrick Mooney#include <sys/time_impl.h>
19*263f549eSPatrick Mooney#include <sys/tsc.h>
20*263f549eSPatrick Mooney#include <cp_offsets.h>
21*263f549eSPatrick Mooney
22*263f549eSPatrick Mooney#define	GETCPU_GDT_OFFSET	SEL_GDT(GDT_CPUID, SEL_UPL)
23*263f549eSPatrick Mooney
24*263f549eSPatrick Mooney	.file	"cp_subr.s"
25*263f549eSPatrick Mooney
26*263f549eSPatrick Mooney/*
27*263f549eSPatrick Mooney * These are cloned from TSC and time related code in the kernel.  They should
28*263f549eSPatrick Mooney * be kept in sync in the case that the source values are changed.
29*263f549eSPatrick Mooney * See: uts/i86pc/os/timestamp.c
30*263f549eSPatrick Mooney */
31*263f549eSPatrick Mooney#define	NSEC_SHIFT	5
32*263f549eSPatrick Mooney#define	ADJ_SHIFT	4
33*263f549eSPatrick Mooney#define	NANOSEC		0x3b9aca00
34*263f549eSPatrick Mooney
/*
 * hrtime_t
 * __cp_tsc_read(comm_page_t *cp)
 *
 * ABI:   SysV AMD64 (illumos userland)
 * In:    %rdi = comm page pointer
 * Out:   %rax = raw TSC reading, adjusted by the per-CPU tick delta when
 *        cp_tsc_ncpu is non-zero
 * Clobbers: %rsi, %rdx, %rcx, %r8-%r11, flags (%rbx preserved around cpuid)
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_tsc_read)
	movl	CP_TSC_TYPE(%rdi), %esi
	movl	CP_TSC_NCPU(%rdi), %r8d
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9

	cmpl	$TSC_TSCP, %esi
	jne	2f
	rdtscp
	/*
	 * When the TSC is read, the low 32 bits are placed in %eax while the
	 * high 32 bits are placed in %edx.  They are shifted and ORed together
	 * to obtain the full 64-bit value.
	 */
	shlq	$0x20, %rdx
	orq	%rdx, %rax
	/*
	 * FIX: gate the per-CPU delta on cp_tsc_ncpu (%r8d), not on the TSC
	 * type in %esi.  On this path %esi always holds TSC_TSCP (non-zero),
	 * so the previous "cmpl $0, %esi / jne 1f" branched unconditionally,
	 * leaving the bare ret below unreachable and applying a tick delta
	 * even when cp_tsc_ncpu is 0.
	 */
	testl	%r8d, %r8d
	jnz	1f
	ret
1:
	/*
	 * When cp_tsc_ncpu is non-zero, it indicates the length of the
	 * cp_tsc_sync_tick_delta array, which contains per-CPU offsets for the
	 * TSC.  The CPU ID furnished by the IA32_TSC_AUX register via rdtscp
	 * (zero-extended into %rcx) is used to look up an offset value in that
	 * array and apply it to the TSC reading.
	 */
	movq	(%r9, %rcx, 8), %rdx
	addq	%rdx, %rax
	ret

2:
	/*
	 * Without rdtscp, there is no way to perform a TSC reading and
	 * simultaneously query the current CPU.  If tsc_ncpu indicates that
	 * per-CPU TSC offsets are present, the ID of the current CPU is
	 * queried before performing a TSC reading.  It will be later compared
	 * to a second CPU ID lookup to catch CPU migrations.
	 *
	 * This method will catch all but the most pathological scheduling.
	 */
	cmpl	$0, %r8d
	je	3f
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx

3:
	/* Save the most recently queried CPU ID for later comparison. */
	movl	%edx, %r10d

	cmpl	$TSC_RDTSC_MFENCE, %esi
	jne	4f
	mfence
	rdtsc
	jmp	7f

4:
	cmpl	$TSC_RDTSC_LFENCE, %esi
	jne	5f
	lfence
	rdtsc
	jmp	7f

5:
	cmpl	$TSC_RDTSC_CPUID, %esi
	jne	6f
	/*
	 * Since the amd64 ABI dictates that %rbx is callee-saved, it must be
	 * preserved here.  Its contents will be overwritten when cpuid is used
	 * as a serializing instruction.
	 */
	movq	%rbx, %r11
	xorl	%eax, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	jmp	7f

6:
	/*
	 * Other protections should have prevented this function from being
	 * called in the first place.  The only sane action is to abort.
	 * The easiest means in this context is via SIGILL.
	 */
	ud2a

7:
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * Query the current CPU again if a per-CPU offset is being applied to
	 * the TSC reading.  If the result differs from the earlier reading,
	 * then a migration has occurred and the TSC must be read again.
	 */
	cmpl	$0, %r8d
	je	8f
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx
	cmpl	%edx, %r10d
	jne	3b
	movq	(%r9, %rdx, 8), %rdx
	addq	%rdx, %rax
8:
	ret
	SET_SIZE(__cp_tsc_read)
146*263f549eSPatrick Mooney
147*263f549eSPatrick Mooney
/*
 * uint_t
 * __cp_getcpu(comm_page_t *)
 *
 * Returns the ID of the CPU on which the caller is currently running.
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_getcpu)
	movl	CP_TSC_TYPE(%rdi), %esi
	/*
	 * With RDTSCP available, the CPU ID stashed by the kernel in the
	 * IA32_TSC_AUX MSR can be fetched directly (it lands in %ecx).
	 */
	cmpl	$TSC_TSCP, %esi
	jne	1f
	rdtscp
	movl	%ecx, %eax
	ret
1:
	/* Otherwise query the CPU ID via the special GDT descriptor limit. */
	movl	$GETCPU_GDT_OFFSET, %eax
	lsl	%ax, %eax
	ret
	SET_SIZE(__cp_getcpu)
170*263f549eSPatrick Mooney
/*
 * hrtime_t
 * __cp_gethrtime(comm_page_t *cp)
 *
 * Compute a high-resolution timestamp from the TSC plus the scale/base
 * values published in the comm page, retrying if the kernel updates those
 * values (cp_hres_lock) while they are being read.
 *
 * Stack usage: 0x20 local + 0x8 call = 0x28 bytes
 *
 * %rsp+0x00 - hrtime_t tsc_last
 * %rsp+0x08 - hrtime_t hrtime_base
 * %rsp+0x10 - commpage_t *cp
 * %rsp+0x18 - int hres_lock
 */
	ENTRY_NP(__cp_gethrtime)
	subq	$0x20, %rsp
	movq	%rdi, 0x10(%rsp)
1:
	/*
	 * Snapshot cp_hres_lock, then copy out tsc_last and hrtime_base and
	 * read the TSC.  Afterwards the saved lock value (low bit masked off)
	 * must still equal the current one; otherwise an update raced with
	 * the reads (or was in flight when they started) and we retry.
	 */
	movl	CP_HRES_LOCK(%rdi), %r9d
	movl	%r9d, 0x18(%rsp)

	movq	CP_TSC_LAST(%rdi), %rax
	movq	CP_TSC_HRTIME_BASE(%rdi), %rdx
	movq	%rax, (%rsp)
	movq	%rdx, 0x8(%rsp)

	call	__cp_tsc_read
	movq	0x10(%rsp), %rdi

	movl	0x18(%rsp), %r9d
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %r9d
	cmpl	%r9d, %edx
	jne	1b

	/*
	 * The in-kernel logic for calculating hrtime performs several checks
	 * to protect against edge cases.  That logic is summarized as:
	 * if (tsc >= tsc_last) {
	 *         delta -= tsc_last;
	 * } else if (tsc >= tsc_last - 2*tsc_max_delta) {
	 *         delta = 0;
	 * } else {
	 *         delta = MIN(tsc, tsc_resume_cap);
	 * }
	 *
	 * The below implementation achieves the same result, although it is
	 * structured for speed and optimized for the fast path:
	 *
	 * delta = tsc - tsc_last;
	 * if (delta < 0) {
	 *         delta += (tsc_max_delta << 1);
	 *         if (delta >= 0) {
	 *                 delta = 0;
	 *         } else {
	 *                 delta = MIN(tsc, tsc_resume_cap);
	 *         }
	 * }
	 */
	movq	(%rsp), %rdx
	subq	%rdx, %rax		/* delta = tsc - tsc_last */
	/*
	 * FIX: take the slow path only when delta is strictly negative (CF
	 * set), as the comment above specifies.  The previous 'jbe' also
	 * diverted the tsc == tsc_last case; the slow path's carry-based
	 * sign test at 3: assumes a strictly negative entry value, so a zero
	 * delta was misclassified as a resume and had tsc_resume_cap applied
	 * instead of yielding a delta of 0.
	 */
	jb	3f			/* if (delta < 0) */

2:
	/*
	 * Optimized TSC_CONVERT_AND_ADD:
	 * hrtime_base += (tsc_delta * nsec_scale) >> (32 - NSEC_SHIFT)
	 *
	 * Since the multiply and shift are done in 128-bit, there is no need
	 * to worry about overflow.
	 */
	movl	CP_NSEC_SCALE(%rdi), %ecx
	mulq	%rcx			/* 128-bit product in %rdx:%rax */
	shrdq	$_CONST(32 - NSEC_SHIFT), %rdx, %rax
	movq	0x8(%rsp), %r8
	addq	%r8, %rax		/* result = hrtime_base + scaled delta */

	addq	$0x20, %rsp
	ret

3:
	movq	%rax, %r9		/* save (tsc - tsc_last) in r9 */
	movl	CP_TSC_MAX_DELTA(%rdi), %ecx
	sall	$1, %ecx
	addq	%rcx, %rax		/* delta += (tsc_max_delta << 1) */
	jae	4f			/* no carry => delta still < 0 */
	xorq	%rax, %rax		/* within slop: delta = 0 */
	jmp	2b

4:
	/*
	 * Repopulate %rax with the TSC reading by adding tsc_last to %r9
	 * (which holds tsc - tsc_last)
	 */
	movq	(%rsp), %rax
	addq	%r9, %rax

	/* delta = MIN(tsc, resume_cap) */
	movq	CP_TSC_RESUME_CAP(%rdi), %rcx
	cmpq	%rcx, %rax
	jbe	5f
	movq	%rcx, %rax
5:
	jmp	2b

	SET_SIZE(__cp_gethrtime)
274*263f549eSPatrick Mooney
/*
 * int
 * __cp_clock_gettime_monotonic(comm_page_t *cp, timespec_t *tsp)
 *
 * Stores the current monotonic time (from __cp_gethrtime) into *tsp and
 * returns 0.
 *
 * Stack usage: 0x8 local + 0x8 call + 0x28 called func. = 0x38 bytes
 *
 * %rsp+0x00 - timespec_t *tsp
 */
	ENTRY_NP(__cp_clock_gettime_monotonic)
	subq	$0x8, %rsp
	movq	%rsi, (%rsp)		/* stash tsp across the call */

	call	__cp_gethrtime

	/*
	 * Convert from hrtime_t (int64_t in nanoseconds) to timespec_t.
	 * This uses the same approach as hrt2ts, although it has been updated
	 * to utilize 64-bit math.
	 * 1 / 1,000,000,000 =
	 * 1000100101110000010111110100000100110110101101001010110110011B-26
	 * = 0x112e0be826d694b3 * 2^-26
	 *
	 * secs = (nsecs * 0x112e0be826d694b3) >> 26
	 *
	 * In order to account for the two's complement of negative inputs, a
	 * final operation completes the process:
	 *
	 * secs -= (nsecs >> 63)
	 */
	movq	%rax, %r11		/* preserve nsecs; imulq clobbers rax */
	movq	$0x112e0be826d694b3, %rdx
	imulq	%rdx			/* 128-bit signed product in rdx:rax */
	sarq	$0x1a, %rdx		/* secs = (product >> 64) >> 26 */
	movq	%r11, %rax
	sarq	$0x3f, %rax		/* 0 or -1: the sign of nsecs */
	subq	%rax, %rdx		/* secs -= (nsecs >> 63) */
	movq	(%rsp), %rsi
	movq	%rdx, (%rsi)		/* tsp->tv_sec = secs */
	/*
	 * Populating tv_nsec is easier:
	 * tv_nsec = nsecs - (secs * NANOSEC)
	 */
	imulq	$NANOSEC, %rdx, %rdx
	subq	%rdx, %r11
	movq	%r11, 0x8(%rsi)		/* tsp->tv_nsec */

	xorl	%eax, %eax		/* return 0 */
	addq	$0x8, %rsp
	ret
	SET_SIZE(__cp_clock_gettime_monotonic)
325*263f549eSPatrick Mooney
/*
 * int
 * __cp_clock_gettime_realtime(comm_page_t *cp, timespec_t *tsp)
 *
 * Stores the current wall-clock time into *tsp and returns 0.  The result
 * is hrestime advanced by the nanoseconds elapsed since the last clock
 * tick, with any in-progress adjtime() slew applied.
 *
 * Stack usage: 0x18 local + 0x8 call + 0x28 called func. = 0x48 bytes
 *
 * %rsp+0x00 - commpage_t *cp
 * %rsp+0x08 - timespec_t *tsp
 * %rsp+0x10 - int hres_lock
 */
	ENTRY_NP(__cp_clock_gettime_realtime)
	subq	$0x18, %rsp
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)

1:
	/* Sample the lock before reading; the read is retried if it moved. */
	movl	CP_HRES_LOCK(%rdi), %eax
	movl	%eax, 0x10(%rsp)

	call	__cp_gethrtime
	movq	(%rsp), %rdi
	movq	CP_HRES_LAST_TICK(%rdi), %rdx
	subq	%rdx, %rax			/* nslt = hrtime - last_tick */
	jb	1b				/* torn read: retry */
	movq	CP_HRESTIME(%rdi), %r9
	movq	_CONST(CP_HRESTIME + CP_HRESTIME_INCR)(%rdi), %r10
	/*
	 * NOTE(review): only the low 32 bits of cp_hrestime_adj are loaded
	 * here; confirm against cp_offsets.h that the field is not wider.
	 */
	movl	CP_HRESTIME_ADJ(%rdi), %r11d

	addq	%rax, %r10			/* now.tv_nsec += nslt */

	/*
	 * FIX: the sign test on hres_adj must use signed condition codes.
	 * The previous unsigned 'jb'/'ja' pair could never take the positive
	 * branch (cmp against 0 cannot set CF), so every non-zero adjustment
	 * fell through to the negative-adjustment path and positive adjtime()
	 * slews were applied in the wrong direction.
	 */
	cmpl	$0, %r11d
	jg	4f				/* hres_adj > 0 */
	jl	6f				/* hres_adj < 0 */

2:
	cmpq	$NANOSEC, %r10
	jae	8f				/* tv_nsec >= NANOSEC */

3:
	/*
	 * Verify the lock did not change during the read.
	 * NOTE(review): the low bit is masked off the *current* lock value
	 * here, whereas __cp_gethrtime masks the *saved* one; confirm this
	 * asymmetry is intended by the cp_hres_lock update protocol.
	 */
	movl	0x10(%rsp), %eax
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %edx
	cmpl	%eax, %edx
	jne	1b

	movq	0x8(%rsp), %rsi
	movq	%r9, (%rsi)			/* tsp->tv_sec */
	movq	%r10, 0x8(%rsi)			/* tsp->tv_nsec */

	xorl	%eax, %eax			/* return 0 */
	addq	$0x18, %rsp
	ret


4:						/* hres_adj > 0 */
	sarq	$ADJ_SHIFT, %rax		/* adj = nslt >> ADJ_SHIFT */
	cmpl	%r11d, %eax
	jbe	5f
	movl	%r11d, %eax			/* adj = MIN(adj, hres_adj) */
5:
	addq	%rax, %r10			/* tv_nsec += adj */
	jmp	2b

6:						/* hres_adj < 0 */
	sarq	$ADJ_SHIFT, %rax		/* adj = nslt >> ADJ_SHIFT */
	negl	%r11d
	cmpl	%r11d, %eax
	jbe	7f
	movl	%r11d, %eax			/* adj = MIN(adj, -hres_adj) */
7:
	subq	%rax, %r10			/* tv_nsec -= adj */
	jmp	2b

8:						/* tv_nsec >= NANOSEC */
	subq	$NANOSEC, %r10
	incq	%r9
	cmpq	$NANOSEC, %r10
	jae	8b
	jmp	3b

	SET_SIZE(__cp_clock_gettime_realtime)
407