/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */

#include <sys/asm_linkage.h>
#include <sys/segments.h>
#include <sys/time_impl.h>
#include <sys/tsc.h>
#include <cp_offsets.h>

#define	GETCPU_GDT_OFFSET	SEL_GDT(GDT_CPUID, SEL_UPL)

	.file	"cp_subr.s"

/*
 * These are cloned from TSC and time related code in the kernel. They should
 * be kept in sync in the case that the source values are changed.
 * See: uts/i86pc/os/timestamp.c
 */
#define	NSEC_SHIFT	5
#define	ADJ_SHIFT	4
#define	NANOSEC		0x3b9aca00

/*
 * For __cp_tsc_read calls which incur looping retries due to CPU migration,
 * this represents the maximum number of tries before bailing out.
 */
#define	TSC_READ_MAXLOOP	0x4

/*
 * hrtime_t
 * __cp_tsc_read(comm_page_t *cp)
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_tsc_read)
	movl	CP_TSC_TYPE(%rdi), %esi
	movl	CP_TSC_NCPU(%rdi), %r8d

	cmpl	$TSC_TSCP, %esi
	jne	2f
	rdtscp
	/*
	 * When the TSC is read, the low 32 bits are placed in %eax while the
	 * high 32 bits are placed in %edx. They are shifted and ORed together
	 * to obtain the full 64-bit value.
	 */
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * A zeroed cp_tsc_ncpu (currently held in r8d) indicates that no
	 * per-CPU TSC offsets are required.
	 */
	testl	%r8d, %r8d
	jnz	1f
	ret

1:
	/*
	 * A non-zero cp_tsc_ncpu indicates the array length of
	 * cp_tsc_sync_tick_delta containing per-CPU offsets which are applied
	 * to TSC readings. The CPU ID furnished by the IA32_TSC_AUX register
	 * via rdtscp (placed in rcx) is used to look up an offset value in
	 * that array and apply it to the TSC value.
	 */
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9
	movq	(%r9, %rcx, 8), %rdx
	addq	%rdx, %rax
	ret
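
	/*
	 * Expressed in C, the rdtscp fast path above is roughly the
	 * following sketch (an illustration, not an excerpt of the real
	 * sources; hi32/lo32/cpu_id name the rdtscp outputs):
	 *
	 *	tsc = ((uint64_t)hi32 << 32) | lo32;
	 *	if (cp->cp_tsc_ncpu != 0)
	 *		tsc += cp->cp_tsc_sync_tick_delta[cpu_id];
	 *	return (tsc);
	 *
	 * where cpu_id is the IA32_TSC_AUX value rdtscp leaves in %ecx.
	 */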

2:
	/*
	 * TSC reading without RDTSCP
	 *
	 * Check if handling for per-CPU TSC offsets is required. If not,
	 * immediately skip to the appropriate steps to perform a rdtsc.
	 *
	 * If per-CPU offsets are present, the TSC reading process is more
	 * complicated. Without rdtscp, there is no way to simultaneously read
	 * the TSC and query the current CPU. In order to "catch" migrations
	 * during execution, the CPU ID is queried before and after rdtsc. The
	 * execution is repeated if results differ, subject to a loop limit.
	 */
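	/*
	 * A rough C sketch of the migration-catching loop below, for the
	 * case where per-CPU offsets are in play (getcpu() is a hypothetical
	 * wrapper for the lsl-based CPU ID query; cp is the comm_page_t
	 * argument):
	 *
	 *	uint_t tries = 0;
	 *	uint_t precpu = getcpu(), postcpu;
	 *	for (;;) {
	 *		tsc = rdtsc();
	 *		postcpu = getcpu();
	 *		if (precpu == postcpu)
	 *			return (tsc +
	 *			    cp->cp_tsc_sync_tick_delta[postcpu]);
	 *		if (tries++ >= TSC_READ_MAXLOOP)
	 *			return (0);
	 *		precpu = postcpu;
	 *	}
	 */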
	xorq	%r9, %r9
	testl	%r8d, %r8d
	jz	3f

	/*
	 * Load the address of the per-CPU offset array, since it is needed.
	 * The attempted loop count is kept in r8.
	 */
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9
	xorl	%r8d, %r8d
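
	/*
	 * Note: the CPU ID queries below rely on the kernel maintaining a
	 * GDT entry (GDT_CPUID) whose segment limit equals the ID of the CPU
	 * it is read from, so an unprivileged lsl instruction can fetch the
	 * current CPU without a syscall.
	 */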

	/* Query the CPU ID and stash it in r10 for later comparison */
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%edx, %edx
	movl	%edx, %r10d

3:
	/*
	 * TSC_RDTSC_MFENCE was used in the past for AMD chips, but has been
	 * supplanted by TSC_RDTSC_LFENCE, which works on Intel and AMD (when
	 * lfence can be confirmed as serializing).
	 */

4:
	cmpl	$TSC_RDTSC_LFENCE, %esi
	jne	5f
	lfence
	rdtsc
	jmp	7f

5:
	cmpl	$TSC_RDTSC_CPUID, %esi
	jne	6f
	/*
	 * Since the amd64 ABI dictates that %rbx is callee-saved, it must be
	 * preserved here. Its contents will be overwritten when cpuid is used
	 * as a serializing instruction.
	 */
	movq	%rbx, %r11
	xorl	%eax, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	jmp	7f

6:
	/*
	 * Other protections should have prevented this function from being
	 * called in the first place. Since callers must handle a failure from
	 * CPU migration looping, yield the same result as a bail-out: 0
	 */
	xorl	%eax, %eax
	ret

7:
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * With the TSC reading in-hand, check if any per-CPU offset handling
	 * is required. The address to the array of deltas (r9) will not have
	 * been populated if offset handling is unnecessary.
	 */
	testq	%r9, %r9
	jnz	8f
	ret

8:
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%edx, %edx
	cmpl	%edx, %r10d
	jne	9f
	movq	(%r9, %rdx, 8), %rdx
	addq	%rdx, %rax
	ret

9:
	/*
	 * It appears that a migration has occurred between the first CPU ID
	 * query and now. Check if the loop limit has been broken and retry if
	 * that's not the case.
	 */
	cmpl	$TSC_READ_MAXLOOP, %r8d
	jge	10f
	incl	%r8d
	movl	%edx, %r10d
	jmp	3b

10:
	/* Loop limit was reached. Return bail-out value of 0. */
	xorl	%eax, %eax
	ret

	SET_SIZE(__cp_tsc_read)


/*
 * uint_t
 * __cp_getcpu(comm_page_t *)
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_getcpu)
	movl	CP_TSC_TYPE(%rdi), %edi
	/*
	 * If RDTSCP is available, it is a quick way to grab the cpu_id which
	 * is stored in the TSC_AUX MSR by the kernel.
	 */
	cmpl	$TSC_TSCP, %edi
	jne	1f
	rdtscp
	movl	%ecx, %eax
	ret
1:
	mov	$GETCPU_GDT_OFFSET, %eax
	lsl	%eax, %eax
	ret
	SET_SIZE(__cp_getcpu)
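
/*
 * A rough C equivalent of __cp_getcpu, for illustration (rdtscp_aux() and
 * lsl() are hypothetical stand-ins for the corresponding instructions):
 *
 *	uint_t
 *	cp_getcpu(comm_page_t *cp)
 *	{
 *		if (cp->cp_tsc_type == TSC_TSCP)
 *			return (rdtscp_aux());		// IA32_TSC_AUX
 *		return (lsl(GETCPU_GDT_OFFSET));	// segment limit
 *	}
 */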

/*
 * hrtime_t
 * __cp_gethrtime(comm_page_t *cp)
 *
 * Stack usage: 0x20 local + 0x8 call = 0x28 bytes
 *
 * %rsp+0x00 - hrtime_t tsc_last
 * %rsp+0x08 - hrtime_t hrtime_base
 * %rsp+0x10 - comm_page_t *cp
 * %rsp+0x18 - int hres_lock
 */
	ENTRY_NP(__cp_gethrtime)
	subq	$0x20, %rsp
	movq	%rdi, 0x10(%rsp)
1:
	movl	CP_HRES_LOCK(%rdi), %r9d
	movl	%r9d, 0x18(%rsp)

	movq	CP_TSC_LAST(%rdi), %rax
	movq	CP_TSC_HRTIME_BASE(%rdi), %rdx
	movq	%rax, (%rsp)
	movq	%rdx, 0x8(%rsp)

	call	__cp_tsc_read

	/*
	 * Failure is inferred from a TSC reading of 0. The normal fasttrap
	 * mechanism can be used as a fallback in such cases.
	 */
	testq	%rax, %rax
	jz	6f

	movq	0x10(%rsp), %rdi
	movl	0x18(%rsp), %r9d
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %r9d
	cmpl	%r9d, %edx
	jne	1b
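
	/*
	 * The comparison above is the reader side of the hres_lock seqlock
	 * protocol: the kernel bumps cp_hres_lock before and after updating
	 * the time values, so the low bit is set while an update is in
	 * flight. Clearing the low bit of the pre-read value and comparing
	 * it against the current lock catches both a changed lock (torn
	 * read) and an odd pre-read value (in-progress update), either of
	 * which forces a retry.
	 */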

	/*
	 * The in-kernel logic for calculating hrtime performs several checks
	 * to protect against edge cases. That logic is summarized as:
	 *
	 *	if (tsc >= tsc_last) {
	 *		delta = tsc - tsc_last;
	 *	} else if (tsc >= tsc_last - 2*tsc_max_delta) {
	 *		delta = 0;
	 *	} else {
	 *		delta = MIN(tsc, tsc_resume_cap);
	 *	}
	 *
	 * The below implementation achieves the same result, although it is
	 * structured for speed and optimized for the fast path:
	 *
	 *	delta = tsc - tsc_last;
	 *	if (delta < 0) {
	 *		delta += (tsc_max_delta << 1);
	 *		if (delta >= 0) {
	 *			delta = 0;
	 *		} else {
	 *			delta = MIN(tsc, tsc_resume_cap);
	 *		}
	 *	}
	 */
	movq	(%rsp), %rdx
	subq	%rdx, %rax		/* delta = tsc - tsc_last */
	jbe	3f			/* if (delta < 0) */

2:
	/*
	 * Optimized TSC_CONVERT_AND_ADD:
	 * hrtime_base += (tsc_delta * nsec_scale) >> (32 - NSEC_SHIFT)
	 *
	 * Since the multiply and shift are done in 128-bit, there is no need
	 * to worry about overflow.
	 */
	movl	CP_NSEC_SCALE(%rdi), %ecx
	mulq	%rcx
	shrdq	$_CONST(32 - NSEC_SHIFT), %rdx, %rax
	movq	0x8(%rsp), %r8
	addq	%r8, %rax

	addq	$0x20, %rsp
	ret

3:
	movq	%rax, %r9		/* save (tsc - tsc_last) in r9 */
	movl	CP_TSC_MAX_DELTA(%rdi), %ecx
	sall	$1, %ecx
	addq	%rcx, %rax		/* delta += (tsc_max_delta << 1) */
	jae	4f			/* delta < 0 */
	xorq	%rax, %rax
	jmp	2b

4:
	/*
	 * Repopulate %rax with the TSC reading by adding tsc_last to %r9
	 * (which holds tsc - tsc_last)
	 */
	movq	(%rsp), %rax
	addq	%r9, %rax

	/* delta = MIN(tsc, resume_cap) */
	movq	CP_TSC_RESUME_CAP(%rdi), %rcx
	cmpq	%rcx, %rax
	jbe	5f
	movq	%rcx, %rax
5:
	jmp	2b

6:
	movl	$T_GETHRTIME, %eax
	int	$T_FASTTRAP
	addq	$0x20, %rsp
	ret

	SET_SIZE(__cp_gethrtime)

/*
 * int
 * __cp_clock_gettime_monotonic(comm_page_t *cp, timespec_t *tsp)
 *
 * Stack usage: 0x8 local + 0x8 call + 0x28 called func. = 0x38 bytes
 *
 * %rsp+0x00 - timespec_t *tsp
 */
	ENTRY_NP(__cp_clock_gettime_monotonic)
	subq	$0x8, %rsp
	movq	%rsi, (%rsp)

	call	__cp_gethrtime

	/*
	 * Convert from hrtime_t (int64_t in nanoseconds) to timespec_t.
	 * This uses the same approach as hrt2ts, although it has been updated
	 * to utilize 64-bit math.
	 * 1 / 1,000,000,000 =
	 *    1000100101110000010111110100000100110110101101001010110110011B-26
	 *    = 0x112e0be826d694b3 * 2^-26
	 *
	 * secs = (nsecs * 0x112e0be826d694b3) >> 26
	 *
	 * In order to account for the two's complement of negative inputs, a
	 * final operation completes the process:
	 *
	 * secs -= (nsecs >> 63)
	 */
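	/*
	 * Note that imulq below leaves the upper 64 bits of the 128-bit
	 * product in %rdx, so the arithmetic shift of %rdx by 26 is
	 * effectively a shift of the full product by (64 + 26) = 90 bits:
	 *
	 *	secs = (nsecs * 0x112e0be826d694b3) >> 90
	 *
	 * with 0x112e0be826d694b3 ~= 2^90 / NANOSEC, which is how the
	 * division by one billion is carried out as a multiplication.
	 */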
	movq	%rax, %r11
	movq	$0x112e0be826d694b3, %rdx
	imulq	%rdx
	sarq	$0x1a, %rdx
	movq	%r11, %rax
	sarq	$0x3f, %rax
	subq	%rax, %rdx
	movq	(%rsp), %rsi
	movq	%rdx, (%rsi)
	/*
	 * Populating tv_nsec is easier:
	 * tv_nsec = nsecs - (secs * NANOSEC)
	 */
	imulq	$NANOSEC, %rdx, %rdx
	subq	%rdx, %r11
	movq	%r11, 0x8(%rsi)

	xorl	%eax, %eax
	addq	$0x8, %rsp
	ret
	SET_SIZE(__cp_clock_gettime_monotonic)

/*
 * int
 * __cp_clock_gettime_realtime(comm_page_t *cp, timespec_t *tsp)
 *
 * Stack usage: 0x18 local + 0x8 call + 0x28 called func. = 0x48 bytes
 *
 * %rsp+0x00 - comm_page_t *cp
 * %rsp+0x08 - timespec_t *tsp
 * %rsp+0x10 - int hres_lock
 */
	ENTRY_NP(__cp_clock_gettime_realtime)
	subq	$0x18, %rsp
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)

1:
	movl	CP_HRES_LOCK(%rdi), %eax
	movl	%eax, 0x10(%rsp)

	call	__cp_gethrtime
	movq	(%rsp), %rdi
	movq	CP_HRES_LAST_TICK(%rdi), %rdx
	subq	%rdx, %rax		/* nslt = hrtime - last_tick */
	jb	1b
	movq	CP_HRESTIME(%rdi), %r9
	movq	_CONST(CP_HRESTIME + CP_HRESTIME_INCR)(%rdi), %r10
	movl	CP_HRESTIME_ADJ(%rdi), %r11d

	addq	%rax, %r10		/* now.tv_nsec += nslt */

	cmpl	$0, %r11d
	jb	4f			/* hres_adj > 0 */
	ja	6f			/* hres_adj < 0 */

2:
	cmpq	$NANOSEC, %r10
	jae	8f			/* tv_nsec >= NANOSEC */

3:
	movl	0x10(%rsp), %eax
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %edx
	cmpl	%eax, %edx
	jne	1b

	movq	0x8(%rsp), %rsi
	movq	%r9, (%rsi)
	movq	%r10, 0x8(%rsi)

	xorl	%eax, %eax
	addq	$0x18, %rsp
	ret


4:	/* hres_adj > 0 */
	sarq	$ADJ_SHIFT, %rax
	cmpl	%r11d, %eax
	jbe	5f
	movl	%r11d, %eax
5:
	addq	%rax, %r10
	jmp	2b

6:	/* hres_adj < 0 */
	sarq	$ADJ_SHIFT, %rax
	negl	%r11d
	cmpl	%r11d, %eax
	jbe	7f
	movl	%r11d, %eax
7:
	subq	%rax, %r10
	jmp	2b

8:	/* tv_nsec >= NANOSEC */
	subq	$NANOSEC, %r10
	incq	%r9
	cmpq	$NANOSEC, %r10
	jae	8b
	jmp	3b

	SET_SIZE(__cp_clock_gettime_realtime)
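
/*
 * For reference, a rough C sketch of __cp_clock_gettime_realtime above,
 * following the branch comments in the code (illustrative only; MIN and the
 * field names mirror the conventions used elsewhere in this file):
 *
 *	do {
 *		lock = cp->cp_hres_lock;
 *		nslt = __cp_gethrtime(cp) - cp->cp_hres_last_tick;
 *		now = cp->cp_hrestime;
 *		adj = cp->cp_hrestime_adj;
 *		now.tv_nsec += nslt;
 *		if (adj > 0)
 *			now.tv_nsec += MIN(nslt >> ADJ_SHIFT, adj);
 *		else if (adj < 0)
 *			now.tv_nsec -= MIN(nslt >> ADJ_SHIFT, -adj);
 *		while (now.tv_nsec >= NANOSEC) {
 *			now.tv_nsec -= NANOSEC;
 *			now.tv_sec++;
 *		}
 *	} while ((cp->cp_hres_lock & ~1) != lock);
 *	*tsp = now;
 *	return (0);
 */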