1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Fast user context implementation of clock_gettime, gettimeofday, and time. 4 * 5 * Copyright (C) 2019 ARM Limited. 6 * Copyright 2006 Andi Kleen, SUSE Labs. 7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> 8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 */ 10 #ifndef __ASM_VDSO_GETTIMEOFDAY_H 11 #define __ASM_VDSO_GETTIMEOFDAY_H 12 13 #ifndef __ASSEMBLY__ 14 15 #include <uapi/linux/time.h> 16 #include <asm/vgtod.h> 17 #include <asm/unistd.h> 18 #include <asm/msr.h> 19 #include <asm/pvclock.h> 20 #include <clocksource/hyperv_timer.h> 21 22 extern struct vdso_data vvar_page 23 __attribute__((visibility("hidden"))); 24 25 extern struct vdso_data timens_page 26 __attribute__((visibility("hidden"))); 27 28 #define VDSO_HAS_TIME 1 29 30 #define VDSO_HAS_CLOCK_GETRES 1 31 32 /* 33 * Declare the memory-mapped vclock data pages. These come from hypervisors. 34 * If we ever reintroduce something like direct access to an MMIO clock like 35 * the HPET again, it will go here as well. 36 * 37 * A load from any of these pages will segfault if the clock in question is 38 * disabled, so appropriate compiler barriers and checks need to be used 39 * to prevent stray loads. 40 * 41 * These declarations MUST NOT be const. The compiler will assume that 42 * an extern const variable has genuinely constant contents, and the 43 * resulting code won't work, since the whole point is that these pages 44 * change over time, possibly while we're accessing them. 45 */ 46 47 #ifdef CONFIG_PARAVIRT_CLOCK 48 /* 49 * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO 50 * if the hypervisor tells us that all vCPUs can get valid data from the 51 * vCPU 0 page. 52 */ 53 extern struct pvclock_vsyscall_time_info pvclock_page 54 __attribute__((visibility("hidden"))); 55 #endif 56 57 #ifdef CONFIG_HYPERV_TIMER 58 extern struct ms_hyperv_tsc_page hvclock_page 59 __attribute__((visibility("hidden"))); 60 #endif 61 62 #ifdef CONFIG_TIME_NS 63 static __always_inline 64 const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) 65 { 66 return &timens_page; 67 } 68 #endif 69 70 #ifndef BUILD_VDSO32 71 72 static __always_inline 73 long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 74 { 75 long ret; 76 77 asm ("syscall" : "=a" (ret), "=m" (*_ts) : 78 "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : 79 "rcx", "r11"); 80 81 return ret; 82 } 83 84 static __always_inline 85 long gettimeofday_fallback(struct __kernel_old_timeval *_tv, 86 struct timezone *_tz) 87 { 88 long ret; 89 90 asm("syscall" : "=a" (ret) : 91 "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); 92 93 return ret; 94 } 95 96 static __always_inline 97 long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 98 { 99 long ret; 100 101 asm ("syscall" : "=a" (ret), "=m" (*_ts) : 102 "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : 103 "rcx", "r11"); 104 105 return ret; 106 } 107 108 #else 109 110 static __always_inline 111 long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 112 { 113 long ret; 114 115 asm ( 116 "mov %%ebx, %%edx \n" 117 "mov %[clock], %%ebx \n" 118 "call __kernel_vsyscall \n" 119 "mov %%edx, %%ebx \n" 120 : "=a" (ret), "=m" (*_ts) 121 : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) 122 : "edx"); 123 124 return ret; 125 } 126 127 static __always_inline 128 long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) 129 { 130 long ret; 131 132 asm ( 133 "mov %%ebx, %%edx \n" 134 "mov %[clock], %%ebx \n" 135 "call __kernel_vsyscall \n" 136 "mov %%edx, %%ebx \n" 137 : "=a" (ret), "=m" (*_ts) 138 : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) 139 : "edx"); 140 141 return ret; 142 } 143 144 static __always_inline 145 long gettimeofday_fallback(struct __kernel_old_timeval *_tv, 146 struct timezone *_tz) 147 { 148 long ret; 149 150 asm( 151 "mov %%ebx, %%edx \n" 152 "mov %2, %%ebx \n" 153 "call __kernel_vsyscall \n" 154 "mov %%edx, %%ebx \n" 155 : "=a" (ret) 156 : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) 157 : "memory", "edx"); 158 159 return ret; 160 } 161 162 static __always_inline long 163 clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 164 { 165 long ret; 166 167 asm ( 168 "mov %%ebx, %%edx \n" 169 "mov %[clock], %%ebx \n" 170 "call __kernel_vsyscall \n" 171 "mov %%edx, %%ebx \n" 172 : "=a" (ret), "=m" (*_ts) 173 : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) 174 : "edx"); 175 176 return ret; 177 } 178 179 static __always_inline 180 long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) 181 { 182 long ret; 183 184 asm ( 185 "mov %%ebx, %%edx \n" 186 "mov %[clock], %%ebx \n" 187 "call __kernel_vsyscall \n" 188 "mov %%edx, %%ebx \n" 189 : "=a" (ret), "=m" (*_ts) 190 : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) 191 : "edx"); 192 193 return ret; 194 } 195 196 #endif 197 198 #ifdef CONFIG_PARAVIRT_CLOCK 199 static u64 vread_pvclock(void) 200 { 201 const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; 202 u32 version; 203 u64 ret; 204 205 /* 206 * Note: The kernel and hypervisor must guarantee that cpu ID 207 * number maps 1:1 to per-CPU pvclock time info. 208 * 209 * Because the hypervisor is entirely unaware of guest userspace 210 * preemption, it cannot guarantee that per-CPU pvclock time 211 * info is updated if the underlying CPU changes or that that 212 * version is increased whenever underlying CPU changes. 213 * 214 * On KVM, we are guaranteed that pvti updates for any vCPU are 215 * atomic as seen by *all* vCPUs. This is an even stronger 216 * guarantee than we get with a normal seqlock. 217 * 218 * On Xen, we don't appear to have that guarantee, but Xen still 219 * supplies a valid seqlock using the version field. 220 * 221 * We only do pvclock vdso timing at all if 222 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to 223 * mean that all vCPUs have matching pvti and that the TSC is 224 * synced, so we can just look at vCPU 0's pvti. 225 */ 226 227 do { 228 version = pvclock_read_begin(pvti); 229 230 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) 231 return U64_MAX; 232 233 ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); 234 } while (pvclock_read_retry(pvti, version)); 235 236 return ret & S64_MAX; 237 } 238 #endif 239 240 #ifdef CONFIG_HYPERV_TIMER 241 static u64 vread_hvclock(void) 242 { 243 u64 tsc, time; 244 245 if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) 246 return time & S64_MAX; 247 248 return U64_MAX; 249 } 250 #endif 251 252 static inline u64 __arch_get_hw_counter(s32 clock_mode, 253 const struct vdso_data *vd) 254 { 255 if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) 256 return (u64)rdtsc_ordered() & S64_MAX; 257 /* 258 * For any memory-mapped vclock type, we need to make sure that gcc 259 * doesn't cleverly hoist a load before the mode check. Otherwise we 260 * might end up touching the memory-mapped page even if the vclock in 261 * question isn't enabled, which will segfault. Hence the barriers. 262 */ 263 #ifdef CONFIG_PARAVIRT_CLOCK 264 if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) { 265 barrier(); 266 return vread_pvclock(); 267 } 268 #endif 269 #ifdef CONFIG_HYPERV_TIMER 270 if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) { 271 barrier(); 272 return vread_hvclock(); 273 } 274 #endif 275 return U64_MAX; 276 } 277 278 static __always_inline const struct vdso_data *__arch_get_vdso_data(void) 279 { 280 return &vvar_page; 281 } 282 283 static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) 284 { 285 return true; 286 } 287 #define vdso_clocksource_ok arch_vdso_clocksource_ok 288 289 /* 290 * Clocksource read value validation to handle PV and HyperV clocksources 291 * which can be invalidated asynchronously and indicate invalidation by 292 * returning U64_MAX, which can be effectively tested by checking for a 293 * negative value after casting it to s64. 294 * 295 * This effectively forces a S64_MAX mask on the calculations, unlike the 296 * U64_MAX mask normally used by x86 clocksources. 297 */ 298 static inline bool arch_vdso_cycles_ok(u64 cycles) 299 { 300 return (s64)cycles >= 0; 301 } 302 #define vdso_cycles_ok arch_vdso_cycles_ok 303 304 /* 305 * x86 specific calculation of nanoseconds for the current cycle count 306 * 307 * The regular implementation assumes that clocksource reads are globally 308 * monotonic. The TSC can be slightly off across sockets which can cause 309 * the regular delta calculation (@cycles - @last) to return a huge time 310 * jump. 311 * 312 * Therefore it needs to be verified that @cycles are greater than 313 * @vd->cycles_last. If not then use @vd->cycles_last, which is the base 314 * time of the current conversion period. 315 * 316 * This variant also uses a custom mask because while the clocksource mask of 317 * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses 318 * U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above 319 * declares everything with the MSB/Sign-bit set as invalid. Therefore the 320 * effective mask is S64_MAX. 321 */ 322 static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) 323 { 324 u64 delta = cycles - vd->cycle_last; 325 326 /* 327 * Negative motion and deltas which can cause multiplication 328 * overflow require special treatment. This check covers both as 329 * negative motion is guaranteed to be greater than @vd::max_cycles 330 * due to unsigned comparison. 331 * 332 * Due to the MSB/Sign-bit being used as invalid marker (see 333 * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that 334 * case is also unlikely and will also take the unlikely path here. 335 */ 336 if (unlikely(delta > vd->max_cycles)) { 337 /* 338 * Due to the above mentioned TSC wobbles, filter out 339 * negative motion. Per the above masking, the effective 340 * sign bit is now bit 62. 341 */ 342 if (delta & (1ULL << 62)) 343 return base >> vd->shift; 344 345 /* Handle multiplication overflow gracefully */ 346 return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); 347 } 348 349 return ((delta * vd->mult) + base) >> vd->shift; 350 } 351 #define vdso_calc_ns vdso_calc_ns 352 353 #endif /* !__ASSEMBLY__ */ 354 355 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ 356