1 /*- 2 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> 3 * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation 4 * All rights reserved. 5 * 6 * Portions of this software were developed by Konstantin Belousov 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
 */

/*
 * Userspace (vDSO) timecounter readers for x86.  __vdso_gettc() returns the
 * raw counter value for the timehands algorithm selected by the kernel:
 * TSC (with vendor-appropriate serialization), HPET (memory-mapped via
 * /dev/hpetN), Hyper-V reference TSC, or KVM/Xen pvclock.
 */

#include <sys/param.h>
#include "namespace.h"
#include <sys/capsicum.h>
#include <sys/elf.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/vdso.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/pvclock.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
#include <dev/hyperv/hyperv.h>
#endif
#include <x86/ifunc.h>
#include "libc_private.h"

/*
 * Read the TSC and return the 64-bit value shifted right by
 * th->th_x86_shift, truncated to 32 bits.  The shrd combines
 * %edx:%eax so low TSC bits dropped by the shift are recovered
 * from the high word.
 */
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}

/*
 * Same as rdtsc_low(), but using RDTSCP, which waits for prior
 * instructions to retire and so needs no explicit fence.  RDTSCP
 * clobbers %ecx with the IA32_TSC_AUX value, hence the shift count
 * is passed in %edi and moved into %ecx afterwards.
 */
static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
	return (rv);
}

/* rdtsc_low() preceded by LFENCE (serialization used on Intel). */
static u_int
rdtsc_low_mb_lfence(const struct vdso_timehands *th)
{
	lfence();
	return (rdtsc_low(th));
}

/* rdtsc_low() preceded by MFENCE (serialization used on AMD/Hygon). */
static u_int
rdtsc_low_mb_mfence(const struct vdso_timehands *th)
{
	mfence();
	return (rdtsc_low(th));
}

/* rdtsc_low() with no fence, for CPUs without SSE2. */
static u_int
rdtsc_low_mb_none(const struct vdso_timehands *th)
{
	return (rdtsc_low(th));
}

/* 32-bit TSC read with LFENCE serialization. */
static u_int
rdtsc32_mb_lfence(void)
{
	lfence();
	return (rdtsc32());
}

/* Full 64-bit TSC read with LFENCE serialization. */
static uint64_t
rdtsc_mb_lfence(void)
{
	lfence();
	return (rdtsc());
}

/* 32-bit TSC read with MFENCE serialization. */
static u_int
rdtsc32_mb_mfence(void)
{
	mfence();
	return (rdtsc32());
}

/* Full 64-bit TSC read with MFENCE serialization. */
static uint64_t
rdtsc_mb_mfence(void)
{
	mfence();
	return (rdtsc());
}

/* Unfenced 32-bit TSC read. */
static u_int
rdtsc32_mb_none(void)
{
	return (rdtsc32());
}

/* Unfenced 64-bit TSC read. */
static uint64_t
rdtsc_mb_none(void)
{
	return (rdtsc());
}

/* 32-bit TSC read via RDTSCP (self-serializing, no fence needed). */
static u_int
rdtscp32_(void)
{
	return (rdtscp32());
}

/* Full 64-bit TSC read via RDTSCP. */
static uint64_t
rdtscp_(void)
{
	return (rdtscp());
}

/* One set of TSC read methods, selected at ifunc-resolve time. */
struct tsc_selector_tag {
	u_int (*ts_rdtsc32)(void);
	uint64_t (*ts_rdtsc)(void);
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};

static const struct tsc_selector_tag tsc_selector[] = {
	[0] = {			/* Intel, LFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_lfence,
		.ts_rdtsc =	rdtsc_mb_lfence,
		.ts_rdtsc_low =	rdtsc_low_mb_lfence,
	},
	[1] = {			/* AMD, MFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_mfence,
		.ts_rdtsc =	rdtsc_mb_mfence,
		.ts_rdtsc_low =	rdtsc_low_mb_mfence,
	},
	[2] = {			/* No SSE2 */
		.ts_rdtsc32 =	rdtsc32_mb_none,
		.ts_rdtsc =	rdtsc_mb_none,
		.ts_rdtsc_low =	rdtsc_low_mb_none,
	},
	[3] = {			/* RDTSCP */
		.ts_rdtsc32 =	rdtscp32_,
		.ts_rdtsc =	rdtscp_,
		.ts_rdtsc_low =	rdtscp_low,
	},
};

/*
 * Choose the tsc_selector[] row for this CPU: prefer RDTSCP when the
 * extended CPUID leaf advertises it, fall back to an unfenced read
 * without SSE2, otherwise pick MFENCE for AMD/Hygon and LFENCE for
 * everything else (Intel convention).
 */
static int
tsc_selector_idx(u_int cpu_feature)
{
	u_int amd_feature, cpu_exthigh, p[4], v[3];
	static const char amd_id[] = "AuthenticAMD";
	static const char hygon_id[] = "HygonGenuine";
	bool amd_cpu;

	if (cpu_feature == 0)
		return (2);	/* should not happen due to RDTSC */

	/* Vendor string is returned in EBX:EDX:ECX order by leaf 0. */
	do_cpuid(0, p);
	v[0] = p[1];
	v[1] = p[3];
	v[2] = p[2];
	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;

	if (cpu_feature != 0) {
		do_cpuid(0x80000000, p);
		cpu_exthigh = p[0];
	} else {
		cpu_exthigh = 0;
	}
	if (cpu_exthigh >= 0x80000001) {
		do_cpuid(0x80000001, p);
		amd_feature = p[3];
	} else {
		amd_feature = 0;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (3);
	if ((cpu_feature & CPUID_SSE2) == 0)
		return (2);
	return (amd_cpu ? 1 : 0);
}

/* Ifunc resolvers: bind the TSC readers once, at first use. */
DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
    (const struct vdso_timehands *th))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
}

DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
}

DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc);
}

#define	HPET_DEV_MAP_MAX	10
/*
 * Per-unit cached mappings of /dev/hpetN: NULL means not yet tried,
 * MAP_FAILED means the mapping is unavailable (do not retry).
 */
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];

/*
 * Map the registers of HPET unit 'u' into this process, caching the
 * result in hpet_dev_map[u].  Publication uses a release cmpset so a
 * concurrent initializer loses gracefully and unmaps its own copy.
 */
static void
__vdso_init_hpet(uint32_t u)
{
	static const char devprefix[] = "/dev/hpet";
	char devname[64], *c, *c1, t;
	volatile char *new_map, *old_map;
	unsigned int mode;
	uint32_t u1;
	int fd;

	/* Build "/dev/hpetN": emit digits in reverse, then swap in place. */
	c1 = c = stpcpy(devname, devprefix);
	u1 = u;
	do {
		*c++ = u1 % 10 + '0';
		u1 /= 10;
	} while (u1 != 0);
	*c = '\0';
	for (c--; c1 != c; c1++, c--) {
		t = *c1;
		*c1 = *c;
		*c = t;
	}

	old_map = hpet_dev_map[u];
	if (old_map != NULL)
		return;

	/*
	 * Explicitly check for the capability mode to avoid
	 * triggering trap_enocap on the device open by absolute path.
	 */
	if ((cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) {
		/* Prevent the caller from re-entering. */
		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
		return;
	}

	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
	/* If we lost the publication race, drop our redundant mapping. */
	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
	    new_map != MAP_FAILED)
		munmap((void *)new_map, PAGE_SIZE);
}

#ifdef WANT_HYPERV

#define	HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * We use 'NULL' for this variable to indicate that initialization
 * is required.  And if this variable is 'MAP_FAILED', then Hyper-V
 * reference TSC can not be used, e.g. in misconfigured jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;

/* Map the Hyper-V reference TSC page, or latch MAP_FAILED on error. */
static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	/* Do not attempt a path-based open in capability mode. */
	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		goto fail;
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}

/*
 * Compute the Hyper-V reference time from the TSC using the
 * hypervisor-supplied scale/offset, with a seqcount retry loop.
 * Returns 0 on success, ENOSYS when tsc_seq is 0 (method disabled
 * by the hypervisor).
 */
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		mfence();	/* XXXKIB: fence choice not vendor-dispatched here */
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}

#endif	/* WANT_HYPERV */

/*
 * Per-vCPU pvclock time info array: NULL means initialization is
 * required, MAP_FAILED means pvclock cannot be used.
 */
static struct pvclock_vcpu_time_info *pvclock_timeinfos;

/*
 * Read the pvclock (KVM/Xen paravirtual clock).  If the hypervisor
 * marks the TSC stable, vCPU 0's parameters apply everywhere;
 * otherwise RDTSCP's TSC_AUX is used to pick the current vCPU's
 * entry, retrying if the thread migrated between the two reads or
 * the per-vCPU version changed (odd version = update in progress).
 */
static int
__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc)
{
	uint64_t delta, ns, tsc;
	struct pvclock_vcpu_time_info *ti;
	uint32_t cpuid_ti, cpuid_tsc, version;
	bool stable;

	do {
		ti = &pvclock_timeinfos[0];
		version = atomic_load_acq_32(&ti->version);
		stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0;
		if (stable) {
			tsc = __vdso_gettc_rdtsc();
		} else {
			(void)rdtscp_aux(&cpuid_ti);
			ti = &pvclock_timeinfos[cpuid_ti];
			version = atomic_load_acq_32(&ti->version);
			tsc = rdtscp_aux(&cpuid_tsc);
		}
		delta = tsc - ti->tsc_timestamp;
		ns = ti->system_time + pvclock_scale_delta(delta,
		    ti->tsc_to_system_mul, ti->tsc_shift);
		atomic_thread_fence_acq();
	} while ((ti->version & 1) != 0 || ti->version != version ||
	    (!stable && cpuid_ti != cpuid_tsc));
	/* Clamp to the last value seen by the kernel to keep time monotonic. */
	*tc = MAX(ns, th->th_x86_pvc_last_systime);
	return (0);
}

/*
 * Map the shared pvclock time-info pages from /dev/pvclock, one entry
 * per CPU.  On any failure MAP_FAILED is latched so the caller does
 * not retry; a losing racer unmaps its duplicate mapping.
 */
static void
__vdso_init_pvclock_timeinfos(void)
{
	struct pvclock_vcpu_time_info *timeinfos;
	size_t len;
	int fd, ncpus;
	unsigned int mode;

	timeinfos = MAP_FAILED;
	if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 ||
	    (cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0)
		goto leave;
	len = ncpus * sizeof(*pvclock_timeinfos);
	timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
leave:
	if (atomic_cmpset_rel_ptr(
	    (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL,
	    (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED)
		(void)munmap((void *)timeinfos, len);
}

/*
 * Return the current value of the timecounter selected by the kernel
 * in 'th' through '*tc'.  Returns 0 on success or ENOSYS when the
 * algorithm cannot be serviced from userspace (caller then falls back
 * to the syscall path).
 */
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_gettc_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	case VDSO_TH_ALGO_X86_PVCLK:
		if (pvclock_timeinfos == NULL)
			__vdso_init_pvclock_timeinfos();
		if (pvclock_timeinfos == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_pvclock_gettc(th, tc));
	default:
		return (ENOSYS);
	}
}

/*
 * Return the shared timekeep page pointer published by the kernel via
 * the AT_TIMEKEEP auxv entry.
 */
#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}