/*-
 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include "namespace.h"
#include <sys/capsicum.h>
#include <sys/elf.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/vdso.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/pvclock.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
#include <dev/hyperv/hyperv.h>
#endif
#include <x86/ifunc.h>
#include "libc_private.h"

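/*
 * Read the TSC and discard the low-order bits: the 64-bit counter is
 * shifted right by th->th_x86_shift and the low 32 bits of the result
 * are returned as the timecounter value.  The RDTSCP variant receives
 * the shift count in %edi because RDTSCP overwrites %ecx with TSC_AUX.
 */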
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}

static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
	return (rv);
}

static u_int
rdtsc_low_mb_lfence(const struct vdso_timehands *th)
{
	lfence();
	return (rdtsc_low(th));
}

static u_int
rdtsc_low_mb_mfence(const struct vdso_timehands *th)
{
	mfence();
	return (rdtsc_low(th));
}

static u_int
rdtsc_low_mb_none(const struct vdso_timehands *th)
{
	return (rdtsc_low(th));
}

static u_int
rdtsc32_mb_lfence(void)
{
	lfence();
	return (rdtsc32());
}

static uint64_t
rdtsc_mb_lfence(void)
{
	lfence();
	return (rdtsc());
}

static u_int
rdtsc32_mb_mfence(void)
{
	mfence();
	return (rdtsc32());
}

static uint64_t
rdtsc_mb_mfence(void)
{
	mfence();
	return (rdtsc());
}

static u_int
rdtsc32_mb_none(void)
{
	return (rdtsc32());
}

static uint64_t
rdtsc_mb_none(void)
{
	return (rdtsc());
}

static u_int
rdtscp32_(void)
{
	return (rdtscp32());
}

static uint64_t
rdtscp_(void)
{
	return (rdtscp());
}

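/*
 * Per-CPU-class sets of TSC read methods.  tsc_selector_idx() picks a
 * row: Intel-style CPUs order RDTSC with LFENCE, AMD and Hygon CPUs
 * with MFENCE, CPUs without SSE2 get no barrier at all, and CPUs with
 * RDTSCP use it directly since it does not need a separate fence.
 */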
struct tsc_selector_tag {
	u_int (*ts_rdtsc32)(void);
	uint64_t (*ts_rdtsc)(void);
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};

static const struct tsc_selector_tag tsc_selector[] = {
	[0] = {	/* Intel, LFENCE */
		.ts_rdtsc32 = rdtsc32_mb_lfence,
		.ts_rdtsc = rdtsc_mb_lfence,
		.ts_rdtsc_low = rdtsc_low_mb_lfence,
	},
	[1] = {	/* AMD, MFENCE */
		.ts_rdtsc32 = rdtsc32_mb_mfence,
		.ts_rdtsc = rdtsc_mb_mfence,
		.ts_rdtsc_low = rdtsc_low_mb_mfence,
	},
	[2] = {	/* No SSE2 */
		.ts_rdtsc32 = rdtsc32_mb_none,
		.ts_rdtsc = rdtsc_mb_none,
		.ts_rdtsc_low = rdtsc_low_mb_none,
	},
	[3] = {	/* RDTSCP */
		.ts_rdtsc32 = rdtscp32_,
		.ts_rdtsc = rdtscp_,
		.ts_rdtsc_low = rdtscp_low,
	},
};

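/*
 * Pick the tsc_selector[] row for this CPU: prefer RDTSCP when the
 * AMDID_RDTSCP extended feature is present, fall back to the unfenced
 * variant when SSE2 (and thus LFENCE/MFENCE) is unavailable, and
 * otherwise choose the fence matching the vendor string reported by
 * CPUID leaf 0.
 */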
static int
tsc_selector_idx(u_int cpu_feature)
{
	u_int amd_feature, cpu_exthigh, p[4], v[3];
	static const char amd_id[] = "AuthenticAMD";
	static const char hygon_id[] = "HygonGenuine";
	bool amd_cpu;

	if (cpu_feature == 0)
		return (2);	/* should not happen due to RDTSC */

	do_cpuid(0, p);
	v[0] = p[1];
	v[1] = p[3];
	v[2] = p[2];
	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;

	if (cpu_feature != 0) {
		do_cpuid(0x80000000, p);
		cpu_exthigh = p[0];
	} else {
		cpu_exthigh = 0;
	}
	if (cpu_exthigh >= 0x80000001) {
		do_cpuid(0x80000001, p);
		amd_feature = p[3];
	} else {
		amd_feature = 0;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (3);
	if ((cpu_feature & CPUID_SSE2) == 0)
		return (2);
	return (amd_cpu ? 1 : 0);
}

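/*
 * The resolvers below run when these ifunc symbols are relocated and
 * bind each entry point to the method selected for this CPU, so no
 * per-call feature check is needed.
 */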
DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
    (const struct vdso_timehands *th))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
}

DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
}

DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void))
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc);
}

#define	HPET_DEV_MAP_MAX	10
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];

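/*
 * Open and map the HPET register page for unit 'u'.  The "/dev/hpetN"
 * device name is built by hand: the digits are emitted in reverse order
 * and then swapped into place.  The mapping is published into
 * hpet_dev_map[] with a release compare-and-swap, and a thread that
 * loses the race unmaps its own copy; MAP_FAILED is stored on failure
 * so the caller does not re-enter this function on every call.
 */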
static void
__vdso_init_hpet(uint32_t u)
{
	static const char devprefix[] = "/dev/hpet";
	char devname[64], *c, *c1, t;
	volatile char *new_map, *old_map;
	unsigned int mode;
	uint32_t u1;
	int fd;

	c1 = c = stpcpy(devname, devprefix);
	u1 = u;
	do {
		*c++ = u1 % 10 + '0';
		u1 /= 10;
	} while (u1 != 0);
	*c = '\0';
	for (c--; c1 != c; c1++, c--) {
		t = *c1;
		*c1 = *c;
		*c = t;
	}

	old_map = hpet_dev_map[u];
	if (old_map != NULL)
		return;

	/*
	 * Explicitly check for the capability mode to avoid
	 * triggering trap_enocap on the device open by absolute path.
	 */
	if ((cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) {
		/* Prevent the caller from re-entering. */
		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
		return;
	}

	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
	    new_map != MAP_FAILED)
		munmap((void *)new_map, PAGE_SIZE);
}

#ifdef WANT_HYPERV

#define	HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * We use 'NULL' for this variable to indicate that initialization
 * is required.  And if this variable is 'MAP_FAILED', then the Hyper-V
 * reference TSC cannot be used, e.g., in a misconfigured jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;

static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		goto fail;
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}

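/*
 * Convert the Hyper-V reference TSC page to a timecounter value.  The
 * page is protected by a sequence counter: a zero sequence means the
 * page is not valid and ENOSYS is returned, and a sequence change
 * during the computation forces a retry.  The scaled result is the
 * high 64 bits of the 128-bit product tsc * tsc_scale, plus tsc_ofs.
 */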
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		mfence();	/* XXXKIB */
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}

#endif	/* WANT_HYPERV */

static struct pvclock_vcpu_time_info *pvclock_timeinfos;

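/*
 * Read the paravirtual clock (KVM/Xen pvclock) time info.  Each entry
 * is protected by an even/odd version counter.  When the hypervisor
 * advertises a stable TSC (th_x86_pvc_stable_mask), entry 0 is usable
 * from any vCPU with a plain TSC read; otherwise RDTSCP's TSC_AUX value
 * selects the per-vCPU entry and the loop retries if the thread
 * migrated between the two reads.  The result is clamped to be no less
 * than th_x86_pvc_last_systime to keep the counter monotonic.
 */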
static int
__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc)
{
	uint64_t delta, ns, tsc;
	struct pvclock_vcpu_time_info *ti;
	uint32_t cpuid_ti, cpuid_tsc, version;
	bool stable;

	do {
		ti = &pvclock_timeinfos[0];
		version = atomic_load_acq_32(&ti->version);
		stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0;
		if (stable) {
			tsc = __vdso_gettc_rdtsc();
		} else {
			(void)rdtscp_aux(&cpuid_ti);
			ti = &pvclock_timeinfos[cpuid_ti];
			version = atomic_load_acq_32(&ti->version);
			tsc = rdtscp_aux(&cpuid_tsc);
		}
		delta = tsc - ti->tsc_timestamp;
		ns = ti->system_time + pvclock_scale_delta(delta,
		    ti->tsc_to_system_mul, ti->tsc_shift);
		atomic_thread_fence_acq();
	} while ((ti->version & 1) != 0 || ti->version != version ||
	    (!stable && cpuid_ti != cpuid_tsc));
	*tc = MAX(ns, th->th_x86_pvc_last_systime);
	return (0);
}

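/*
 * Map the array of per-vCPU pvclock time info structures exported by
 * the pvclock character device.  The pointer is published with a
 * release compare-and-swap, and MAP_FAILED marks a failed attempt so
 * that it is not retried on every call.
 */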
static void
__vdso_init_pvclock_timeinfos(void)
{
	struct pvclock_vcpu_time_info *timeinfos;
	size_t len;
	int fd, ncpus;
	unsigned int mode;

	timeinfos = MAP_FAILED;
	if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 ||
	    (cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0)
		goto leave;
	len = ncpus * sizeof(*pvclock_timeinfos);
	timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
leave:
	if (atomic_cmpset_rel_ptr(
	    (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL,
	    (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED)
		(void)munmap((void *)timeinfos, len);
}

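/*
 * Userspace timecounter read, dispatched on the algorithm selected by
 * the kernel in the shared timehands: raw TSC, HPET main counter,
 * Hyper-V reference TSC, or pvclock.  ENOSYS tells the caller that the
 * counter cannot be read from userspace, in which case the time is
 * obtained via the system call instead.
 */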
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_gettc_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	case VDSO_TH_ALGO_X86_PVCLK:
		if (pvclock_timeinfos == NULL)
			__vdso_init_pvclock_timeinfos();
		if (pvclock_timeinfos == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_pvclock_gettc(th, tc));
	default:
		return (ENOSYS);
	}
}

#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}