xref: /freebsd/lib/libsys/x86/__vdso_gettc.c (revision a91a246563dffa876a52f53a98de4af9fa364c52)
1 /*-
2  * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
3  * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
4  * All rights reserved.
5  *
6  * Portions of this software were developed by Konstantin Belousov
7  * under sponsorship from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include "namespace.h"
33 #include <sys/capsicum.h>
34 #include <sys/elf.h>
35 #include <sys/fcntl.h>
36 #include <sys/mman.h>
37 #include <sys/time.h>
38 #include <sys/vdso.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include "un-namespace.h"
43 #include <machine/atomic.h>
44 #include <machine/cpufunc.h>
45 #include <machine/pvclock.h>
46 #include <machine/specialreg.h>
47 #include <dev/acpica/acpi_hpet.h>
48 #ifdef WANT_HYPERV
49 #include <dev/hyperv/hyperv.h>
50 #endif
51 #include <x86/ifunc.h>
52 #include "libc_private.h"
53 
/*
 * Read the TSC and return it shifted right by th->th_x86_shift bits,
 * truncated to 32 bits.  No fence is issued here; callers that need the
 * read ordered use one of the rdtsc_low_mb_*() wrappers.
 *
 * rdtsc leaves the counter in %edx:%eax.  shrd then shifts %eax (the
 * output operand) right by %cl, filling the vacated high bits from %edx.
 * The shift count is loaded into %ecx by the "c" input constraint, and
 * %edx is listed as clobbered because rdtsc overwrites it.
 */
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}
63 
/*
 * RDTSCP flavour of rdtsc_low(): read the TSC with rdtscp and return it
 * shifted right by th->th_x86_shift bits, truncated to 32 bits.
 *
 * Unlike rdtsc_low(), the shift count cannot be passed in %ecx up front:
 * rdtscp itself writes %ecx (it is listed as clobbered; per the
 * instruction definition it receives IA32_TSC_AUX).  The count is
 * therefore passed in %edi ("D" constraint) and copied into %ecx only
 * after rdtscp has executed, just before shrd consumes %cl.
 */
static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
	return (rv);
}
73 
74 static u_int
75 rdtsc_low_mb_lfence(const struct vdso_timehands *th)
76 {
77 	lfence();
78 	return (rdtsc_low(th));
79 }
80 
81 static u_int
82 rdtsc_low_mb_mfence(const struct vdso_timehands *th)
83 {
84 	mfence();
85 	return (rdtsc_low(th));
86 }
87 
88 static u_int
89 rdtsc_low_mb_none(const struct vdso_timehands *th)
90 {
91 	return (rdtsc_low(th));
92 }
93 
94 static u_int
95 rdtsc32_mb_lfence(void)
96 {
97 	lfence();
98 	return (rdtsc32());
99 }
100 
/*
 * Full 64-bit TSC read (rdtsc()) preceded by an LFENCE.
 */
static uint64_t
rdtsc_mb_lfence(void)
{
	uint64_t tsc;

	lfence();
	tsc = rdtsc();
	return (tsc);
}
107 
108 static u_int
109 rdtsc32_mb_mfence(void)
110 {
111 	mfence();
112 	return (rdtsc32());
113 }
114 
/*
 * Full 64-bit TSC read (rdtsc()) preceded by an MFENCE.
 */
static uint64_t
rdtsc_mb_mfence(void)
{
	uint64_t tsc;

	mfence();
	tsc = rdtsc();
	return (tsc);
}
121 
122 static u_int
123 rdtsc32_mb_none(void)
124 {
125 	return (rdtsc32());
126 }
127 
/*
 * Unfenced 64-bit TSC read; a plain forwarder to rdtsc() so that it
 * fits the tsc_selector function-pointer slot.
 */
static uint64_t
rdtsc_mb_none(void)
{
	uint64_t tsc;

	tsc = rdtsc();
	return (tsc);
}
133 
134 static u_int
135 rdtscp32_(void)
136 {
137 	return (rdtscp32());
138 }
139 
/*
 * 64-bit TSC read via rdtscp(); a plain forwarder that gives the
 * tsc_selector table a function whose address can be taken.
 */
static uint64_t
rdtscp_(void)
{
	uint64_t tsc;

	tsc = rdtscp();
	return (tsc);
}
145 
/*
 * One set of TSC read routines.  Each entry of tsc_selector[] bundles
 * the three read flavours that share the same ordering method.
 */
struct tsc_selector_tag {
	/* Read the TSC truncated to 32 bits. */
	u_int (*ts_rdtsc32)(void);
	/* Read the full 64-bit TSC. */
	uint64_t (*ts_rdtsc)(void);
	/* Read the TSC shifted right by th_x86_shift, as 32 bits. */
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};
151 
/*
 * Table of TSC read routine sets.  The array indices are the values
 * returned by tsc_selector_idx(); the two must be kept in sync.
 */
static const struct tsc_selector_tag tsc_selector[] = {
	[0] = {				/* Intel, LFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_lfence,
		.ts_rdtsc =	rdtsc_mb_lfence,
		.ts_rdtsc_low =	rdtsc_low_mb_lfence,
	},
	[1] = {				/* AMD, MFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_mfence,
		.ts_rdtsc =	rdtsc_mb_mfence,
		.ts_rdtsc_low =	rdtsc_low_mb_mfence,
	},
	[2] = {				/* No SSE2 */
		.ts_rdtsc32 =	rdtsc32_mb_none,
		.ts_rdtsc =	rdtsc_mb_none,
		.ts_rdtsc_low =	rdtsc_low_mb_none,
	},
	[3] = {				/* RDTSCP */
		.ts_rdtsc32 =	rdtscp32_,
		.ts_rdtsc =	rdtscp_,
		.ts_rdtsc_low =	rdtscp_low,
	},
};
174 
175 static int
176 tsc_selector_idx(u_int cpu_feature)
177 {
178 	u_int amd_feature, cpu_exthigh, p[4], v[3];
179 	static const char amd_id[] = "AuthenticAMD";
180 	static const char hygon_id[] = "HygonGenuine";
181 	bool amd_cpu;
182 
183 	if (cpu_feature == 0)
184 		return (2);	/* should not happen due to RDTSC */
185 
186 	do_cpuid(0, p);
187 	v[0] = p[1];
188 	v[1] = p[3];
189 	v[2] = p[2];
190 	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
191 	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;
192 
193 	if (cpu_feature != 0) {
194 		do_cpuid(0x80000000, p);
195 		cpu_exthigh = p[0];
196 	} else {
197 		cpu_exthigh = 0;
198 	}
199 	if (cpu_exthigh >= 0x80000001) {
200 		do_cpuid(0x80000001, p);
201 		amd_feature = p[3];
202 	} else {
203 		amd_feature = 0;
204 	}
205 
206 	if ((amd_feature & AMDID_RDTSCP) != 0)
207 		return (3);
208 	if ((cpu_feature & CPUID_SSE2) == 0)
209 		return (2);
210 	return (amd_cpu ? 1 : 0);
211 }
212 
213 DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
214     (const struct vdso_timehands *th))
215 {
216 	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
217 }
218 
219 DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
220 {
221 	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
222 }
223 
224 DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void))
225 {
226 	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc);
227 }
228 
/*
 * Per-unit cache of mapped /dev/hpetN pages, indexed by HPET unit number.
 * An entry is NULL until __vdso_init_hpet() has run for that unit,
 * MAP_FAILED if the device could not be opened or mapped, and otherwise
 * the address of the mapping.  Units >= HPET_DEV_MAP_MAX are rejected by
 * __vdso_gettc() before this array is indexed.
 */
#define	HPET_DEV_MAP_MAX	10
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
231 
232 static void
233 __vdso_init_hpet(uint32_t u)
234 {
235 	static const char devprefix[] = "/dev/hpet";
236 	char devname[64], *c, *c1, t;
237 	volatile char *new_map, *old_map;
238 	unsigned int mode;
239 	uint32_t u1;
240 	int fd;
241 
242 	c1 = c = stpcpy(devname, devprefix);
243 	u1 = u;
244 	do {
245 		*c++ = u1 % 10 + '0';
246 		u1 /= 10;
247 	} while (u1 != 0);
248 	*c = '\0';
249 	for (c--; c1 != c; c1++, c--) {
250 		t = *c1;
251 		*c1 = *c;
252 		*c = t;
253 	}
254 
255 	old_map = hpet_dev_map[u];
256 	if (old_map != NULL)
257 		return;
258 
259 	/*
260 	 * Explicitely check for the capability mode to avoid
261 	 * triggering trap_enocap on the device open by absolute path.
262 	 */
263 	if ((cap_getmode(&mode) == 0 && mode != 0) ||
264 	    (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) {
265 		/* Prevent the caller from re-entering. */
266 		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
267 		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
268 		return;
269 	}
270 
271 	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
272 	_close(fd);
273 	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
274 	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
275 	    new_map != MAP_FAILED)
276 		munmap((void *)new_map, PAGE_SIZE);
277 }
278 
279 #ifdef WANT_HYPERV
280 
/* Absolute path of the Hyper-V reference TSC device node. */
#define HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * A value of NULL means that initialization has not been done yet and
 * __vdso_init_hyperv_tsc() must be called.  A value of MAP_FAILED means
 * that the Hyper-V reference TSC cannot be used, e.g. in a misconfigured
 * jail.  Any other value is the address of the mapped reference TSC page.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;
290 
291 static void
292 __vdso_init_hyperv_tsc(void)
293 {
294 	int fd;
295 	unsigned int mode;
296 
297 	if (cap_getmode(&mode) == 0 && mode != 0)
298 		goto fail;
299 
300 	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC);
301 	if (fd < 0)
302 		goto fail;
303 	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
304 	    MAP_SHARED, fd, 0);
305 	_close(fd);
306 
307 	return;
308 fail:
309 	/* Prevent the caller from re-entering. */
310 	hyperv_ref_tsc = MAP_FAILED;
311 }
312 
/*
 * Compute a timecounter value from the Hyper-V reference TSC page.
 *
 * tsc_ref points to the mapped reference TSC page; tc receives the
 * result, truncated to u_int.
 *
 * Returns 0 on success, or ENOSYS when the page's sequence number reads
 * as zero.
 * NOTE(review): a zero sequence presumably means the hypervisor has
 * marked the reference TSC as unusable -- confirm against the Hyper-V
 * specification.
 */
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	/*
	 * Sequence-checked read: sample the sequence number, read the
	 * parameters and the TSC, then accept the result only if the
	 * sequence number is unchanged afterwards.
	 */
	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		mfence();	/* XXXKIB */
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		/*
		 * mulq leaves the 128-bit product in %rdx:%rax; the high
		 * half is kept in 'ret', the low half lands in 'disc' and
		 * is discarded.
		 */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		/* Order the data reads above before the sequence re-check. */
		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}
343 
344 #endif	/* WANT_HYPERV */
345 
/*
 * Mapping of the pvclock device, indexed as an array of time info
 * records.  NULL until __vdso_init_pvclock_timeinfos() has run;
 * MAP_FAILED if the device could not be mapped, in which case
 * __vdso_gettc() returns ENOSYS for the pvclock algorithm.
 */
static struct pvclock_vcpu_time_info *pvclock_timeinfos;
347 
/*
 * Compute a timecounter value from the mapped pvclock time info.
 *
 * th supplies the stable-flag mask (th_x86_pvc_stable_mask) and the
 * lower bound for the result (th_x86_pvc_last_systime); tc receives the
 * result, truncated to u_int.  Always returns 0.
 *
 * The time info is read under a version check: the whole computation is
 * retried while the record's version is odd or differs from the value
 * sampled before the read.
 */
static int
__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc)
{
	uint64_t delta, ns, tsc;
	struct pvclock_vcpu_time_info *ti;
	uint32_t cpuid_ti, cpuid_tsc, version;
	bool stable;

	do {
		ti = &pvclock_timeinfos[0];
		version = atomic_load_acq_32(&ti->version);
		stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0;
		if (stable) {
			/* Record 0 is used together with a plain TSC read. */
			tsc = __vdso_gettc_rdtsc();
		} else {
			/*
			 * Pick the record indexed by the aux value that
			 * rdtscp_aux() reports, re-sample its version, and
			 * read the TSC together with a second aux value.
			 * NOTE(review): the aux value is presumably the
			 * current CPU number -- confirm.
			 */
			(void)rdtscp_aux(&cpuid_ti);
			ti = &pvclock_timeinfos[cpuid_ti];
			version = atomic_load_acq_32(&ti->version);
			tsc = rdtscp_aux(&cpuid_tsc);
		}
		delta = tsc - ti->tsc_timestamp;
		ns = ti->system_time + pvclock_scale_delta(delta,
		    ti->tsc_to_system_mul, ti->tsc_shift);
		/* Order the record reads before the version re-check. */
		atomic_thread_fence_acq();
		/*
		 * cpuid_ti and cpuid_tsc are only set on the !stable path,
		 * and are only compared on that path; a mismatch between
		 * the two aux values forces a retry.
		 */
	} while ((ti->version & 1) != 0 || ti->version != version ||
	    (!stable && cpuid_ti != cpuid_tsc));
	/* Never report less than th_x86_pvc_last_systime. */
	*tc = MAX(ns, th->th_x86_pvc_last_systime);
	return (0);
}
377 
378 static void
379 __vdso_init_pvclock_timeinfos(void)
380 {
381 	struct pvclock_vcpu_time_info *timeinfos;
382 	size_t len;
383 	int fd, ncpus;
384 	unsigned int mode;
385 
386 	timeinfos = MAP_FAILED;
387 	if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 ||
388 	    (cap_getmode(&mode) == 0 && mode != 0) ||
389 	    (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0)
390 		goto leave;
391 	len = ncpus * sizeof(*pvclock_timeinfos);
392 	timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
393 	_close(fd);
394 leave:
395 	if (atomic_cmpset_rel_ptr(
396 	    (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL,
397 	    (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED)
398 		(void)munmap((void *)timeinfos, len);
399 }
400 
401 #pragma weak __vdso_gettc
402 int
403 __vdso_gettc(const struct vdso_timehands *th, u_int *tc)
404 {
405 	volatile char *map;
406 	uint32_t idx;
407 
408 	switch (th->th_algo) {
409 	case VDSO_TH_ALGO_X86_TSC:
410 		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
411 		    __vdso_gettc_rdtsc32();
412 		return (0);
413 	case VDSO_TH_ALGO_X86_HPET:
414 		idx = th->th_x86_hpet_idx;
415 		if (idx >= HPET_DEV_MAP_MAX)
416 			return (ENOSYS);
417 		map = (volatile char *)atomic_load_acq_ptr(
418 		    (volatile uintptr_t *)&hpet_dev_map[idx]);
419 		if (map == NULL) {
420 			__vdso_init_hpet(idx);
421 			map = (volatile char *)atomic_load_acq_ptr(
422 			    (volatile uintptr_t *)&hpet_dev_map[idx]);
423 		}
424 		if (map == MAP_FAILED)
425 			return (ENOSYS);
426 		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
427 		return (0);
428 #ifdef WANT_HYPERV
429 	case VDSO_TH_ALGO_X86_HVTSC:
430 		if (hyperv_ref_tsc == NULL)
431 			__vdso_init_hyperv_tsc();
432 		if (hyperv_ref_tsc == MAP_FAILED)
433 			return (ENOSYS);
434 		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
435 #endif
436 	case VDSO_TH_ALGO_X86_PVCLK:
437 		if (pvclock_timeinfos == NULL)
438 			__vdso_init_pvclock_timeinfos();
439 		if (pvclock_timeinfos == MAP_FAILED)
440 			return (ENOSYS);
441 		return (__vdso_pvclock_gettc(th, tc));
442 	default:
443 		return (ENOSYS);
444 	}
445 }
446 
447 #pragma weak __vdso_gettimekeep
448 int
449 __vdso_gettimekeep(struct vdso_timekeep **tk)
450 {
451 
452 	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
453 }
454