/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#include "cpufreq_if.h"

uint64_t	tsc_freq;
int		tsc_is_invariant;
int		tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);

#ifdef SMP
int	smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
#endif

static int	tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");
TUNABLE_INT("kern.timecounter.tsc_shift", &tsc_shift);

static int	tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");
TUNABLE_INT("machdep.disable_tsc", &tsc_disabled);

static int	tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");
TUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration);

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);

static struct timecounter tsc_timecounter = {
	tsc_get_timecount,	/* get_timecount */
	0,			/* no poll_pps */
	~0u,			/* counter_mask */
	0,			/* frequency */
	"TSC",			/* name */
	800,			/* quality (adjusted in code) */
};

#define	VMW_HVMAGIC		0x564d5868
#define	VMW_HVPORT		0x5658
#define	VMW_HVCMD_GETVERSION	10
#define	VMW_HVCMD_GETHZ		45

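/*
 * VMware backdoor hypercall: load the magic number into %eax, UINT_MAX
 * into %ebx, the command into %ecx and the backdoor port number into
 * %edx, then execute IN on that port.  The hypervisor returns its
 * results in %eax..%edx, which are copied into p[0..3].
 */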
static __inline void
vmware_hvcall(u_int cmd, u_int *p)
{

	__asm __volatile("inl %w3, %0"
	: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
	: "0" (VMW_HVMAGIC), "1" (UINT_MAX), "2" (cmd), "3" (VMW_HVPORT)
	: "memory");
}

static int
tsc_freq_vmware(void)
{
	char hv_sig[13];
	u_int regs[4];
	char *p;
	u_int hv_high;
	int i;

	/*
	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
	 * http://lkml.org/lkml/2008/10/1/246
	 *
	 * KB1009458: Mechanisms to determine if software is running in
	 * a VMware virtual machine
	 * http://kb.vmware.com/kb/1009458
	 */
	hv_high = 0;
	if ((cpu_feature2 & CPUID2_HV) != 0) {
		do_cpuid(0x40000000, regs);
		hv_high = regs[0];
		for (i = 1, p = hv_sig; i < 4; i++, p += sizeof(regs) / 4)
			memcpy(p, &regs[i], sizeof(regs[i]));
		*p = '\0';
		if (bootverbose) {
			/*
			 * HV vendor	ID string
			 * ------------+--------------
			 * KVM		"KVMKVMKVM"
			 * Microsoft	"Microsoft Hv"
			 * VMware	"VMwareVMware"
			 * Xen		"XenVMMXenVMM"
			 */
			printf("Hypervisor: Origin = \"%s\"\n", hv_sig);
		}
		if (strncmp(hv_sig, "VMwareVMware", 12) != 0)
			return (0);
	} else {
		p = getenv("smbios.system.serial");
		if (p == NULL)
			return (0);
		if (strncmp(p, "VMware-", 7) != 0 &&
		    strncmp(p, "VMW", 3) != 0) {
			freeenv(p);
			return (0);
		}
		freeenv(p);
		vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
		if (regs[1] != VMW_HVMAGIC)
			return (0);
	}
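	/*
	 * If the hypervisor advertises CPUID leaf 0x40000010, it reports
	 * the TSC frequency in kHz in %eax; otherwise fall back to the
	 * GETHZ backdoor call, which returns the frequency in Hz in
	 * %ebx:%eax.
	 */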
	if (hv_high >= 0x40000010) {
		do_cpuid(0x40000010, regs);
		tsc_freq = regs[0] * 1000;
	} else {
		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
		if (regs[1] != UINT_MAX)
			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
	}
	tsc_is_invariant = 1;
	return (1);
}

static void
tsc_freq_intel(void)
{
	char brand[48];
	u_int regs[4];
	uint64_t freq;
	char *p;
	u_int i;

	/*
	 * Intel Processor Identification and the CPUID Instruction
	 * Application Note 485.
	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
	 */
	if (cpu_exthigh >= 0x80000004) {
		p = brand;
		for (i = 0x80000002; i < 0x80000005; i++) {
			do_cpuid(i, regs);
			memcpy(p, regs, sizeof(regs));
			p += sizeof(regs);
		}
		p = NULL;
		for (i = 0; i < sizeof(brand) - 1; i++)
			if (brand[i] == 'H' && brand[i + 1] == 'z')
				p = brand + i;
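		/*
		 * p now points at the last "Hz" in the brand string.  The
		 * frequency is encoded in the five preceding characters as
		 * either three digits with a decimal point plus the M/G/T
		 * multiplier (e.g. "2.40GHz") or four digits plus the
		 * multiplier (e.g. "3060MHz").
		 */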
		if (p != NULL) {
			p -= 5;
			switch (p[4]) {
			case 'M':
				i = 1;
				break;
			case 'G':
				i = 1000;
				break;
			case 'T':
				i = 1000000;
				break;
			default:
				return;
			}
#define	C2D(c)	((c) - '0')
			if (p[1] == '.') {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[2]) * 100;
				freq += C2D(p[3]) * 10;
				freq *= i * 1000;
			} else {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[1]) * 100;
				freq += C2D(p[2]) * 10;
				freq += C2D(p[3]);
				freq *= i * 1000000;
			}
#undef C2D
			tsc_freq = freq;
		}
	}
}

static void
probe_tsc_freq(void)
{
	u_int regs[4];
	uint64_t tsc1, tsc2;

	if (cpu_high >= 6) {
		do_cpuid(6, regs);
		if ((regs[2] & CPUID_PERF_STAT) != 0) {
			/*
			 * XXX Some emulators expose host CPUID without actual
			 * support for these MSRs.  We must test whether they
			 * really work.
			 */
			wrmsr(MSR_MPERF, 0);
			wrmsr(MSR_APERF, 0);
			DELAY(10);
			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
				tsc_perf_stat = 1;
		}
	}

	if (tsc_freq_vmware())
		return;

	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_mfence;
		}
		break;
	case CPU_VENDOR_INTEL:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	}

	if (tsc_skip_calibration) {
		if (cpu_vendor_id == CPU_VENDOR_INTEL)
			tsc_freq_intel();
		return;
	}

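	/*
	 * Calibrate: read the TSC before and after a one-second DELAY();
	 * the difference between the two readings is the TSC frequency
	 * in Hz.
	 */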
	if (bootverbose)
		printf("Calibrating TSC clock ... ");
	tsc1 = rdtsc();
	DELAY(1000000);
	tsc2 = rdtsc();
	tsc_freq = tsc2 - tsc1;
	if (bootverbose)
		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
}

void
init_TSC(void)
{

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	probe_tsc_freq();

	/*
	 * Inform CPU accounting about our boot-time clock rate.  This will
	 * be updated if someone loads a cpufreq driver after boot that
	 * discovers a new max frequency.
	 */
	if (tsc_freq != 0)
		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

	if (tsc_is_invariant)
		return;

	/* Register to find out about changes in CPU frequency. */
	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction and does not drain the
 * instruction stream, so we need to drain the stream before executing
 * it.  This could be fixed by using RDTSCP, except that instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines, which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * the vendor is too cumbersome, and we learn about TSC presence from
 * CPUID anyway.
 *
 * Do not use do_cpuid() here, since we do not need the CPUID results,
 * which do_cpuid() would have to write into memory.
 */
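/*
 * Each TSC_READ(x) expansion defines a rendezvous callback that stores
 * the calling CPU's TSC value into slot x of that CPU's three-entry
 * record in the shared array passed as the argument.
 */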
#define	TSC_READ(x)							\
static void								\
tsc_read_##x(void *arg)							\
{									\
	uint64_t *tsc = arg;						\
	u_int cpu = PCPU_GET(cpuid);					\
									\
	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
	tsc[cpu * 3 + x] = rdtsc();					\
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ

#define	N	1000

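/*
 * Verify that the readings from the N rendezvous rounds interleave
 * consistently: this CPU's second reading must be later than every
 * other CPU's first reading, and its third later than every other
 * CPU's second.  If not, the TSCs cannot serve as a common timebase
 * and smp_tsc is cleared.
 */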
static void
comp_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d1, d2;
	u_int cpu = PCPU_GET(cpuid);
	u_int i, j, size;

	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size)
		CPU_FOREACH(j) {
			if (j == cpu)
				continue;
			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
			if (d1 <= 0 || d2 <= 0) {
				smp_tsc = 0;
				return;
			}
		}
}

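/*
 * Run the boot-time TSC synchronization test and return the
 * timecounter quality to assign to the TSC: a negative value keeps it
 * from being selected automatically, while higher positive values make
 * it preferred over other timecounters.
 */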
static int
test_tsc(void)
{
	uint64_t *data, *tsc;
	u_int i, size;

	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
		return (-100);
	size = (mp_maxid + 1) * 3;
	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
	for (i = 0, tsc = data; i < N; i++, tsc += size)
		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
	smp_tsc = 1;	/* XXX */
	smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc,
	    smp_no_rendevous_barrier, data);
	free(data, M_TEMP);
	if (bootverbose)
		printf("SMP: %sed TSC synchronization test\n",
		    smp_tsc ? "pass" : "fail");
	if (smp_tsc && tsc_is_invariant) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_AMD:
			/*
			 * Starting with Family 15h processors, the TSC clock
			 * source is in the north bridge.  Check whether
			 * we have a single-socket/multi-core platform.
			 * XXX Need more work for complex cases.
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
			    (amd_feature2 & AMDID2_CMP) == 0 ||
			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
				break;
			return (1000);
		case CPU_VENDOR_INTEL:
			/*
			 * XXX Assume Intel platforms have synchronized TSCs.
			 */
			return (1000);
		}
		return (800);
	}
	return (-100);
}

#undef N

#else

/*
 * This function is never called on a uniprocessor kernel; it is
 * provided only to avoid a linking failure.
 */
static int
test_tsc(void)
{

	return (0);
}

#endif /* SMP */

static void
init_TSC_tc(void)
{
	uint64_t max_freq;
	int shift;

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	/*
	 * Limit the timecounter frequency to fit in an int and prevent
	 * it from overflowing too quickly.
	 */
	max_freq = UINT_MAX;

	/*
	 * We cannot use the TSC if we support APM.  Precise timekeeping
	 * on an APM'ed machine is at best a fool's pursuit, since
	 * any and all of the time spent in various SMM code can't
	 * be reliably accounted for.  Reading the RTC is your only
	 * source of reliable time info.  The i8254 loses too, of course,
	 * but we need to have some kind of time...
	 * We don't know at this point whether APM is going to be used
	 * or not, nor when it might be activated.  Play it safe.
	 */
	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
		tsc_timecounter.tc_quality = -1000;
		if (bootverbose)
			printf("TSC timecounter disabled: APM enabled.\n");
		goto init;
	}

	/*
	 * We cannot use the TSC if it stops incrementing in deep sleep.
	 * Currently, only Intel CPUs are known to have this problem
	 * unless the invariant-TSC bit is set.
	 */
	if (cpu_can_deep_sleep && cpu_vendor_id == CPU_VENDOR_INTEL &&
	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
		tsc_timecounter.tc_quality = -1000;
		tsc_timecounter.tc_flags |= TC_FLAGS_C3STOP;
		if (bootverbose)
			printf("TSC timecounter disabled: C3 enabled.\n");
		goto init;
	}

	/*
	 * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
	 * are synchronized.  If the user is sure that the system has
	 * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to
	 * a non-zero value.  The TSC seems unreliable in virtualized SMP
	 * environments, so it is set to a negative quality in those cases.
	 */
	if (mp_ncpus > 1)
		tsc_timecounter.tc_quality = test_tsc();
	else if (tsc_is_invariant)
		tsc_timecounter.tc_quality = 1000;
	max_freq >>= tsc_shift;

init:
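	/*
	 * Find the smallest shift that brings the TSC frequency down to
	 * max_freq or below.  A non-zero shift discards the low-order
	 * bits of the counter and the timecounter is registered as
	 * "TSC-low".
	 */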
	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
		;
	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
		if (cpu_vendor_id == CPU_VENDOR_AMD) {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_mfence :
			    tsc_get_timecount_mfence;
		} else {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_lfence :
			    tsc_get_timecount_lfence;
		}
	} else {
		tsc_timecounter.tc_get_timecount = shift > 0 ?
		    tsc_get_timecount_low : tsc_get_timecount;
	}
	if (shift > 0) {
		tsc_timecounter.tc_name = "TSC-low";
		if (bootverbose)
			printf("TSC timecounter discards lower %d bit(s)\n",
			    shift);
	}
	if (tsc_freq != 0) {
		tsc_timecounter.tc_frequency = tsc_freq >> shift;
		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
		tc_init(&tsc_timecounter);
	}
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
	device_t cf_dev;
	struct cf_level *levels;
	int count, error;
	uint64_t max_freq;

	/* Only use values from the first CPU, assuming all are equal. */
	if (unit != 0)
		return;

	/* Find the appropriate cpufreq device instance. */
	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
	if (cf_dev == NULL) {
		printf("tsc_levels_changed() called but no cpufreq device?\n");
		return;
	}

	/* Get settings from the device and find the max frequency. */
	count = 64;
	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
	if (levels == NULL)
		return;
	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
	if (error == 0 && count != 0) {
		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
		set_cputicker(rdtsc, max_freq, 1);
	} else
		printf("tsc_levels_changed: no max freq found\n");
	free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

	if (*status != 0 || timecounter != &tsc_timecounter)
		return;

	printf("timecounter TSC must not be in use when "
	    "changing frequencies; change denied\n");
	*status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	uint64_t freq;

	/* If there was an error during the transition, don't do anything. */
	if (tsc_disabled || status != 0)
		return;

	/* Total setting for this level gives the new frequency in MHz. */
	freq = (uint64_t)level->total_set.freq * 1000000;
	atomic_store_rel_64(&tsc_freq, freq);
	tsc_timecounter.tc_frequency =
	    freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (freq == 0)
		return (EOPNOTSUPP);
	error = sysctl_handle_64(oidp, &freq, 0, req);
	if (error == 0 && req->newptr != NULL) {
		atomic_store_rel_64(&tsc_freq, freq);
		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

	return (rdtsc32());
}

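/*
 * "TSC-low" read routine: read the full 64-bit TSC and shift it right
 * by tc_priv bits.  SHRD shifts %eax right by %cl, filling the vacated
 * high bits from %edx, so the result is the low 32 bits of the
 * down-shifted counter.
 */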
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
	return (rv);
}

static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

	lfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

	lfence();
	return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

	mfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

	mfence();
	return (tsc_get_timecount_low(tc));
}

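/*
 * Fill in the machine-dependent part of the vdso timehands exported to
 * userland: record the TSC-low shift and report whether the TSC is the
 * timecounter currently in use, which enables the userland fast path.
 */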
uint32_t
cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{

	vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (timecounter == &tsc_timecounter);
}

#ifdef COMPAT_FREEBSD32
uint32_t
cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{

	vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
	return (timecounter == &tsc_timecounter);
}
#endif