1dd7d207dSJung-uk Kim /*- 2dd7d207dSJung-uk Kim * Copyright (c) 1998-2003 Poul-Henning Kamp 3dd7d207dSJung-uk Kim * All rights reserved. 4dd7d207dSJung-uk Kim * 5dd7d207dSJung-uk Kim * Redistribution and use in source and binary forms, with or without 6dd7d207dSJung-uk Kim * modification, are permitted provided that the following conditions 7dd7d207dSJung-uk Kim * are met: 8dd7d207dSJung-uk Kim * 1. Redistributions of source code must retain the above copyright 9dd7d207dSJung-uk Kim * notice, this list of conditions and the following disclaimer. 10dd7d207dSJung-uk Kim * 2. Redistributions in binary form must reproduce the above copyright 11dd7d207dSJung-uk Kim * notice, this list of conditions and the following disclaimer in the 12dd7d207dSJung-uk Kim * documentation and/or other materials provided with the distribution. 13dd7d207dSJung-uk Kim * 14dd7d207dSJung-uk Kim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15dd7d207dSJung-uk Kim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16dd7d207dSJung-uk Kim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17dd7d207dSJung-uk Kim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18dd7d207dSJung-uk Kim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19dd7d207dSJung-uk Kim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20dd7d207dSJung-uk Kim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21dd7d207dSJung-uk Kim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22dd7d207dSJung-uk Kim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23dd7d207dSJung-uk Kim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24dd7d207dSJung-uk Kim * SUCH DAMAGE. 25dd7d207dSJung-uk Kim */ 26dd7d207dSJung-uk Kim 27dd7d207dSJung-uk Kim #include <sys/cdefs.h> 28dd7d207dSJung-uk Kim __FBSDID("$FreeBSD$"); 29dd7d207dSJung-uk Kim 30dd7d207dSJung-uk Kim #include "opt_clock.h" 31dd7d207dSJung-uk Kim 32dd7d207dSJung-uk Kim #include <sys/param.h> 33dd7d207dSJung-uk Kim #include <sys/bus.h> 34dd7d207dSJung-uk Kim #include <sys/cpu.h> 355da5812bSJung-uk Kim #include <sys/limits.h> 36dd7d207dSJung-uk Kim #include <sys/malloc.h> 37dd7d207dSJung-uk Kim #include <sys/systm.h> 38dd7d207dSJung-uk Kim #include <sys/sysctl.h> 39dd7d207dSJung-uk Kim #include <sys/time.h> 40dd7d207dSJung-uk Kim #include <sys/timetc.h> 41dd7d207dSJung-uk Kim #include <sys/kernel.h> 42dd7d207dSJung-uk Kim #include <sys/power.h> 43dd7d207dSJung-uk Kim #include <sys/smp.h> 44dd7d207dSJung-uk Kim #include <machine/clock.h> 45dd7d207dSJung-uk Kim #include <machine/cputypes.h> 46dd7d207dSJung-uk Kim #include <machine/md_var.h> 47dd7d207dSJung-uk Kim #include <machine/specialreg.h> 48dd7d207dSJung-uk Kim 49dd7d207dSJung-uk Kim #include "cpufreq_if.h" 50dd7d207dSJung-uk Kim 51dd7d207dSJung-uk Kim uint64_t tsc_freq; 52dd7d207dSJung-uk Kim int tsc_is_invariant; 53155094d7SJung-uk Kim int tsc_perf_stat; 54155094d7SJung-uk Kim 55dd7d207dSJung-uk Kim static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; 56dd7d207dSJung-uk Kim 57dd7d207dSJung-uk Kim SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, 58dd7d207dSJung-uk Kim &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); 59dd7d207dSJung-uk Kim TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant); 60dd7d207dSJung-uk Kim 61dd7d207dSJung-uk Kim #ifdef SMP 62dd7d207dSJung-uk Kim static int smp_tsc; 63dd7d207dSJung-uk Kim SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, 64dd7d207dSJung-uk Kim "Indicates whether the TSC is safe to use in SMP mode"); 65dd7d207dSJung-uk Kim TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); 66dd7d207dSJung-uk Kim #endif 67dd7d207dSJung-uk Kim 6879422085SJung-uk Kim static int tsc_disabled; 6979422085SJung-uk Kim SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0, 7079422085SJung-uk Kim "Disable x86 Time Stamp Counter"); 7179422085SJung-uk Kim TUNABLE_INT("machdep.disable_tsc", &tsc_disabled); 7279422085SJung-uk Kim 73a4e4127fSJung-uk Kim static int tsc_skip_calibration; 74a4e4127fSJung-uk Kim SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN, 75a4e4127fSJung-uk Kim &tsc_skip_calibration, 0, "Disable TSC frequency calibration"); 76a4e4127fSJung-uk Kim TUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration); 77a4e4127fSJung-uk Kim 78dd7d207dSJung-uk Kim static void tsc_freq_changed(void *arg, const struct cf_level *level, 79dd7d207dSJung-uk Kim int status); 80dd7d207dSJung-uk Kim static void tsc_freq_changing(void *arg, const struct cf_level *level, 81dd7d207dSJung-uk Kim int *status); 82dd7d207dSJung-uk Kim static unsigned tsc_get_timecount(struct timecounter *tc); 83bc8e4ad2SJung-uk Kim static unsigned tsc_get_timecount_low(struct timecounter *tc); 84dd7d207dSJung-uk Kim static void tsc_levels_changed(void *arg, int unit); 85dd7d207dSJung-uk Kim 86dd7d207dSJung-uk Kim static struct timecounter tsc_timecounter = { 87dd7d207dSJung-uk Kim tsc_get_timecount, /* get_timecount */ 88dd7d207dSJung-uk Kim 0, /* no poll_pps */ 89dd7d207dSJung-uk Kim ~0u, /* counter_mask */ 90dd7d207dSJung-uk Kim 0, /* frequency */ 91dd7d207dSJung-uk Kim "TSC", /* name */ 92dd7d207dSJung-uk Kim 800, /* quality (adjusted in code) */ 93dd7d207dSJung-uk Kim }; 94dd7d207dSJung-uk Kim 955da5812bSJung-uk Kim #define VMW_HVMAGIC 0x564d5868 965da5812bSJung-uk Kim #define VMW_HVPORT 0x5658 975da5812bSJung-uk Kim #define VMW_HVCMD_GETVERSION 10 985da5812bSJung-uk Kim #define VMW_HVCMD_GETHZ 45 995da5812bSJung-uk Kim 1005da5812bSJung-uk Kim static __inline void 1015da5812bSJung-uk Kim vmware_hvcall(u_int cmd, u_int *p) 1025da5812bSJung-uk Kim { 1035da5812bSJung-uk Kim 104a990fbf9SJung-uk Kim __asm __volatile("inl %w3, %0" 1055da5812bSJung-uk Kim : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 1065da5812bSJung-uk Kim : "0" (VMW_HVMAGIC), "1" (UINT_MAX), "2" (cmd), "3" (VMW_HVPORT) 1075da5812bSJung-uk Kim : "memory"); 1085da5812bSJung-uk Kim } 1095da5812bSJung-uk Kim 1105da5812bSJung-uk Kim static int 1115da5812bSJung-uk Kim tsc_freq_vmware(void) 1125da5812bSJung-uk Kim { 1135da5812bSJung-uk Kim char hv_sig[13]; 1145da5812bSJung-uk Kim u_int regs[4]; 1155da5812bSJung-uk Kim char *p; 1165da5812bSJung-uk Kim u_int hv_high; 1175da5812bSJung-uk Kim int i; 1185da5812bSJung-uk Kim 1195da5812bSJung-uk Kim /* 1205da5812bSJung-uk Kim * [RFC] CPUID usage for interaction between Hypervisors and Linux. 1215da5812bSJung-uk Kim * http://lkml.org/lkml/2008/10/1/246 1225da5812bSJung-uk Kim * 1235da5812bSJung-uk Kim * KB1009458: Mechanisms to determine if software is running in 1245da5812bSJung-uk Kim * a VMware virtual machine 1255da5812bSJung-uk Kim * http://kb.vmware.com/kb/1009458 1265da5812bSJung-uk Kim */ 1275da5812bSJung-uk Kim hv_high = 0; 1285da5812bSJung-uk Kim if ((cpu_feature2 & CPUID2_HV) != 0) { 1295da5812bSJung-uk Kim do_cpuid(0x40000000, regs); 1305da5812bSJung-uk Kim hv_high = regs[0]; 1315da5812bSJung-uk Kim for (i = 1, p = hv_sig; i < 4; i++, p += sizeof(regs) / 4) 1325da5812bSJung-uk Kim memcpy(p, ®s[i], sizeof(regs[i])); 1335da5812bSJung-uk Kim *p = '\0'; 1345da5812bSJung-uk Kim if (bootverbose) { 1355da5812bSJung-uk Kim /* 1365da5812bSJung-uk Kim * HV vendor ID string 1375da5812bSJung-uk Kim * ------------+-------------- 1385da5812bSJung-uk Kim * KVM "KVMKVMKVM" 1395da5812bSJung-uk Kim * Microsoft "Microsoft Hv" 1405da5812bSJung-uk Kim * VMware "VMwareVMware" 1415da5812bSJung-uk Kim * Xen "XenVMMXenVMM" 1425da5812bSJung-uk Kim */ 1435da5812bSJung-uk Kim printf("Hypervisor: Origin = \"%s\"\n", hv_sig); 1445da5812bSJung-uk Kim } 1455da5812bSJung-uk Kim if (strncmp(hv_sig, "VMwareVMware", 12) != 0) 1465da5812bSJung-uk Kim return (0); 1475da5812bSJung-uk Kim } else { 1485da5812bSJung-uk Kim p = getenv("smbios.system.serial"); 1495da5812bSJung-uk Kim if (p == NULL) 1505da5812bSJung-uk Kim return (0); 1515da5812bSJung-uk Kim if (strncmp(p, "VMware-", 7) != 0 && 1525da5812bSJung-uk Kim strncmp(p, "VMW", 3) != 0) { 1535da5812bSJung-uk Kim freeenv(p); 1545da5812bSJung-uk Kim return (0); 1555da5812bSJung-uk Kim } 1565da5812bSJung-uk Kim freeenv(p); 1575da5812bSJung-uk Kim vmware_hvcall(VMW_HVCMD_GETVERSION, regs); 1585da5812bSJung-uk Kim if (regs[1] != VMW_HVMAGIC) 1595da5812bSJung-uk Kim return (0); 1605da5812bSJung-uk Kim } 1615da5812bSJung-uk Kim if (hv_high >= 0x40000010) { 1625da5812bSJung-uk Kim do_cpuid(0x40000010, regs); 1635da5812bSJung-uk Kim tsc_freq = regs[0] * 1000; 1645da5812bSJung-uk Kim } else { 1655da5812bSJung-uk Kim vmware_hvcall(VMW_HVCMD_GETHZ, regs); 1665da5812bSJung-uk Kim if (regs[1] != UINT_MAX) 1675da5812bSJung-uk Kim tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); 1685da5812bSJung-uk Kim } 1695da5812bSJung-uk Kim tsc_is_invariant = 1; 1705da5812bSJung-uk Kim return (1); 1715da5812bSJung-uk Kim } 1725da5812bSJung-uk Kim 173a4e4127fSJung-uk Kim static void 174a4e4127fSJung-uk Kim tsc_freq_intel(void) 175dd7d207dSJung-uk Kim { 176a4e4127fSJung-uk Kim char brand[48]; 177a4e4127fSJung-uk Kim u_int regs[4]; 178a4e4127fSJung-uk Kim uint64_t freq; 179a4e4127fSJung-uk Kim char *p; 180a4e4127fSJung-uk Kim u_int i; 181dd7d207dSJung-uk Kim 182a4e4127fSJung-uk Kim /* 183a4e4127fSJung-uk Kim * Intel Processor Identification and the CPUID Instruction 184a4e4127fSJung-uk Kim * Application Note 485. 185a4e4127fSJung-uk Kim * http://www.intel.com/assets/pdf/appnote/241618.pdf 186a4e4127fSJung-uk Kim */ 187a4e4127fSJung-uk Kim if (cpu_exthigh >= 0x80000004) { 188a4e4127fSJung-uk Kim p = brand; 189a4e4127fSJung-uk Kim for (i = 0x80000002; i < 0x80000005; i++) { 190a4e4127fSJung-uk Kim do_cpuid(i, regs); 191a4e4127fSJung-uk Kim memcpy(p, regs, sizeof(regs)); 192a4e4127fSJung-uk Kim p += sizeof(regs); 193a4e4127fSJung-uk Kim } 194a4e4127fSJung-uk Kim p = NULL; 195a4e4127fSJung-uk Kim for (i = 0; i < sizeof(brand) - 1; i++) 196a4e4127fSJung-uk Kim if (brand[i] == 'H' && brand[i + 1] == 'z') 197a4e4127fSJung-uk Kim p = brand + i; 198a4e4127fSJung-uk Kim if (p != NULL) { 199a4e4127fSJung-uk Kim p -= 5; 200a4e4127fSJung-uk Kim switch (p[4]) { 201a4e4127fSJung-uk Kim case 'M': 202a4e4127fSJung-uk Kim i = 1; 203a4e4127fSJung-uk Kim break; 204a4e4127fSJung-uk Kim case 'G': 205a4e4127fSJung-uk Kim i = 1000; 206a4e4127fSJung-uk Kim break; 207a4e4127fSJung-uk Kim case 'T': 208a4e4127fSJung-uk Kim i = 1000000; 209a4e4127fSJung-uk Kim break; 210a4e4127fSJung-uk Kim default: 211dd7d207dSJung-uk Kim return; 212a4e4127fSJung-uk Kim } 213a4e4127fSJung-uk Kim #define C2D(c) ((c) - '0') 214a4e4127fSJung-uk Kim if (p[1] == '.') { 215a4e4127fSJung-uk Kim freq = C2D(p[0]) * 1000; 216a4e4127fSJung-uk Kim freq += C2D(p[2]) * 100; 217a4e4127fSJung-uk Kim freq += C2D(p[3]) * 10; 218a4e4127fSJung-uk Kim freq *= i * 1000; 219a4e4127fSJung-uk Kim } else { 220a4e4127fSJung-uk Kim freq = C2D(p[0]) * 1000; 221a4e4127fSJung-uk Kim freq += C2D(p[1]) * 100; 222a4e4127fSJung-uk Kim freq += C2D(p[2]) * 10; 223a4e4127fSJung-uk Kim freq += C2D(p[3]); 224a4e4127fSJung-uk Kim freq *= i * 1000000; 225a4e4127fSJung-uk Kim } 226a4e4127fSJung-uk Kim #undef C2D 227a4e4127fSJung-uk Kim tsc_freq = freq; 228a4e4127fSJung-uk Kim } 229a4e4127fSJung-uk Kim } 230a4e4127fSJung-uk Kim } 231dd7d207dSJung-uk Kim 232a4e4127fSJung-uk Kim static void 233a4e4127fSJung-uk Kim probe_tsc_freq(void) 234a4e4127fSJung-uk Kim { 235155094d7SJung-uk Kim u_int regs[4]; 236a4e4127fSJung-uk Kim uint64_t tsc1, tsc2; 237dd7d207dSJung-uk Kim 2385da5812bSJung-uk Kim if (cpu_high >= 6) { 2395da5812bSJung-uk Kim do_cpuid(6, regs); 2405da5812bSJung-uk Kim if ((regs[2] & CPUID_PERF_STAT) != 0) { 2415da5812bSJung-uk Kim /* 2425da5812bSJung-uk Kim * XXX Some emulators expose host CPUID without actual 2435da5812bSJung-uk Kim * support for these MSRs. We must test whether they 2445da5812bSJung-uk Kim * really work. 2455da5812bSJung-uk Kim */ 2465da5812bSJung-uk Kim wrmsr(MSR_MPERF, 0); 2475da5812bSJung-uk Kim wrmsr(MSR_APERF, 0); 2485da5812bSJung-uk Kim DELAY(10); 2495da5812bSJung-uk Kim if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0) 2505da5812bSJung-uk Kim tsc_perf_stat = 1; 2515da5812bSJung-uk Kim } 2525da5812bSJung-uk Kim } 2535da5812bSJung-uk Kim 2545da5812bSJung-uk Kim if (tsc_freq_vmware()) 2555da5812bSJung-uk Kim return; 2565da5812bSJung-uk Kim 257dd7d207dSJung-uk Kim switch (cpu_vendor_id) { 258dd7d207dSJung-uk Kim case CPU_VENDOR_AMD: 259a106a27cSJung-uk Kim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 260a106a27cSJung-uk Kim (vm_guest == VM_GUEST_NO && 261a106a27cSJung-uk Kim CPUID_TO_FAMILY(cpu_id) >= 0x10)) 262dd7d207dSJung-uk Kim tsc_is_invariant = 1; 263dd7d207dSJung-uk Kim break; 264dd7d207dSJung-uk Kim case CPU_VENDOR_INTEL: 265a106a27cSJung-uk Kim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 266a106a27cSJung-uk Kim (vm_guest == VM_GUEST_NO && 267a106a27cSJung-uk Kim ((CPUID_TO_FAMILY(cpu_id) == 0x6 && 268dd7d207dSJung-uk Kim CPUID_TO_MODEL(cpu_id) >= 0xe) || 269dd7d207dSJung-uk Kim (CPUID_TO_FAMILY(cpu_id) == 0xf && 270a106a27cSJung-uk Kim CPUID_TO_MODEL(cpu_id) >= 0x3)))) 271dd7d207dSJung-uk Kim tsc_is_invariant = 1; 272dd7d207dSJung-uk Kim break; 273dd7d207dSJung-uk Kim case CPU_VENDOR_CENTAUR: 274a106a27cSJung-uk Kim if (vm_guest == VM_GUEST_NO && 275a106a27cSJung-uk Kim CPUID_TO_FAMILY(cpu_id) == 0x6 && 276dd7d207dSJung-uk Kim CPUID_TO_MODEL(cpu_id) >= 0xf && 277dd7d207dSJung-uk Kim (rdmsr(0x1203) & 0x100000000ULL) == 0) 278dd7d207dSJung-uk Kim tsc_is_invariant = 1; 279dd7d207dSJung-uk Kim break; 280dd7d207dSJung-uk Kim } 281dd7d207dSJung-uk Kim 282a4e4127fSJung-uk Kim if (tsc_skip_calibration) { 283a4e4127fSJung-uk Kim if (cpu_vendor_id == CPU_VENDOR_INTEL) 284a4e4127fSJung-uk Kim tsc_freq_intel(); 285a4e4127fSJung-uk Kim return; 286a4e4127fSJung-uk Kim } 287a4e4127fSJung-uk Kim 288a4e4127fSJung-uk Kim if (bootverbose) 289a4e4127fSJung-uk Kim printf("Calibrating TSC clock ... "); 290a4e4127fSJung-uk Kim tsc1 = rdtsc(); 291a4e4127fSJung-uk Kim DELAY(1000000); 292a4e4127fSJung-uk Kim tsc2 = rdtsc(); 293a4e4127fSJung-uk Kim tsc_freq = tsc2 - tsc1; 294a4e4127fSJung-uk Kim if (bootverbose) 295a4e4127fSJung-uk Kim printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); 296a4e4127fSJung-uk Kim } 297a4e4127fSJung-uk Kim 298a4e4127fSJung-uk Kim void 299a4e4127fSJung-uk Kim init_TSC(void) 300a4e4127fSJung-uk Kim { 301a4e4127fSJung-uk Kim 302a4e4127fSJung-uk Kim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 303a4e4127fSJung-uk Kim return; 304a4e4127fSJung-uk Kim 305a4e4127fSJung-uk Kim probe_tsc_freq(); 306a4e4127fSJung-uk Kim 307dd7d207dSJung-uk Kim /* 308dd7d207dSJung-uk Kim * Inform CPU accounting about our boot-time clock rate. This will 309dd7d207dSJung-uk Kim * be updated if someone loads a cpufreq driver after boot that 310dd7d207dSJung-uk Kim * discovers a new max frequency. 311dd7d207dSJung-uk Kim */ 312a4e4127fSJung-uk Kim if (tsc_freq != 0) 3135ac44f72SJung-uk Kim set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant); 314dd7d207dSJung-uk Kim 315dd7d207dSJung-uk Kim if (tsc_is_invariant) 316dd7d207dSJung-uk Kim return; 317dd7d207dSJung-uk Kim 318dd7d207dSJung-uk Kim /* Register to find out about changes in CPU frequency. */ 319dd7d207dSJung-uk Kim tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, 320dd7d207dSJung-uk Kim tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); 321dd7d207dSJung-uk Kim tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, 322dd7d207dSJung-uk Kim tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); 323dd7d207dSJung-uk Kim tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, 324dd7d207dSJung-uk Kim tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); 325dd7d207dSJung-uk Kim } 326dd7d207dSJung-uk Kim 32765e7d70bSJung-uk Kim #ifdef SMP 32865e7d70bSJung-uk Kim 32965e7d70bSJung-uk Kim #define TSC_READ(x) \ 33065e7d70bSJung-uk Kim static void \ 33165e7d70bSJung-uk Kim tsc_read_##x(void *arg) \ 33265e7d70bSJung-uk Kim { \ 33365e7d70bSJung-uk Kim uint32_t *tsc = arg; \ 33465e7d70bSJung-uk Kim u_int cpu = PCPU_GET(cpuid); \ 33565e7d70bSJung-uk Kim \ 33665e7d70bSJung-uk Kim tsc[cpu * 3 + x] = rdtsc32(); \ 33765e7d70bSJung-uk Kim } 33865e7d70bSJung-uk Kim TSC_READ(0) 33965e7d70bSJung-uk Kim TSC_READ(1) 34065e7d70bSJung-uk Kim TSC_READ(2) 34165e7d70bSJung-uk Kim #undef TSC_READ 34265e7d70bSJung-uk Kim 34365e7d70bSJung-uk Kim #define N 1000 34465e7d70bSJung-uk Kim 34565e7d70bSJung-uk Kim static void 34665e7d70bSJung-uk Kim comp_smp_tsc(void *arg) 34765e7d70bSJung-uk Kim { 34865e7d70bSJung-uk Kim uint32_t *tsc; 34965e7d70bSJung-uk Kim int32_t d1, d2; 35065e7d70bSJung-uk Kim u_int cpu = PCPU_GET(cpuid); 35165e7d70bSJung-uk Kim u_int i, j, size; 35265e7d70bSJung-uk Kim 35365e7d70bSJung-uk Kim size = (mp_maxid + 1) * 3; 35465e7d70bSJung-uk Kim for (i = 0, tsc = arg; i < N; i++, tsc += size) 35565e7d70bSJung-uk Kim CPU_FOREACH(j) { 35665e7d70bSJung-uk Kim if (j == cpu) 35765e7d70bSJung-uk Kim continue; 35865e7d70bSJung-uk Kim d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; 35965e7d70bSJung-uk Kim d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1]; 36065e7d70bSJung-uk Kim if (d1 <= 0 || d2 <= 0) { 36165e7d70bSJung-uk Kim smp_tsc = 0; 36265e7d70bSJung-uk Kim return; 36365e7d70bSJung-uk Kim } 36465e7d70bSJung-uk Kim } 36565e7d70bSJung-uk Kim } 36665e7d70bSJung-uk Kim 36765e7d70bSJung-uk Kim static int 36865e7d70bSJung-uk Kim test_smp_tsc(void) 36965e7d70bSJung-uk Kim { 37065e7d70bSJung-uk Kim uint32_t *data, *tsc; 37165e7d70bSJung-uk Kim u_int i, size; 37265e7d70bSJung-uk Kim 37365e7d70bSJung-uk Kim if (!smp_tsc && !tsc_is_invariant) 37465e7d70bSJung-uk Kim return (-100); 37565e7d70bSJung-uk Kim size = (mp_maxid + 1) * 3; 37665e7d70bSJung-uk Kim data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK); 37765e7d70bSJung-uk Kim for (i = 0, tsc = data; i < N; i++, tsc += size) 37865e7d70bSJung-uk Kim smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc); 37965e7d70bSJung-uk Kim smp_tsc = 1; /* XXX */ 38065e7d70bSJung-uk Kim smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc, 38165e7d70bSJung-uk Kim smp_no_rendevous_barrier, data); 38265e7d70bSJung-uk Kim free(data, M_TEMP); 38365e7d70bSJung-uk Kim if (bootverbose) 38465e7d70bSJung-uk Kim printf("SMP: %sed TSC synchronization test\n", 38565e7d70bSJung-uk Kim smp_tsc ? "pass" : "fail"); 38626e6537aSJung-uk Kim if (smp_tsc && tsc_is_invariant) { 38726e6537aSJung-uk Kim switch (cpu_vendor_id) { 38826e6537aSJung-uk Kim case CPU_VENDOR_AMD: 38926e6537aSJung-uk Kim /* 39026e6537aSJung-uk Kim * Starting with Family 15h processors, TSC clock 39126e6537aSJung-uk Kim * source is in the north bridge. Check whether 39226e6537aSJung-uk Kim * we have a single-socket/multi-core platform. 39326e6537aSJung-uk Kim * XXX Need more work for complex cases. 39426e6537aSJung-uk Kim */ 39526e6537aSJung-uk Kim if (CPUID_TO_FAMILY(cpu_id) < 0x15 || 39626e6537aSJung-uk Kim (amd_feature2 & AMDID2_CMP) == 0 || 39726e6537aSJung-uk Kim smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) 39826e6537aSJung-uk Kim break; 39926e6537aSJung-uk Kim return (1000); 40026e6537aSJung-uk Kim case CPU_VENDOR_INTEL: 40126e6537aSJung-uk Kim /* 40226e6537aSJung-uk Kim * XXX Assume Intel platforms have synchronized TSCs. 40326e6537aSJung-uk Kim */ 40426e6537aSJung-uk Kim return (1000); 40526e6537aSJung-uk Kim } 40626e6537aSJung-uk Kim return (800); 40726e6537aSJung-uk Kim } 40826e6537aSJung-uk Kim return (-100); 40965e7d70bSJung-uk Kim } 41065e7d70bSJung-uk Kim 41165e7d70bSJung-uk Kim #undef N 41265e7d70bSJung-uk Kim 41365e7d70bSJung-uk Kim #endif /* SMP */ 41465e7d70bSJung-uk Kim 41565e7d70bSJung-uk Kim static void 416dd7d207dSJung-uk Kim init_TSC_tc(void) 417dd7d207dSJung-uk Kim { 41895f2f098SJung-uk Kim uint64_t max_freq; 41995f2f098SJung-uk Kim int shift; 420dd7d207dSJung-uk Kim 42138b8542cSJung-uk Kim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 422dd7d207dSJung-uk Kim return; 423dd7d207dSJung-uk Kim 424dd7d207dSJung-uk Kim /* 42595f2f098SJung-uk Kim * Limit timecounter frequency to fit in an int and prevent it from 42695f2f098SJung-uk Kim * overflowing too fast. 42795f2f098SJung-uk Kim */ 42895f2f098SJung-uk Kim max_freq = UINT_MAX; 42995f2f098SJung-uk Kim 43095f2f098SJung-uk Kim /* 431dd7d207dSJung-uk Kim * We can not use the TSC if we support APM. Precise timekeeping 432dd7d207dSJung-uk Kim * on an APM'ed machine is at best a fools pursuit, since 433dd7d207dSJung-uk Kim * any and all of the time spent in various SMM code can't 434dd7d207dSJung-uk Kim * be reliably accounted for. Reading the RTC is your only 435dd7d207dSJung-uk Kim * source of reliable time info. The i8254 loses too, of course, 436dd7d207dSJung-uk Kim * but we need to have some kind of time... 437dd7d207dSJung-uk Kim * We don't know at this point whether APM is going to be used 438dd7d207dSJung-uk Kim * or not, nor when it might be activated. Play it safe. 439dd7d207dSJung-uk Kim */ 440dd7d207dSJung-uk Kim if (power_pm_get_type() == POWER_PM_TYPE_APM) { 441dd7d207dSJung-uk Kim tsc_timecounter.tc_quality = -1000; 442dd7d207dSJung-uk Kim if (bootverbose) 443dd7d207dSJung-uk Kim printf("TSC timecounter disabled: APM enabled.\n"); 44465e7d70bSJung-uk Kim goto init; 445dd7d207dSJung-uk Kim } 446dd7d207dSJung-uk Kim 447*a49399a9SJung-uk Kim /* 448*a49399a9SJung-uk Kim * We cannot use the TSC if it stops incrementing in deep sleep. 449*a49399a9SJung-uk Kim * Currently only Intel CPUs are known for this problem unless 450*a49399a9SJung-uk Kim * the invariant TSC bit is set. 451*a49399a9SJung-uk Kim */ 452*a49399a9SJung-uk Kim if (cpu_can_deep_sleep && cpu_vendor_id == CPU_VENDOR_INTEL && 453*a49399a9SJung-uk Kim (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) { 454*a49399a9SJung-uk Kim tsc_timecounter.tc_quality = -1000; 455*a49399a9SJung-uk Kim if (bootverbose) 456*a49399a9SJung-uk Kim printf("TSC timecounter disabled: C3 enabled.\n"); 457*a49399a9SJung-uk Kim goto init; 458*a49399a9SJung-uk Kim } 459*a49399a9SJung-uk Kim 460dd7d207dSJung-uk Kim #ifdef SMP 461dd7d207dSJung-uk Kim /* 46265e7d70bSJung-uk Kim * We can not use the TSC in SMP mode unless the TSCs on all CPUs are 46365e7d70bSJung-uk Kim * synchronized. If the user is sure that the system has synchronized 46465e7d70bSJung-uk Kim * TSCs, set kern.timecounter.smp_tsc tunable to a non-zero value. 46595f2f098SJung-uk Kim * We also limit the frequency even lower to avoid "temporal anomalies" 46695f2f098SJung-uk Kim * as much as possible. 467dd7d207dSJung-uk Kim */ 46895f2f098SJung-uk Kim if (smp_cpus > 1) { 46965e7d70bSJung-uk Kim tsc_timecounter.tc_quality = test_smp_tsc(); 47095f2f098SJung-uk Kim max_freq >>= 8; 47126e6537aSJung-uk Kim } else 472dd7d207dSJung-uk Kim #endif 47326e6537aSJung-uk Kim if (tsc_is_invariant) 47426e6537aSJung-uk Kim tsc_timecounter.tc_quality = 1000; 47526e6537aSJung-uk Kim 47665e7d70bSJung-uk Kim init: 4775df88f46SJung-uk Kim for (shift = 0; shift < 31 && (tsc_freq >> shift) > max_freq; shift++) 47895f2f098SJung-uk Kim ; 47995f2f098SJung-uk Kim if (shift > 0) { 480bc8e4ad2SJung-uk Kim tsc_timecounter.tc_get_timecount = tsc_get_timecount_low; 48195f2f098SJung-uk Kim tsc_timecounter.tc_name = "TSC-low"; 48295f2f098SJung-uk Kim if (bootverbose) 483bc8e4ad2SJung-uk Kim printf("TSC timecounter discards lower %d bit(s)\n", 48495f2f098SJung-uk Kim shift); 48595f2f098SJung-uk Kim } 486bc34c87eSJung-uk Kim if (tsc_freq != 0) { 48795f2f098SJung-uk Kim tsc_timecounter.tc_frequency = tsc_freq >> shift; 48895f2f098SJung-uk Kim tsc_timecounter.tc_priv = (void *)(intptr_t)shift; 489dd7d207dSJung-uk Kim tc_init(&tsc_timecounter); 490dd7d207dSJung-uk Kim } 491dd7d207dSJung-uk Kim } 49265e7d70bSJung-uk Kim SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL); 493dd7d207dSJung-uk Kim 494dd7d207dSJung-uk Kim /* 495dd7d207dSJung-uk Kim * When cpufreq levels change, find out about the (new) max frequency. We 496dd7d207dSJung-uk Kim * use this to update CPU accounting in case it got a lower estimate at boot. 497dd7d207dSJung-uk Kim */ 498dd7d207dSJung-uk Kim static void 499dd7d207dSJung-uk Kim tsc_levels_changed(void *arg, int unit) 500dd7d207dSJung-uk Kim { 501dd7d207dSJung-uk Kim device_t cf_dev; 502dd7d207dSJung-uk Kim struct cf_level *levels; 503dd7d207dSJung-uk Kim int count, error; 504dd7d207dSJung-uk Kim uint64_t max_freq; 505dd7d207dSJung-uk Kim 506dd7d207dSJung-uk Kim /* Only use values from the first CPU, assuming all are equal. */ 507dd7d207dSJung-uk Kim if (unit != 0) 508dd7d207dSJung-uk Kim return; 509dd7d207dSJung-uk Kim 510dd7d207dSJung-uk Kim /* Find the appropriate cpufreq device instance. */ 511dd7d207dSJung-uk Kim cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); 512dd7d207dSJung-uk Kim if (cf_dev == NULL) { 513dd7d207dSJung-uk Kim printf("tsc_levels_changed() called but no cpufreq device?\n"); 514dd7d207dSJung-uk Kim return; 515dd7d207dSJung-uk Kim } 516dd7d207dSJung-uk Kim 517dd7d207dSJung-uk Kim /* Get settings from the device and find the max frequency. */ 518dd7d207dSJung-uk Kim count = 64; 519dd7d207dSJung-uk Kim levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); 520dd7d207dSJung-uk Kim if (levels == NULL) 521dd7d207dSJung-uk Kim return; 522dd7d207dSJung-uk Kim error = CPUFREQ_LEVELS(cf_dev, levels, &count); 523dd7d207dSJung-uk Kim if (error == 0 && count != 0) { 524dd7d207dSJung-uk Kim max_freq = (uint64_t)levels[0].total_set.freq * 1000000; 525dd7d207dSJung-uk Kim set_cputicker(rdtsc, max_freq, 1); 526dd7d207dSJung-uk Kim } else 527dd7d207dSJung-uk Kim printf("tsc_levels_changed: no max freq found\n"); 528dd7d207dSJung-uk Kim free(levels, M_TEMP); 529dd7d207dSJung-uk Kim } 530dd7d207dSJung-uk Kim 531dd7d207dSJung-uk Kim /* 532dd7d207dSJung-uk Kim * If the TSC timecounter is in use, veto the pending change. It may be 533dd7d207dSJung-uk Kim * possible in the future to handle a dynamically-changing timecounter rate. 534dd7d207dSJung-uk Kim */ 535dd7d207dSJung-uk Kim static void 536dd7d207dSJung-uk Kim tsc_freq_changing(void *arg, const struct cf_level *level, int *status) 537dd7d207dSJung-uk Kim { 538dd7d207dSJung-uk Kim 539dd7d207dSJung-uk Kim if (*status != 0 || timecounter != &tsc_timecounter) 540dd7d207dSJung-uk Kim return; 541dd7d207dSJung-uk Kim 542dd7d207dSJung-uk Kim printf("timecounter TSC must not be in use when " 543dd7d207dSJung-uk Kim "changing frequencies; change denied\n"); 544dd7d207dSJung-uk Kim *status = EBUSY; 545dd7d207dSJung-uk Kim } 546dd7d207dSJung-uk Kim 547dd7d207dSJung-uk Kim /* Update TSC freq with the value indicated by the caller. */ 548dd7d207dSJung-uk Kim static void 549dd7d207dSJung-uk Kim tsc_freq_changed(void *arg, const struct cf_level *level, int status) 550dd7d207dSJung-uk Kim { 5513453537fSJung-uk Kim uint64_t freq; 552dd7d207dSJung-uk Kim 553dd7d207dSJung-uk Kim /* If there was an error during the transition, don't do anything. */ 55479422085SJung-uk Kim if (tsc_disabled || status != 0) 555dd7d207dSJung-uk Kim return; 556dd7d207dSJung-uk Kim 557dd7d207dSJung-uk Kim /* Total setting for this level gives the new frequency in MHz. */ 5583453537fSJung-uk Kim freq = (uint64_t)level->total_set.freq * 1000000; 5593453537fSJung-uk Kim atomic_store_rel_64(&tsc_freq, freq); 56095f2f098SJung-uk Kim tsc_timecounter.tc_frequency = 56195f2f098SJung-uk Kim freq >> (int)(intptr_t)tsc_timecounter.tc_priv; 562dd7d207dSJung-uk Kim } 563dd7d207dSJung-uk Kim 564dd7d207dSJung-uk Kim static int 565dd7d207dSJung-uk Kim sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) 566dd7d207dSJung-uk Kim { 567dd7d207dSJung-uk Kim int error; 568dd7d207dSJung-uk Kim uint64_t freq; 569dd7d207dSJung-uk Kim 5703453537fSJung-uk Kim freq = atomic_load_acq_64(&tsc_freq); 5713453537fSJung-uk Kim if (freq == 0) 572dd7d207dSJung-uk Kim return (EOPNOTSUPP); 573cbc134adSMatthew D Fleming error = sysctl_handle_64(oidp, &freq, 0, req); 5747ebbcb21SJung-uk Kim if (error == 0 && req->newptr != NULL) { 5753453537fSJung-uk Kim atomic_store_rel_64(&tsc_freq, freq); 576bc8e4ad2SJung-uk Kim atomic_store_rel_64(&tsc_timecounter.tc_frequency, 577bc8e4ad2SJung-uk Kim freq >> (int)(intptr_t)tsc_timecounter.tc_priv); 5787ebbcb21SJung-uk Kim } 579dd7d207dSJung-uk Kim return (error); 580dd7d207dSJung-uk Kim } 581dd7d207dSJung-uk Kim 582cbc134adSMatthew D Fleming SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 5835331d61dSJung-uk Kim 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency"); 584dd7d207dSJung-uk Kim 585727c7b2dSJung-uk Kim static u_int 58695f2f098SJung-uk Kim tsc_get_timecount(struct timecounter *tc __unused) 587dd7d207dSJung-uk Kim { 588727c7b2dSJung-uk Kim 589727c7b2dSJung-uk Kim return (rdtsc32()); 590dd7d207dSJung-uk Kim } 59195f2f098SJung-uk Kim 59295f2f098SJung-uk Kim static u_int 593bc8e4ad2SJung-uk Kim tsc_get_timecount_low(struct timecounter *tc) 59495f2f098SJung-uk Kim { 5955df88f46SJung-uk Kim uint32_t rv; 59695f2f098SJung-uk Kim 5975df88f46SJung-uk Kim __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" 5985df88f46SJung-uk Kim : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); 5995df88f46SJung-uk Kim return (rv); 60095f2f098SJung-uk Kim } 601