/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#include "cpufreq_if.h"

uint64_t	tsc_freq;
int		tsc_is_invariant;
int		tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);

#ifdef SMP
int	smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);

int	smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
TUNABLE_INT("kern.timecounter.smp_tsc_adjust", &smp_tsc_adjust);
#endif

static int	tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");
TUNABLE_INT("kern.timecounter.tsc_shift", &tsc_shift);

static int	tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");
TUNABLE_INT("machdep.disable_tsc", &tsc_disabled);

static int	tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");
TUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration);

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);

static struct timecounter tsc_timecounter = {
	tsc_get_timecount,	/* get_timecount */
	0,			/* no poll_pps */
	~0u,			/* counter_mask */
	0,			/* frequency */
	"TSC",			/* name */
	800,			/* quality (adjusted in code) */
};

#define	VMW_HVMAGIC		0x564d5868
#define	VMW_HVPORT		0x5658
#define	VMW_HVCMD_GETVERSION	10
#define	VMW_HVCMD_GETHZ		45

static __inline void
vmware_hvcall(u_int cmd, u_int *p)
{

	__asm __volatile("inl %w3, %0"
	    : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
	    : "0" (VMW_HVMAGIC), "1" (UINT_MAX), "2" (cmd), "3" (VMW_HVPORT)
	    : "memory");
}

static int
tsc_freq_vmware(void)
{
	char hv_sig[13];
	u_int regs[4];
	char *p;
	u_int hv_high;
	int i;

	/*
	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
	 * http://lkml.org/lkml/2008/10/1/246
	 *
	 * KB1009458: Mechanisms to determine if software is running in
	 * a VMware virtual machine
	 * http://kb.vmware.com/kb/1009458
	 */
	hv_high = 0;
	if ((cpu_feature2 & CPUID2_HV) != 0) {
		do_cpuid(0x40000000, regs);
		hv_high = regs[0];
		for (i = 1, p = hv_sig; i < 4; i++, p += sizeof(regs) / 4)
			memcpy(p, &regs[i], sizeof(regs[i]));
		*p = '\0';
		if (bootverbose) {
			/*
			 * HV vendor	ID string
			 * ------------+--------------
			 * KVM		"KVMKVMKVM"
			 * Microsoft	"Microsoft Hv"
			 * VMware	"VMwareVMware"
			 * Xen		"XenVMMXenVMM"
			 */
			printf("Hypervisor: Origin = \"%s\"\n", hv_sig);
		}
		if (strncmp(hv_sig, "VMwareVMware", 12) != 0)
			return (0);
	} else {
		p = getenv("smbios.system.serial");
		if (p == NULL)
			return (0);
		if (strncmp(p, "VMware-", 7) != 0 &&
		    strncmp(p, "VMW", 3) != 0) {
			freeenv(p);
			return (0);
		}
		freeenv(p);
		vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
		if (regs[1] != VMW_HVMAGIC)
			return (0);
	}
	if (hv_high >= 0x40000010) {
		do_cpuid(0x40000010, regs);
		tsc_freq = regs[0] * 1000;
	} else {
		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
		if (regs[1] != UINT_MAX)
			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
	}
	tsc_is_invariant = 1;
	return (1);
}

static void
tsc_freq_intel(void)
{
	char brand[48];
	u_int regs[4];
	uint64_t freq;
	char *p;
	u_int i;

	/*
	 * Intel Processor Identification and the CPUID Instruction
	 * Application Note 485.
	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
	 */
	if (cpu_exthigh >= 0x80000004) {
		p = brand;
		for (i = 0x80000002; i < 0x80000005; i++) {
			do_cpuid(i, regs);
			memcpy(p, regs, sizeof(regs));
			p += sizeof(regs);
		}
		p = NULL;
		for (i = 0; i < sizeof(brand) - 1; i++)
			if (brand[i] == 'H' && brand[i + 1] == 'z')
				p = brand + i;
		if (p != NULL) {
			p -= 5;
			switch (p[4]) {
			case 'M':
				i = 1;
				break;
			case 'G':
				i = 1000;
				break;
			case 'T':
				i = 1000000;
				break;
			default:
				return;
			}
#define	C2D(c)	((c) - '0')
			if (p[1] == '.') {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[2]) * 100;
				freq += C2D(p[3]) * 10;
				freq *= i * 1000;
			} else {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[1]) * 100;
				freq += C2D(p[2]) * 10;
				freq += C2D(p[3]);
				freq *= i * 1000000;
			}
#undef C2D
			tsc_freq = freq;
		}
	}
}

static void
probe_tsc_freq(void)
{
	u_int regs[4];
	uint64_t tsc1, tsc2;

	if (cpu_high >= 6) {
		do_cpuid(6, regs);
		if ((regs[2] & CPUID_PERF_STAT) != 0) {
			/*
			 * XXX Some emulators expose host CPUID without actual
			 * support for these MSRs.  We must test whether they
			 * really work.
			 */
			wrmsr(MSR_MPERF, 0);
			wrmsr(MSR_APERF, 0);
			DELAY(10);
			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
				tsc_perf_stat = 1;
		}
	}

	if (tsc_freq_vmware())
		return;

	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_mfence;
		}
		break;
	case CPU_VENDOR_INTEL:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	}

	if (tsc_skip_calibration) {
		if (cpu_vendor_id == CPU_VENDOR_INTEL)
			tsc_freq_intel();
		return;
	}

	if (bootverbose)
		printf("Calibrating TSC clock ... ");
	tsc1 = rdtsc();
	DELAY(1000000);
	tsc2 = rdtsc();
	tsc_freq = tsc2 - tsc1;
	if (bootverbose)
		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
}

void
init_TSC(void)
{

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	probe_tsc_freq();

	/*
	 * Inform CPU accounting about our boot-time clock rate.  This will
	 * be updated if someone loads a cpufreq driver after boot that
	 * discovers a new max frequency.
	 */
	if (tsc_freq != 0)
		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

	if (tsc_is_invariant)
		return;

	/* Register to find out about changes in CPU frequency. */
	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction, and does not drain the
 * instruction stream, so we need to drain the stream before executing
 * it.  It could be fixed by use of RDTSCP, except the instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * vendor is too cumbersome, and we learn about TSC presence from CPUID.
 *
 * Do not use do_cpuid(), since we do not need CPUID results, which
 * have to be written into memory with do_cpuid().
 */
#define	TSC_READ(x)							\
static void								\
tsc_read_##x(void *arg)							\
{									\
	uint64_t *tsc = arg;						\
	u_int cpu = PCPU_GET(cpuid);					\
									\
	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
	tsc[cpu * 3 + x] = rdtsc();					\
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ

#define	N	1000

static void
comp_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d1, d2;
	u_int cpu = PCPU_GET(cpuid);
	u_int i, j, size;

	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size)
		CPU_FOREACH(j) {
			if (j == cpu)
				continue;
			/*
			 * The rendezvous serializes the three reads, so a
			 * later read on this CPU must exceed an earlier
			 * read on any other CPU if the TSCs are in sync.
			 */
			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
			if (d1 <= 0 || d2 <= 0) {
				smp_tsc = 0;
				return;
			}
		}
}

static void
adj_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d, min, max;
	u_int cpu = PCPU_GET(cpuid);
	u_int first, i, size;

	first = CPU_FIRST();
	if (cpu == first)
		return;
	min = INT64_MIN;
	max = INT64_MAX;
	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size) {
		d = tsc[first * 3] - tsc[cpu * 3 + 1];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3];
		if (d < max)
			max = d;
		d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
		if (d < max)
			max = d;
	}
	if (min > max)
		return;
	d = min / 2 + max / 2;
	/*
	 * Add the estimated skew to this AP's TSC by rewriting MSR 0x10
	 * (the TSC MSR); %edx:%eax holds the 64-bit MSR value.
	 */
	__asm __volatile (
		"movl $0x10, %%ecx\n\t"
		"rdmsr\n\t"
		"addl %%edi, %%eax\n\t"
		"adcl %%esi, %%edx\n\t"
		"wrmsr\n"
		: /* No output */
		: "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
		: "ax", "cx", "dx", "cc"
	);
}

static int
test_tsc(void)
{
	uint64_t *data, *tsc;
	u_int i, size, adj;

	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
		return (-100);
	size = (mp_maxid + 1) * 3;
	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
	adj = 0;
retry:
	for (i = 0, tsc = data; i < N; i++, tsc += size)
		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
	smp_tsc = 1;	/* XXX */
	smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc,
	    smp_no_rendevous_barrier, data);
	if (!smp_tsc && adj < smp_tsc_adjust) {
		adj++;
		smp_rendezvous(smp_no_rendevous_barrier, adj_smp_tsc,
		    smp_no_rendevous_barrier, data);
		goto retry;
	}
	free(data, M_TEMP);
	if (bootverbose)
		printf("SMP: %sed TSC synchronization test%s\n",
		    smp_tsc ? "pass" : "fail",
		    adj > 0 ? " after adjustment" : "");
	if (smp_tsc && tsc_is_invariant) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_AMD:
			/*
			 * Starting with Family 15h processors, TSC clock
			 * source is in the north bridge.  Check whether
			 * we have a single-socket/multi-core platform.
			 * XXX Need more work for complex cases.
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
			    (amd_feature2 & AMDID2_CMP) == 0 ||
			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
				break;
			return (1000);
		case CPU_VENDOR_INTEL:
			/*
			 * XXX Assume Intel platforms have synchronized TSCs.
			 */
			return (1000);
		}
		return (800);
	}
	return (-100);
}

#undef N

#else

/*
 * The function is not called; it is provided to avoid a linking failure
 * on uniprocessor kernels.
 */
static int
test_tsc(void)
{

	return (0);
}

#endif /* SMP */

static void
init_TSC_tc(void)
{
	uint64_t max_freq;
	int shift;

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	/*
	 * Limit timecounter frequency to fit in an int and prevent it from
	 * overflowing too fast.
	 */
	max_freq = UINT_MAX;

	/*
	 * We cannot use the TSC if we support APM.  Precise timekeeping
	 * on an APM'ed machine is at best a fool's pursuit, since
	 * any and all of the time spent in various SMM code can't
	 * be reliably accounted for.  Reading the RTC is your only
	 * source of reliable time info.  The i8254 loses too, of course,
	 * but we need to have some kind of time...
	 * We don't know at this point whether APM is going to be used
	 * or not, nor when it might be activated.  Play it safe.
	 */
	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
		tsc_timecounter.tc_quality = -1000;
		if (bootverbose)
			printf("TSC timecounter disabled: APM enabled.\n");
		goto init;
	}

	/*
	 * We cannot use the TSC if it stops incrementing in deep sleep.
	 * Currently only Intel CPUs are known for this problem unless
	 * the invariant TSC bit is set.
	 */
	if (cpu_can_deep_sleep && cpu_vendor_id == CPU_VENDOR_INTEL &&
	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
		tsc_timecounter.tc_quality = -1000;
		tsc_timecounter.tc_flags |= TC_FLAGS_C3STOP;
		if (bootverbose)
			printf("TSC timecounter disabled: C3 enabled.\n");
		goto init;
	}

	/*
	 * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
	 * are synchronized.  If the user is sure that the system has
	 * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to a
	 * non-zero value.  The TSC seems unreliable in virtualized SMP
	 * environments, so it is set to a negative quality in those cases.
	 */
	if (mp_ncpus > 1)
		tsc_timecounter.tc_quality = test_tsc();
	else if (tsc_is_invariant)
		tsc_timecounter.tc_quality = 1000;
	max_freq >>= tsc_shift;

init:
	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
		;
	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
		if (cpu_vendor_id == CPU_VENDOR_AMD) {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_mfence :
			    tsc_get_timecount_mfence;
		} else {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_lfence :
			    tsc_get_timecount_lfence;
		}
	} else {
		tsc_timecounter.tc_get_timecount = shift > 0 ?
		    tsc_get_timecount_low : tsc_get_timecount;
	}
	if (shift > 0) {
		tsc_timecounter.tc_name = "TSC-low";
		if (bootverbose)
			printf("TSC timecounter discards lower %d bit(s)\n",
			    shift);
	}
	if (tsc_freq != 0) {
		tsc_timecounter.tc_frequency = tsc_freq >> shift;
		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
		tc_init(&tsc_timecounter);
	}
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
	device_t cf_dev;
	struct cf_level *levels;
	int count, error;
	uint64_t max_freq;

	/* Only use values from the first CPU, assuming all are equal. */
	if (unit != 0)
		return;

	/* Find the appropriate cpufreq device instance. */
	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
	if (cf_dev == NULL) {
		printf("tsc_levels_changed() called but no cpufreq device?\n");
		return;
	}

	/* Get settings from the device and find the max frequency. */
	count = 64;
	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
	if (levels == NULL)
		return;
	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
	if (error == 0 && count != 0) {
		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
		set_cputicker(rdtsc, max_freq, 1);
	} else
		printf("tsc_levels_changed: no max freq found\n");
	free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

	if (*status != 0 || timecounter != &tsc_timecounter)
		return;

	printf("timecounter TSC must not be in use when "
	    "changing frequencies; change denied\n");
	*status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	uint64_t freq;

	/* If there was an error during the transition, don't do anything. */
	if (tsc_disabled || status != 0)
		return;

	/* Total setting for this level gives the new frequency in MHz. */
	freq = (uint64_t)level->total_set.freq * 1000000;
	atomic_store_rel_64(&tsc_freq, freq);
	tsc_timecounter.tc_frequency =
	    freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (freq == 0)
		return (EOPNOTSUPP);
	error = sysctl_handle_64(oidp, &freq, 0, req);
	if (error == 0 && req->newptr != NULL) {
		atomic_store_rel_64(&tsc_freq, freq);
		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

	return (rdtsc32());
}

static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

	/*
	 * Discard the tc_priv low-order bits: SHRD shifts the 64-bit
	 * TSC value right and leaves the result in %eax.
	 */
	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
	return (rv);
}

static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

	lfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

	lfence();
	return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

	mfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

	mfence();
	return (tsc_get_timecount_low(tc));
}

uint32_t
cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{

	vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (timecounter == &tsc_timecounter);
}

#ifdef COMPAT_FREEBSD32
uint32_t
cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{

	vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv;
	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
	return (timecounter == &tsc_timecounter);
}
#endif