16c69c6bbSAdam Fenn /*- 2*4d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 36c69c6bbSAdam Fenn * 46c69c6bbSAdam Fenn * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org> 56c69c6bbSAdam Fenn * Copyright (c) 2021 Mathieu Chouquet-Stringer 66c69c6bbSAdam Fenn * Copyright (c) 2021 Juniper Networks, Inc. 76c69c6bbSAdam Fenn * Copyright (c) 2021 Klara, Inc. 86c69c6bbSAdam Fenn * 96c69c6bbSAdam Fenn * Redistribution and use in source and binary forms, with or without 106c69c6bbSAdam Fenn * modification, are permitted provided that the following conditions 116c69c6bbSAdam Fenn * are met: 126c69c6bbSAdam Fenn * 1. Redistributions of source code must retain the above copyright 136c69c6bbSAdam Fenn * notice, this list of conditions and the following disclaimer. 146c69c6bbSAdam Fenn * 2. Redistributions in binary form must reproduce the above copyright 156c69c6bbSAdam Fenn * notice, this list of conditions and the following disclaimer in the 166c69c6bbSAdam Fenn * documentation and/or other materials provided with the distribution. 176c69c6bbSAdam Fenn * 186c69c6bbSAdam Fenn * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 196c69c6bbSAdam Fenn * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 206c69c6bbSAdam Fenn * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 216c69c6bbSAdam Fenn * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 226c69c6bbSAdam Fenn * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 236c69c6bbSAdam Fenn * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 246c69c6bbSAdam Fenn * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 256c69c6bbSAdam Fenn * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 266c69c6bbSAdam Fenn * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 276c69c6bbSAdam Fenn * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 286c69c6bbSAdam Fenn * SUCH DAMAGE. 296c69c6bbSAdam Fenn */ 306c69c6bbSAdam Fenn 316c69c6bbSAdam Fenn /* 326c69c6bbSAdam Fenn * Linux KVM paravirtual clock support 336c69c6bbSAdam Fenn * 346c69c6bbSAdam Fenn * References: 356c69c6bbSAdam Fenn * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html 366c69c6bbSAdam Fenn * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html 376c69c6bbSAdam Fenn */ 386c69c6bbSAdam Fenn 396c69c6bbSAdam Fenn #include <sys/param.h> 406c69c6bbSAdam Fenn #include <sys/bus.h> 416c69c6bbSAdam Fenn #include <sys/domainset.h> 426c69c6bbSAdam Fenn #include <sys/kernel.h> 436c69c6bbSAdam Fenn #include <sys/malloc.h> 446c69c6bbSAdam Fenn #include <sys/module.h> 456c69c6bbSAdam Fenn #include <sys/smp.h> 4657a8fa6fSColin Percival #include <sys/sysctl.h> 476c69c6bbSAdam Fenn 486c69c6bbSAdam Fenn #include <vm/vm.h> 496c69c6bbSAdam Fenn #include <vm/pmap.h> 506c69c6bbSAdam Fenn #include <vm/vm_extern.h> 516c69c6bbSAdam Fenn 526c69c6bbSAdam Fenn #include <machine/pvclock.h> 536c69c6bbSAdam Fenn #include <x86/kvm.h> 546c69c6bbSAdam Fenn 556c69c6bbSAdam Fenn #include "clock_if.h" 566c69c6bbSAdam Fenn 576c69c6bbSAdam Fenn #define KVM_CLOCK_DEVNAME "kvmclock" 586c69c6bbSAdam Fenn /* 596c69c6bbSAdam Fenn * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's 606c69c6bbSAdam Fenn * default value of 800, and (3) below the TSC's value when it supports the 616c69c6bbSAdam Fenn * "Invariant TSC" feature and is believed to be synchronized across all CPUs. 626c69c6bbSAdam Fenn */ 636c69c6bbSAdam Fenn #define KVM_CLOCK_TC_QUALITY 975 646c69c6bbSAdam Fenn 656c69c6bbSAdam Fenn struct kvm_clock_softc { 666c69c6bbSAdam Fenn struct pvclock pvc; 676c69c6bbSAdam Fenn struct pvclock_wall_clock wc; 686c69c6bbSAdam Fenn struct pvclock_vcpu_time_info *timeinfos; 696c69c6bbSAdam Fenn u_int msr_tc; 706c69c6bbSAdam Fenn u_int msr_wc; 71568f552bSMark Johnston #ifndef EARLY_AP_STARTUP 72568f552bSMark Johnston int firstcpu; 73568f552bSMark Johnston #endif 746c69c6bbSAdam Fenn }; 756c69c6bbSAdam Fenn 766c69c6bbSAdam Fenn static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg); 77568f552bSMark Johnston static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc, 78568f552bSMark Johnston const cpuset_t *cpus); 796c69c6bbSAdam Fenn static void kvm_clock_system_time_enable_pcpu(void *arg); 8057a8fa6fSColin Percival static void kvm_clock_setup_sysctl(device_t); 816c69c6bbSAdam Fenn 826c69c6bbSAdam Fenn static struct pvclock_wall_clock * 836c69c6bbSAdam Fenn kvm_clock_get_wallclock(void *arg) 846c69c6bbSAdam Fenn { 856c69c6bbSAdam Fenn struct kvm_clock_softc *sc = arg; 866c69c6bbSAdam Fenn 876c69c6bbSAdam Fenn wrmsr(sc->msr_wc, vtophys(&sc->wc)); 886c69c6bbSAdam Fenn return (&sc->wc); 896c69c6bbSAdam Fenn } 906c69c6bbSAdam Fenn 916c69c6bbSAdam Fenn static void 92568f552bSMark Johnston kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus) 936c69c6bbSAdam Fenn { 94568f552bSMark Johnston smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu, 95568f552bSMark Johnston NULL, sc); 966c69c6bbSAdam Fenn } 976c69c6bbSAdam Fenn 986c69c6bbSAdam Fenn static void 996c69c6bbSAdam Fenn kvm_clock_system_time_enable_pcpu(void *arg) 1006c69c6bbSAdam Fenn { 1016c69c6bbSAdam Fenn struct kvm_clock_softc *sc = arg; 1026c69c6bbSAdam Fenn 1036c69c6bbSAdam Fenn /* 1046c69c6bbSAdam Fenn * See [2]; the lsb of this MSR is the system time enable bit. 1056c69c6bbSAdam Fenn */ 1066c69c6bbSAdam Fenn wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1); 1076c69c6bbSAdam Fenn } 1086c69c6bbSAdam Fenn 109568f552bSMark Johnston #ifndef EARLY_AP_STARTUP 110568f552bSMark Johnston static void 111568f552bSMark Johnston kvm_clock_init_smp(void *arg __unused) 112568f552bSMark Johnston { 113568f552bSMark Johnston devclass_t kvm_clock_devclass; 114568f552bSMark Johnston cpuset_t cpus; 115568f552bSMark Johnston struct kvm_clock_softc *sc; 116568f552bSMark Johnston 117568f552bSMark Johnston kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME); 118568f552bSMark Johnston sc = devclass_get_softc(kvm_clock_devclass, 0); 119568f552bSMark Johnston if (sc == NULL || mp_ncpus == 1) 120568f552bSMark Johnston return; 121568f552bSMark Johnston 122568f552bSMark Johnston /* 123568f552bSMark Johnston * Register with the hypervisor on all CPUs except the one that 124568f552bSMark Johnston * registered in kvm_clock_attach(). 125568f552bSMark Johnston */ 126568f552bSMark Johnston cpus = all_cpus; 127568f552bSMark Johnston KASSERT(CPU_ISSET(sc->firstcpu, &cpus), 128568f552bSMark Johnston ("%s: invalid first CPU %d", __func__, sc->firstcpu)); 129568f552bSMark Johnston CPU_CLR(sc->firstcpu, &cpus); 130568f552bSMark Johnston kvm_clock_system_time_enable(sc, &cpus); 131568f552bSMark Johnston } 132568f552bSMark Johnston SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL); 133568f552bSMark Johnston #endif 134568f552bSMark Johnston 1356c69c6bbSAdam Fenn static void 1366c69c6bbSAdam Fenn kvm_clock_identify(driver_t *driver, device_t parent) 1376c69c6bbSAdam Fenn { 1386c69c6bbSAdam Fenn u_int regs[4]; 1396c69c6bbSAdam Fenn 1406c69c6bbSAdam Fenn kvm_cpuid_get_features(regs); 1416c69c6bbSAdam Fenn if ((regs[0] & 1426c69c6bbSAdam Fenn (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0) 1436c69c6bbSAdam Fenn return; 1446c69c6bbSAdam Fenn if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1)) 1456c69c6bbSAdam Fenn return; 1466c69c6bbSAdam Fenn BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0); 1476c69c6bbSAdam Fenn } 1486c69c6bbSAdam Fenn 1496c69c6bbSAdam Fenn static int 1506c69c6bbSAdam Fenn kvm_clock_probe(device_t dev) 1516c69c6bbSAdam Fenn { 1526c69c6bbSAdam Fenn device_set_desc(dev, "KVM paravirtual clock"); 1536c69c6bbSAdam Fenn return (BUS_PROBE_DEFAULT); 1546c69c6bbSAdam Fenn } 1556c69c6bbSAdam Fenn 1566c69c6bbSAdam Fenn static int 1576c69c6bbSAdam Fenn kvm_clock_attach(device_t dev) 1586c69c6bbSAdam Fenn { 1596c69c6bbSAdam Fenn u_int regs[4]; 1606c69c6bbSAdam Fenn struct kvm_clock_softc *sc = device_get_softc(dev); 1616c69c6bbSAdam Fenn bool stable_flag_supported; 1626c69c6bbSAdam Fenn 1636c69c6bbSAdam Fenn /* Process KVM "features" CPUID leaf content: */ 1646c69c6bbSAdam Fenn kvm_cpuid_get_features(regs); 1656c69c6bbSAdam Fenn if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) { 1666c69c6bbSAdam Fenn sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW; 1676c69c6bbSAdam Fenn sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW; 1686c69c6bbSAdam Fenn } else { 1696c69c6bbSAdam Fenn KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0, 1706c69c6bbSAdam Fenn ("Clocksource feature flags disappeared since " 1716c69c6bbSAdam Fenn "kvm_clock_identify: regs[0] %#0x.", regs[0])); 1726c69c6bbSAdam Fenn sc->msr_tc = KVM_MSR_SYSTEM_TIME; 1736c69c6bbSAdam Fenn sc->msr_wc = KVM_MSR_WALL_CLOCK; 1746c69c6bbSAdam Fenn } 1756c69c6bbSAdam Fenn stable_flag_supported = 1766c69c6bbSAdam Fenn (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0; 1776c69c6bbSAdam Fenn 1786c69c6bbSAdam Fenn /* Set up 'struct pvclock_vcpu_time_info' page(s): */ 179f49fd63aSJohn Baldwin sc->timeinfos = kmem_malloc(mp_ncpus * 1806c69c6bbSAdam Fenn sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO); 181568f552bSMark Johnston #ifdef EARLY_AP_STARTUP 182568f552bSMark Johnston kvm_clock_system_time_enable(sc, &all_cpus); 183568f552bSMark Johnston #else 184568f552bSMark Johnston sc->firstcpu = curcpu; 185568f552bSMark Johnston kvm_clock_system_time_enable_pcpu(sc); 186568f552bSMark Johnston #endif 1876c69c6bbSAdam Fenn 1886c69c6bbSAdam Fenn /* 1896c69c6bbSAdam Fenn * Init pvclock; register KVM clock wall clock, register KVM clock 1906c69c6bbSAdam Fenn * timecounter, and set up the requisite infrastructure for vDSO access 1916c69c6bbSAdam Fenn * to this timecounter. 1926c69c6bbSAdam Fenn * Regarding 'tc_flags': Since the KVM MSR documentation does not 1936c69c6bbSAdam Fenn * specifically discuss suspend/resume scenarios, conservatively 1946c69c6bbSAdam Fenn * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system 1956c69c6bbSAdam Fenn * time must be re-inited in such cases. 1966c69c6bbSAdam Fenn */ 1976c69c6bbSAdam Fenn sc->pvc.get_wallclock = kvm_clock_get_wallclock; 1986c69c6bbSAdam Fenn sc->pvc.get_wallclock_arg = sc; 1996c69c6bbSAdam Fenn sc->pvc.timeinfos = sc->timeinfos; 2006c69c6bbSAdam Fenn sc->pvc.stable_flag_supported = stable_flag_supported; 2016c69c6bbSAdam Fenn pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0); 20257a8fa6fSColin Percival kvm_clock_setup_sysctl(dev); 2036c69c6bbSAdam Fenn return (0); 2046c69c6bbSAdam Fenn } 2056c69c6bbSAdam Fenn 2066c69c6bbSAdam Fenn static int 2076c69c6bbSAdam Fenn kvm_clock_detach(device_t dev) 2086c69c6bbSAdam Fenn { 2096c69c6bbSAdam Fenn struct kvm_clock_softc *sc = device_get_softc(dev); 2106c69c6bbSAdam Fenn 2116c69c6bbSAdam Fenn return (pvclock_destroy(&sc->pvc)); 2126c69c6bbSAdam Fenn } 2136c69c6bbSAdam Fenn 2146c69c6bbSAdam Fenn static int 2156c69c6bbSAdam Fenn kvm_clock_suspend(device_t dev) 2166c69c6bbSAdam Fenn { 2176c69c6bbSAdam Fenn return (0); 2186c69c6bbSAdam Fenn } 2196c69c6bbSAdam Fenn 2206c69c6bbSAdam Fenn static int 2216c69c6bbSAdam Fenn kvm_clock_resume(device_t dev) 2226c69c6bbSAdam Fenn { 2236c69c6bbSAdam Fenn /* 2246c69c6bbSAdam Fenn * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE'; 2256c69c6bbSAdam Fenn * conservatively assume that the system time must be re-inited in 2266c69c6bbSAdam Fenn * suspend/resume scenarios. 2276c69c6bbSAdam Fenn */ 228568f552bSMark Johnston kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus); 2296c69c6bbSAdam Fenn pvclock_resume(); 2306c69c6bbSAdam Fenn inittodr(time_second); 2316c69c6bbSAdam Fenn return (0); 2326c69c6bbSAdam Fenn } 2336c69c6bbSAdam Fenn 2346c69c6bbSAdam Fenn static int 2356c69c6bbSAdam Fenn kvm_clock_gettime(device_t dev, struct timespec *ts) 2366c69c6bbSAdam Fenn { 2376c69c6bbSAdam Fenn struct kvm_clock_softc *sc = device_get_softc(dev); 2386c69c6bbSAdam Fenn 2396c69c6bbSAdam Fenn pvclock_gettime(&sc->pvc, ts); 2406c69c6bbSAdam Fenn return (0); 2416c69c6bbSAdam Fenn } 2426c69c6bbSAdam Fenn 2436c69c6bbSAdam Fenn static int 2446c69c6bbSAdam Fenn kvm_clock_settime(device_t dev, struct timespec *ts) 2456c69c6bbSAdam Fenn { 2466c69c6bbSAdam Fenn /* 2476c69c6bbSAdam Fenn * Even though it is not possible to set the KVM clock's wall clock, to 2486c69c6bbSAdam Fenn * avoid the possibility of periodic benign error messages from 2496c69c6bbSAdam Fenn * 'settime_task_func()', report success rather than, e.g., 'ENODEV'. 2506c69c6bbSAdam Fenn */ 2516c69c6bbSAdam Fenn return (0); 2526c69c6bbSAdam Fenn } 2536c69c6bbSAdam Fenn 25457a8fa6fSColin Percival static int 25557a8fa6fSColin Percival kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS) 25657a8fa6fSColin Percival { 25757a8fa6fSColin Percival struct kvm_clock_softc *sc = oidp->oid_arg1; 25857a8fa6fSColin Percival uint64_t freq = pvclock_tsc_freq(sc->timeinfos); 25957a8fa6fSColin Percival 26057a8fa6fSColin Percival return (sysctl_handle_64(oidp, &freq, 0, req)); 26157a8fa6fSColin Percival } 26257a8fa6fSColin Percival 26357a8fa6fSColin Percival static void 26457a8fa6fSColin Percival kvm_clock_setup_sysctl(device_t dev) 26557a8fa6fSColin Percival { 26657a8fa6fSColin Percival struct kvm_clock_softc *sc = device_get_softc(dev); 26757a8fa6fSColin Percival struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 26857a8fa6fSColin Percival struct sysctl_oid *tree = device_get_sysctl_tree(dev); 26957a8fa6fSColin Percival struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 27057a8fa6fSColin Percival 27157a8fa6fSColin Percival SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq", 27257a8fa6fSColin Percival CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 27357a8fa6fSColin Percival kvm_clock_tsc_freq_sysctl, "QU", 27457a8fa6fSColin Percival "Time Stamp Counter frequency"); 27557a8fa6fSColin Percival } 27657a8fa6fSColin Percival 2776c69c6bbSAdam Fenn static device_method_t kvm_clock_methods[] = { 2786c69c6bbSAdam Fenn DEVMETHOD(device_identify, kvm_clock_identify), 2796c69c6bbSAdam Fenn DEVMETHOD(device_probe, kvm_clock_probe), 2806c69c6bbSAdam Fenn DEVMETHOD(device_attach, kvm_clock_attach), 2816c69c6bbSAdam Fenn DEVMETHOD(device_detach, kvm_clock_detach), 2826c69c6bbSAdam Fenn DEVMETHOD(device_suspend, kvm_clock_suspend), 2836c69c6bbSAdam Fenn DEVMETHOD(device_resume, kvm_clock_resume), 2846c69c6bbSAdam Fenn /* clock interface */ 2856c69c6bbSAdam Fenn DEVMETHOD(clock_gettime, kvm_clock_gettime), 2866c69c6bbSAdam Fenn DEVMETHOD(clock_settime, kvm_clock_settime), 2876c69c6bbSAdam Fenn 2886c69c6bbSAdam Fenn DEVMETHOD_END 2896c69c6bbSAdam Fenn }; 2906c69c6bbSAdam Fenn 2916c69c6bbSAdam Fenn static driver_t kvm_clock_driver = { 2926c69c6bbSAdam Fenn KVM_CLOCK_DEVNAME, 2936c69c6bbSAdam Fenn kvm_clock_methods, 2946c69c6bbSAdam Fenn sizeof(struct kvm_clock_softc), 2956c69c6bbSAdam Fenn }; 2966c69c6bbSAdam Fenn 29748b6e01dSJohn Baldwin DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0); 298