xref: /freebsd/sys/dev/kvm_clock/kvm_clock.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
 * Copyright (c) 2021 Mathieu Chouquet-Stringer
 * Copyright (c) 2021 Juniper Networks, Inc.
 * Copyright (c) 2021 Klara, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
306c69c6bbSAdam Fenn 
/*
 * Linux KVM paravirtual clock support
 *
 * References:
 *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
 *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
 */
386c69c6bbSAdam Fenn 
396c69c6bbSAdam Fenn #include <sys/param.h>
406c69c6bbSAdam Fenn #include <sys/bus.h>
416c69c6bbSAdam Fenn #include <sys/domainset.h>
426c69c6bbSAdam Fenn #include <sys/kernel.h>
436c69c6bbSAdam Fenn #include <sys/malloc.h>
446c69c6bbSAdam Fenn #include <sys/module.h>
456c69c6bbSAdam Fenn #include <sys/smp.h>
4657a8fa6fSColin Percival #include <sys/sysctl.h>
476c69c6bbSAdam Fenn 
486c69c6bbSAdam Fenn #include <vm/vm.h>
496c69c6bbSAdam Fenn #include <vm/pmap.h>
506c69c6bbSAdam Fenn #include <vm/vm_extern.h>
516c69c6bbSAdam Fenn 
526c69c6bbSAdam Fenn #include <machine/pvclock.h>
536c69c6bbSAdam Fenn #include <x86/kvm.h>
546c69c6bbSAdam Fenn 
556c69c6bbSAdam Fenn #include "clock_if.h"
566c69c6bbSAdam Fenn 
/* Name shared by the device, its driver, and the pvclock registration. */
#define	KVM_CLOCK_DEVNAME		"kvmclock"
/*
 * Timecounter quality handed to pvclock_init().
 *
 * Note: Chosen to be (1) above the HPET's value (always 950), (2) above the
 * TSC's default value of 800, and (3) below the TSC's value when it supports
 * the "Invariant TSC" feature and is believed to be synchronized across all
 * CPUs.
 */
#define	KVM_CLOCK_TC_QUALITY		975
646c69c6bbSAdam Fenn 
656c69c6bbSAdam Fenn struct kvm_clock_softc {
666c69c6bbSAdam Fenn 	struct pvclock			 pvc;
676c69c6bbSAdam Fenn 	struct pvclock_wall_clock	 wc;
686c69c6bbSAdam Fenn 	struct pvclock_vcpu_time_info	*timeinfos;
696c69c6bbSAdam Fenn 	u_int				 msr_tc;
706c69c6bbSAdam Fenn 	u_int				 msr_wc;
71568f552bSMark Johnston #ifndef EARLY_AP_STARTUP
72568f552bSMark Johnston 	int				 firstcpu;
73568f552bSMark Johnston #endif
746c69c6bbSAdam Fenn };
756c69c6bbSAdam Fenn 
766c69c6bbSAdam Fenn static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
77568f552bSMark Johnston static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
78568f552bSMark Johnston 		    const cpuset_t *cpus);
796c69c6bbSAdam Fenn static void	kvm_clock_system_time_enable_pcpu(void *arg);
8057a8fa6fSColin Percival static void	kvm_clock_setup_sysctl(device_t);
816c69c6bbSAdam Fenn 
826c69c6bbSAdam Fenn static struct pvclock_wall_clock *
kvm_clock_get_wallclock(void * arg)836c69c6bbSAdam Fenn kvm_clock_get_wallclock(void *arg)
846c69c6bbSAdam Fenn {
856c69c6bbSAdam Fenn 	struct kvm_clock_softc *sc = arg;
866c69c6bbSAdam Fenn 
876c69c6bbSAdam Fenn 	wrmsr(sc->msr_wc, vtophys(&sc->wc));
886c69c6bbSAdam Fenn 	return (&sc->wc);
896c69c6bbSAdam Fenn }
906c69c6bbSAdam Fenn 
916c69c6bbSAdam Fenn static void
kvm_clock_system_time_enable(struct kvm_clock_softc * sc,const cpuset_t * cpus)92568f552bSMark Johnston kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
936c69c6bbSAdam Fenn {
94568f552bSMark Johnston 	smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
95568f552bSMark Johnston 	    NULL, sc);
966c69c6bbSAdam Fenn }
976c69c6bbSAdam Fenn 
986c69c6bbSAdam Fenn static void
kvm_clock_system_time_enable_pcpu(void * arg)996c69c6bbSAdam Fenn kvm_clock_system_time_enable_pcpu(void *arg)
1006c69c6bbSAdam Fenn {
1016c69c6bbSAdam Fenn 	struct kvm_clock_softc *sc = arg;
1026c69c6bbSAdam Fenn 
1036c69c6bbSAdam Fenn 	/*
1046c69c6bbSAdam Fenn 	 * See [2]; the lsb of this MSR is the system time enable bit.
1056c69c6bbSAdam Fenn 	 */
1066c69c6bbSAdam Fenn 	wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
1076c69c6bbSAdam Fenn }
1086c69c6bbSAdam Fenn 
109568f552bSMark Johnston #ifndef EARLY_AP_STARTUP
110568f552bSMark Johnston static void
kvm_clock_init_smp(void * arg __unused)111568f552bSMark Johnston kvm_clock_init_smp(void *arg __unused)
112568f552bSMark Johnston {
113568f552bSMark Johnston 	devclass_t kvm_clock_devclass;
114568f552bSMark Johnston 	cpuset_t cpus;
115568f552bSMark Johnston 	struct kvm_clock_softc *sc;
116568f552bSMark Johnston 
117568f552bSMark Johnston 	kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
118568f552bSMark Johnston 	sc = devclass_get_softc(kvm_clock_devclass, 0);
119568f552bSMark Johnston 	if (sc == NULL || mp_ncpus == 1)
120568f552bSMark Johnston 		return;
121568f552bSMark Johnston 
122568f552bSMark Johnston 	/*
123568f552bSMark Johnston 	 * Register with the hypervisor on all CPUs except the one that
124568f552bSMark Johnston 	 * registered in kvm_clock_attach().
125568f552bSMark Johnston 	 */
126568f552bSMark Johnston 	cpus = all_cpus;
127568f552bSMark Johnston 	KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
128568f552bSMark Johnston 	    ("%s: invalid first CPU %d", __func__, sc->firstcpu));
129568f552bSMark Johnston 	CPU_CLR(sc->firstcpu, &cpus);
130568f552bSMark Johnston 	kvm_clock_system_time_enable(sc, &cpus);
131568f552bSMark Johnston }
132568f552bSMark Johnston SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
133568f552bSMark Johnston #endif
134568f552bSMark Johnston 
1356c69c6bbSAdam Fenn static void
kvm_clock_identify(driver_t * driver,device_t parent)1366c69c6bbSAdam Fenn kvm_clock_identify(driver_t *driver, device_t parent)
1376c69c6bbSAdam Fenn {
1386c69c6bbSAdam Fenn 	u_int regs[4];
1396c69c6bbSAdam Fenn 
1406c69c6bbSAdam Fenn 	kvm_cpuid_get_features(regs);
1416c69c6bbSAdam Fenn 	if ((regs[0] &
1426c69c6bbSAdam Fenn 	    (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
1436c69c6bbSAdam Fenn 		return;
1446c69c6bbSAdam Fenn 	if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
1456c69c6bbSAdam Fenn 		return;
1466c69c6bbSAdam Fenn 	BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
1476c69c6bbSAdam Fenn }
1486c69c6bbSAdam Fenn 
1496c69c6bbSAdam Fenn static int
kvm_clock_probe(device_t dev)1506c69c6bbSAdam Fenn kvm_clock_probe(device_t dev)
1516c69c6bbSAdam Fenn {
1526c69c6bbSAdam Fenn 	device_set_desc(dev, "KVM paravirtual clock");
1536c69c6bbSAdam Fenn 	return (BUS_PROBE_DEFAULT);
1546c69c6bbSAdam Fenn }
1556c69c6bbSAdam Fenn 
1566c69c6bbSAdam Fenn static int
kvm_clock_attach(device_t dev)1576c69c6bbSAdam Fenn kvm_clock_attach(device_t dev)
1586c69c6bbSAdam Fenn {
1596c69c6bbSAdam Fenn 	u_int regs[4];
1606c69c6bbSAdam Fenn 	struct kvm_clock_softc *sc = device_get_softc(dev);
1616c69c6bbSAdam Fenn 	bool stable_flag_supported;
1626c69c6bbSAdam Fenn 
1636c69c6bbSAdam Fenn 	/* Process KVM "features" CPUID leaf content: */
1646c69c6bbSAdam Fenn 	kvm_cpuid_get_features(regs);
1656c69c6bbSAdam Fenn 	if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
1666c69c6bbSAdam Fenn 		sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
1676c69c6bbSAdam Fenn 		sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
1686c69c6bbSAdam Fenn 	} else {
1696c69c6bbSAdam Fenn 		KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
1706c69c6bbSAdam Fenn 		    ("Clocksource feature flags disappeared since "
1716c69c6bbSAdam Fenn 		    "kvm_clock_identify: regs[0] %#0x.", regs[0]));
1726c69c6bbSAdam Fenn 		sc->msr_tc = KVM_MSR_SYSTEM_TIME;
1736c69c6bbSAdam Fenn 		sc->msr_wc = KVM_MSR_WALL_CLOCK;
1746c69c6bbSAdam Fenn 	}
1756c69c6bbSAdam Fenn 	stable_flag_supported =
1766c69c6bbSAdam Fenn 	    (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
1776c69c6bbSAdam Fenn 
1786c69c6bbSAdam Fenn 	/* Set up 'struct pvclock_vcpu_time_info' page(s): */
179f49fd63aSJohn Baldwin 	sc->timeinfos = kmem_malloc(mp_ncpus *
1806c69c6bbSAdam Fenn 	    sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
181568f552bSMark Johnston #ifdef EARLY_AP_STARTUP
182568f552bSMark Johnston 	kvm_clock_system_time_enable(sc, &all_cpus);
183568f552bSMark Johnston #else
184568f552bSMark Johnston 	sc->firstcpu = curcpu;
185568f552bSMark Johnston 	kvm_clock_system_time_enable_pcpu(sc);
186568f552bSMark Johnston #endif
1876c69c6bbSAdam Fenn 
1886c69c6bbSAdam Fenn 	/*
1896c69c6bbSAdam Fenn 	 * Init pvclock; register KVM clock wall clock, register KVM clock
1906c69c6bbSAdam Fenn 	 * timecounter, and set up the requisite infrastructure for vDSO access
1916c69c6bbSAdam Fenn 	 * to this timecounter.
1926c69c6bbSAdam Fenn 	 *     Regarding 'tc_flags': Since the KVM MSR documentation does not
1936c69c6bbSAdam Fenn 	 *     specifically discuss suspend/resume scenarios, conservatively
1946c69c6bbSAdam Fenn 	 *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
1956c69c6bbSAdam Fenn 	 *     time must be re-inited in such cases.
1966c69c6bbSAdam Fenn 	 */
1976c69c6bbSAdam Fenn 	sc->pvc.get_wallclock = kvm_clock_get_wallclock;
1986c69c6bbSAdam Fenn 	sc->pvc.get_wallclock_arg = sc;
1996c69c6bbSAdam Fenn 	sc->pvc.timeinfos = sc->timeinfos;
2006c69c6bbSAdam Fenn 	sc->pvc.stable_flag_supported = stable_flag_supported;
2016c69c6bbSAdam Fenn 	pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
20257a8fa6fSColin Percival 	kvm_clock_setup_sysctl(dev);
2036c69c6bbSAdam Fenn 	return (0);
2046c69c6bbSAdam Fenn }
2056c69c6bbSAdam Fenn 
2066c69c6bbSAdam Fenn static int
kvm_clock_detach(device_t dev)2076c69c6bbSAdam Fenn kvm_clock_detach(device_t dev)
2086c69c6bbSAdam Fenn {
2096c69c6bbSAdam Fenn 	struct kvm_clock_softc *sc = device_get_softc(dev);
2106c69c6bbSAdam Fenn 
2116c69c6bbSAdam Fenn 	return (pvclock_destroy(&sc->pvc));
2126c69c6bbSAdam Fenn }
2136c69c6bbSAdam Fenn 
2146c69c6bbSAdam Fenn static int
kvm_clock_suspend(device_t dev)2156c69c6bbSAdam Fenn kvm_clock_suspend(device_t dev)
2166c69c6bbSAdam Fenn {
2176c69c6bbSAdam Fenn 	return (0);
2186c69c6bbSAdam Fenn }
2196c69c6bbSAdam Fenn 
2206c69c6bbSAdam Fenn static int
kvm_clock_resume(device_t dev)2216c69c6bbSAdam Fenn kvm_clock_resume(device_t dev)
2226c69c6bbSAdam Fenn {
2236c69c6bbSAdam Fenn 	/*
2246c69c6bbSAdam Fenn 	 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
2256c69c6bbSAdam Fenn 	 * conservatively assume that the system time must be re-inited in
2266c69c6bbSAdam Fenn 	 * suspend/resume scenarios.
2276c69c6bbSAdam Fenn 	 */
228568f552bSMark Johnston 	kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
2296c69c6bbSAdam Fenn 	pvclock_resume();
2306c69c6bbSAdam Fenn 	inittodr(time_second);
2316c69c6bbSAdam Fenn 	return (0);
2326c69c6bbSAdam Fenn }
2336c69c6bbSAdam Fenn 
2346c69c6bbSAdam Fenn static int
kvm_clock_gettime(device_t dev,struct timespec * ts)2356c69c6bbSAdam Fenn kvm_clock_gettime(device_t dev, struct timespec *ts)
2366c69c6bbSAdam Fenn {
2376c69c6bbSAdam Fenn 	struct kvm_clock_softc *sc = device_get_softc(dev);
2386c69c6bbSAdam Fenn 
2396c69c6bbSAdam Fenn 	pvclock_gettime(&sc->pvc, ts);
2406c69c6bbSAdam Fenn 	return (0);
2416c69c6bbSAdam Fenn }
2426c69c6bbSAdam Fenn 
2436c69c6bbSAdam Fenn static int
kvm_clock_settime(device_t dev,struct timespec * ts)2446c69c6bbSAdam Fenn kvm_clock_settime(device_t dev, struct timespec *ts)
2456c69c6bbSAdam Fenn {
2466c69c6bbSAdam Fenn 	/*
2476c69c6bbSAdam Fenn 	 * Even though it is not possible to set the KVM clock's wall clock, to
2486c69c6bbSAdam Fenn 	 * avoid the possibility of periodic benign error messages from
2496c69c6bbSAdam Fenn 	 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
2506c69c6bbSAdam Fenn 	 */
2516c69c6bbSAdam Fenn 	return (0);
2526c69c6bbSAdam Fenn }
2536c69c6bbSAdam Fenn 
25457a8fa6fSColin Percival static int
kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)25557a8fa6fSColin Percival kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
25657a8fa6fSColin Percival {
25757a8fa6fSColin Percival 	struct kvm_clock_softc *sc = oidp->oid_arg1;
25857a8fa6fSColin Percival         uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
25957a8fa6fSColin Percival 
26057a8fa6fSColin Percival         return (sysctl_handle_64(oidp, &freq, 0, req));
26157a8fa6fSColin Percival }
26257a8fa6fSColin Percival 
26357a8fa6fSColin Percival static void
kvm_clock_setup_sysctl(device_t dev)26457a8fa6fSColin Percival kvm_clock_setup_sysctl(device_t dev)
26557a8fa6fSColin Percival {
26657a8fa6fSColin Percival 	struct kvm_clock_softc *sc = device_get_softc(dev);
26757a8fa6fSColin Percival         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
26857a8fa6fSColin Percival         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
26957a8fa6fSColin Percival         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
27057a8fa6fSColin Percival 
27157a8fa6fSColin Percival         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
27257a8fa6fSColin Percival             CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
27357a8fa6fSColin Percival             kvm_clock_tsc_freq_sysctl, "QU",
27457a8fa6fSColin Percival             "Time Stamp Counter frequency");
27557a8fa6fSColin Percival }
27657a8fa6fSColin Percival 
2776c69c6bbSAdam Fenn static device_method_t kvm_clock_methods[] = {
2786c69c6bbSAdam Fenn 	DEVMETHOD(device_identify,	kvm_clock_identify),
2796c69c6bbSAdam Fenn 	DEVMETHOD(device_probe,		kvm_clock_probe),
2806c69c6bbSAdam Fenn 	DEVMETHOD(device_attach,	kvm_clock_attach),
2816c69c6bbSAdam Fenn 	DEVMETHOD(device_detach,	kvm_clock_detach),
2826c69c6bbSAdam Fenn 	DEVMETHOD(device_suspend,	kvm_clock_suspend),
2836c69c6bbSAdam Fenn 	DEVMETHOD(device_resume,	kvm_clock_resume),
2846c69c6bbSAdam Fenn 	/* clock interface */
2856c69c6bbSAdam Fenn 	DEVMETHOD(clock_gettime,	kvm_clock_gettime),
2866c69c6bbSAdam Fenn 	DEVMETHOD(clock_settime,	kvm_clock_settime),
2876c69c6bbSAdam Fenn 
2886c69c6bbSAdam Fenn 	DEVMETHOD_END
2896c69c6bbSAdam Fenn };
2906c69c6bbSAdam Fenn 
2916c69c6bbSAdam Fenn static driver_t kvm_clock_driver = {
2926c69c6bbSAdam Fenn 	KVM_CLOCK_DEVNAME,
2936c69c6bbSAdam Fenn 	kvm_clock_methods,
2946c69c6bbSAdam Fenn 	sizeof(struct kvm_clock_softc),
2956c69c6bbSAdam Fenn };
2966c69c6bbSAdam Fenn 
29748b6e01dSJohn Baldwin DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);
298