1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2009 Adrian Chadd 5 * Copyright (c) 2012 Spectra Logic Corporation 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 /** 32 * \file dev/xen/timer/xen_timer.c 33 * \brief A timer driver for the Xen hypervisor's PV clock. 34 */ 35 36 #include <sys/cdefs.h> 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/bus.h> 40 #include <sys/kernel.h> 41 #include <sys/module.h> 42 #include <sys/time.h> 43 #include <sys/timetc.h> 44 #include <sys/timeet.h> 45 #include <sys/smp.h> 46 #include <sys/limits.h> 47 #include <sys/clock.h> 48 #include <sys/proc.h> 49 50 #include <xen/xen-os.h> 51 #include <xen/features.h> 52 #include <xen/xen_intr.h> 53 #include <xen/hypervisor.h> 54 #include <contrib/xen/io/xenbus.h> 55 #include <contrib/xen/vcpu.h> 56 #include <xen/error.h> 57 58 #include <machine/cpu.h> 59 #include <machine/cpufunc.h> 60 #include <machine/clock.h> 61 #include <machine/_inttypes.h> 62 #include <machine/smp.h> 63 #include <machine/pvclock.h> 64 65 #include <dev/xen/timer/timer.h> 66 67 #include "clock_if.h" 68 69 #define NSEC_IN_SEC 1000000000ULL 70 #define NSEC_IN_USEC 1000ULL 71 /* 18446744073 = int(2^64 / NSEC_IN_SC) = 1 ns in 64-bit fractions */ 72 #define FRAC_IN_NSEC 18446744073LL 73 74 /* Xen timers may fire up to 100us off */ 75 #define XENTIMER_MIN_PERIOD_IN_NSEC 100*NSEC_IN_USEC 76 77 /* 78 * The real resolution of the PV clock is 1ns, but the highest 79 * resolution that FreeBSD supports is 1us, so just use that. 80 */ 81 #define XENCLOCK_RESOLUTION 1 82 83 #define XENTIMER_QUALITY 950 84 85 struct xentimer_pcpu_data { 86 uint64_t timer; 87 uint64_t last_processed; 88 void *irq_handle; 89 }; 90 91 DPCPU_DEFINE(struct xentimer_pcpu_data, xentimer_pcpu); 92 93 DPCPU_DECLARE(struct vcpu_info *, vcpu_info); 94 95 struct xentimer_softc { 96 device_t dev; 97 struct timecounter tc; 98 struct eventtimer et; 99 }; 100 101 static void 102 xentimer_identify(driver_t *driver, device_t parent) 103 { 104 if (!xen_domain()) 105 return; 106 107 /* Handle all Xen PV timers in one device instance. */ 108 if (devclass_get_device(devclass_find(driver->name), 0)) 109 return; 110 111 BUS_ADD_CHILD(parent, 0, driver->name, 0); 112 } 113 114 static int 115 xentimer_probe(device_t dev) 116 { 117 KASSERT((xen_domain()), ("Trying to use Xen timer on bare metal")); 118 /* 119 * In order to attach, this driver requires the following: 120 * - Vector callback support by the hypervisor, in order to deliver 121 * timer interrupts to the correct CPU for CPUs other than 0. 122 * - Access to the hypervisor shared info page, in order to look up 123 * each VCPU's timer information and the Xen wallclock time. 124 * - The hypervisor must say its PV clock is "safe" to use. 125 * - The hypervisor must support VCPUOP hypercalls. 126 * - The maximum number of CPUs supported by FreeBSD must not exceed 127 * the number of VCPUs supported by the hypervisor. 128 */ 129 #define XTREQUIRES(condition, reason...) \ 130 if (!(condition)) { \ 131 device_printf(dev, ## reason); \ 132 device_detach(dev); \ 133 return (ENXIO); \ 134 } 135 136 if (xen_hvm_domain()) { 137 XTREQUIRES(xen_vector_callback_enabled, 138 "vector callbacks unavailable\n"); 139 XTREQUIRES(xen_feature(XENFEAT_hvm_safe_pvclock), 140 "HVM safe pvclock unavailable\n"); 141 } 142 XTREQUIRES(HYPERVISOR_shared_info != NULL, 143 "shared info page unavailable\n"); 144 XTREQUIRES(HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL) == 0, 145 "VCPUOPs interface unavailable\n"); 146 #undef XTREQUIRES 147 device_set_desc(dev, "Xen PV Clock"); 148 return (BUS_PROBE_NOWILDCARD); 149 } 150 151 /** 152 * \brief Get the current time, in nanoseconds, since the hypervisor booted. 153 * 154 * \param vcpu vcpu_info structure to fetch the time from. 155 * 156 */ 157 static uint64_t 158 xen_fetch_vcpu_time(struct vcpu_info *vcpu) 159 { 160 struct pvclock_vcpu_time_info *time; 161 162 time = (struct pvclock_vcpu_time_info *) &vcpu->time; 163 164 return (pvclock_get_timecount(time)); 165 } 166 167 static uint32_t 168 xentimer_get_timecount(struct timecounter *tc) 169 { 170 uint64_t vcpu_time; 171 172 /* 173 * We don't disable preemption here because the worst that can 174 * happen is reading the vcpu_info area of a different CPU than 175 * the one we are currently running on, but that would also 176 * return a valid tc (and we avoid the overhead of 177 * critical_{enter/exit} calls). 178 */ 179 vcpu_time = xen_fetch_vcpu_time(DPCPU_GET(vcpu_info)); 180 181 return (vcpu_time & UINT32_MAX); 182 } 183 184 /** 185 * \brief Fetch the hypervisor boot time, known as the "Xen wallclock". 186 * 187 * \param ts Timespec to store the current stable value. 188 * \param version Pointer to store the corresponding wallclock version. 189 * 190 * \note This value is updated when Domain-0 shifts its clock to follow 191 * clock drift, e.g. as detected by NTP. 192 */ 193 static void 194 xen_fetch_wallclock(struct timespec *ts) 195 { 196 shared_info_t *src = HYPERVISOR_shared_info; 197 struct pvclock_wall_clock *wc; 198 199 wc = (struct pvclock_wall_clock *) &src->wc_version; 200 201 pvclock_get_wallclock(wc, ts); 202 } 203 204 static void 205 xen_fetch_uptime(struct timespec *ts) 206 { 207 uint64_t uptime; 208 209 uptime = xen_fetch_vcpu_time(DPCPU_GET(vcpu_info)); 210 211 ts->tv_sec = uptime / NSEC_IN_SEC; 212 ts->tv_nsec = uptime % NSEC_IN_SEC; 213 } 214 215 static int 216 xentimer_settime(device_t dev __unused, struct timespec *ts) 217 { 218 struct xen_platform_op settime; 219 int ret; 220 221 /* 222 * Don't return EINVAL here; just silently fail if the domain isn't 223 * privileged enough to set the TOD. 224 */ 225 if (!xen_initial_domain()) 226 return (0); 227 228 settime.cmd = XENPF_settime64; 229 settime.u.settime64.mbz = 0; 230 settime.u.settime64.secs = ts->tv_sec; 231 settime.u.settime64.nsecs = ts->tv_nsec; 232 settime.u.settime64.system_time = 233 xen_fetch_vcpu_time(DPCPU_GET(vcpu_info)); 234 235 ret = HYPERVISOR_platform_op(&settime); 236 ret = ret != 0 ? xen_translate_error(ret) : 0; 237 if (ret != 0 && bootverbose) 238 device_printf(dev, "failed to set Xen PV clock: %d\n", ret); 239 240 return (ret); 241 } 242 243 /** 244 * \brief Return current time according to the Xen Hypervisor wallclock. 245 * 246 * \param dev Xentimer device. 247 * \param ts Pointer to store the wallclock time. 248 * 249 * \note The Xen time structures document the hypervisor start time and the 250 * uptime-since-hypervisor-start (in nsec.) They need to be combined 251 * in order to calculate a TOD clock. 252 */ 253 static int 254 xentimer_gettime(device_t dev, struct timespec *ts) 255 { 256 struct timespec u_ts; 257 258 timespecclear(ts); 259 xen_fetch_wallclock(ts); 260 xen_fetch_uptime(&u_ts); 261 timespecadd(ts, &u_ts, ts); 262 263 return (0); 264 } 265 266 /** 267 * \brief Handle a timer interrupt for the Xen PV timer driver. 268 * 269 * \param arg Xen timer driver softc that is expecting the interrupt. 270 */ 271 static int 272 xentimer_intr(void *arg) 273 { 274 struct xentimer_softc *sc = (struct xentimer_softc *)arg; 275 struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu); 276 277 pcpu->last_processed = xen_fetch_vcpu_time(DPCPU_GET(vcpu_info)); 278 if (pcpu->timer != 0 && sc->et.et_active) 279 sc->et.et_event_cb(&sc->et, sc->et.et_arg); 280 281 return (FILTER_HANDLED); 282 } 283 284 static int 285 xentimer_vcpu_start_timer(int vcpu, uint64_t next_time) 286 { 287 struct vcpu_set_singleshot_timer single; 288 289 single.timeout_abs_ns = next_time; 290 /* Get an event anyway, even if the timeout is already expired */ 291 single.flags = 0; 292 return (HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &single)); 293 } 294 295 static int 296 xentimer_vcpu_stop_timer(int vcpu) 297 { 298 299 return (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, vcpu, NULL)); 300 } 301 302 /** 303 * \brief Set the next oneshot time for the current CPU. 304 * 305 * \param et Xen timer driver event timer to schedule on. 306 * \param first Delta to the next time to schedule the interrupt for. 307 * \param period Not used. 308 * 309 * \note See eventtimers(9) for more information. 310 * \note 311 * 312 * \returns 0 313 */ 314 static int 315 xentimer_et_start(struct eventtimer *et, 316 sbintime_t first, sbintime_t period) 317 { 318 int error; 319 struct xentimer_softc *sc = et->et_priv; 320 int cpu = PCPU_GET(vcpu_id); 321 struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu); 322 struct vcpu_info *vcpu = DPCPU_GET(vcpu_info); 323 uint64_t first_in_ns, next_time; 324 #ifdef INVARIANTS 325 struct thread *td = curthread; 326 #endif 327 328 KASSERT(td->td_critnest != 0, 329 ("xentimer_et_start called without preemption disabled")); 330 331 /* See sbttots() for this formula. */ 332 first_in_ns = (((first >> 32) * NSEC_IN_SEC) + 333 (((uint64_t)NSEC_IN_SEC * (uint32_t)first) >> 32)); 334 335 next_time = xen_fetch_vcpu_time(vcpu) + first_in_ns; 336 error = xentimer_vcpu_start_timer(cpu, next_time); 337 if (error) 338 panic("%s: Error %d setting singleshot timer to %"PRIu64"\n", 339 device_get_nameunit(sc->dev), error, next_time); 340 341 pcpu->timer = next_time; 342 return (error); 343 } 344 345 /** 346 * \brief Cancel the event timer's currently running timer, if any. 347 */ 348 static int 349 xentimer_et_stop(struct eventtimer *et) 350 { 351 int cpu = PCPU_GET(vcpu_id); 352 struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu); 353 354 pcpu->timer = 0; 355 return (xentimer_vcpu_stop_timer(cpu)); 356 } 357 358 /** 359 * \brief Attach a Xen PV timer driver instance. 360 * 361 * \param dev Bus device object to attach. 362 * 363 * \note 364 * \returns EINVAL 365 */ 366 static int 367 xentimer_attach(device_t dev) 368 { 369 struct xentimer_softc *sc = device_get_softc(dev); 370 int error, i; 371 372 sc->dev = dev; 373 374 /* Bind an event channel to a VIRQ on each VCPU. */ 375 CPU_FOREACH(i) { 376 struct xentimer_pcpu_data *pcpu; 377 378 pcpu = DPCPU_ID_PTR(i, xentimer_pcpu); 379 error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL); 380 if (error) { 381 device_printf(dev, "Error disabling Xen periodic timer " 382 "on CPU %d\n", i); 383 return (error); 384 } 385 386 error = xen_intr_bind_virq(dev, VIRQ_TIMER, i, xentimer_intr, 387 NULL, sc, INTR_TYPE_CLK, &pcpu->irq_handle); 388 if (error) { 389 device_printf(dev, "Error %d binding VIRQ_TIMER " 390 "to VCPU %d\n", error, i); 391 return (error); 392 } 393 xen_intr_describe(pcpu->irq_handle, "c%d", i); 394 } 395 396 /* Register the event timer. */ 397 sc->et.et_name = "XENTIMER"; 398 sc->et.et_quality = XENTIMER_QUALITY; 399 sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; 400 sc->et.et_frequency = NSEC_IN_SEC; 401 /* See tstosbt() for this formula */ 402 sc->et.et_min_period = (XENTIMER_MIN_PERIOD_IN_NSEC * 403 (((uint64_t)1 << 63) / 500000000) >> 32); 404 sc->et.et_max_period = ((sbintime_t)4 << 32); 405 sc->et.et_start = xentimer_et_start; 406 sc->et.et_stop = xentimer_et_stop; 407 sc->et.et_priv = sc; 408 et_register(&sc->et); 409 410 /* Register the timecounter. */ 411 sc->tc.tc_name = "XENTIMER"; 412 sc->tc.tc_quality = XENTIMER_QUALITY; 413 /* 414 * FIXME: due to the lack of ordering during resume, FreeBSD cannot 415 * guarantee that the Xen PV timer is resumed before any other device 416 * attempts to make use of it, so mark it as not safe for suspension 417 * (ie: remove the TC_FLAGS_SUSPEND_SAFE flag). 418 * 419 * NB: This was not a problem in previous FreeBSD versions because the 420 * timer was directly attached to the nexus, but it is an issue now 421 * that the timer is attached to the xenpv bus, and thus resumed 422 * later. 423 * 424 * sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE; 425 */ 426 /* 427 * The underlying resolution is in nanoseconds, since the timer info 428 * scales TSC frequencies using a fraction that represents time in 429 * terms of nanoseconds. 430 */ 431 sc->tc.tc_frequency = NSEC_IN_SEC; 432 sc->tc.tc_counter_mask = ~0u; 433 sc->tc.tc_get_timecount = xentimer_get_timecount; 434 sc->tc.tc_priv = sc; 435 tc_init(&sc->tc); 436 437 /* Register the Hypervisor wall clock */ 438 clock_register(dev, XENCLOCK_RESOLUTION); 439 440 return (0); 441 } 442 443 static int 444 xentimer_detach(device_t dev) 445 { 446 447 /* Implement Xen PV clock teardown - XXX see hpet_detach ? */ 448 /* If possible: 449 * 1. need to deregister timecounter 450 * 2. need to deregister event timer 451 * 3. need to deregister virtual IRQ event channels 452 */ 453 return (EBUSY); 454 } 455 456 static void 457 xentimer_percpu_resume(void *arg) 458 { 459 device_t dev = (device_t) arg; 460 struct xentimer_softc *sc = device_get_softc(dev); 461 462 xentimer_et_start(&sc->et, sc->et.et_min_period, 0); 463 } 464 465 static int 466 xentimer_resume(device_t dev) 467 { 468 int error; 469 int i; 470 471 /* Disable the periodic timer */ 472 CPU_FOREACH(i) { 473 error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL); 474 if (error != 0) { 475 device_printf(dev, 476 "Error disabling Xen periodic timer on CPU %d\n", 477 i); 478 return (error); 479 } 480 } 481 482 /* Reset the last uptime value */ 483 pvclock_resume(); 484 485 /* Reset the RTC clock */ 486 inittodr(time_second); 487 488 /* Kick the timers on all CPUs */ 489 smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev); 490 491 if (bootverbose) 492 device_printf(dev, "resumed operation after suspension\n"); 493 494 return (0); 495 } 496 497 static int 498 xentimer_suspend(device_t dev) 499 { 500 return (0); 501 } 502 503 /* 504 * Xen early clock init 505 */ 506 void 507 xen_clock_init(void) 508 { 509 } 510 511 /* 512 * Xen PV DELAY function 513 * 514 * When running on PVH mode we don't have an emulated i8524, so 515 * make use of the Xen time info in order to code a simple DELAY 516 * function that can be used during early boot. 517 */ 518 void 519 xen_delay(int n) 520 { 521 struct vcpu_info *vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; 522 uint64_t end_ns; 523 uint64_t current; 524 525 end_ns = xen_fetch_vcpu_time(vcpu); 526 end_ns += n * NSEC_IN_USEC; 527 528 for (;;) { 529 current = xen_fetch_vcpu_time(vcpu); 530 if (current >= end_ns) 531 break; 532 } 533 } 534 535 static device_method_t xentimer_methods[] = { 536 DEVMETHOD(device_identify, xentimer_identify), 537 DEVMETHOD(device_probe, xentimer_probe), 538 DEVMETHOD(device_attach, xentimer_attach), 539 DEVMETHOD(device_detach, xentimer_detach), 540 DEVMETHOD(device_suspend, xentimer_suspend), 541 DEVMETHOD(device_resume, xentimer_resume), 542 /* clock interface */ 543 DEVMETHOD(clock_gettime, xentimer_gettime), 544 DEVMETHOD(clock_settime, xentimer_settime), 545 DEVMETHOD_END 546 }; 547 548 static driver_t xentimer_driver = { 549 "xen_et", 550 xentimer_methods, 551 sizeof(struct xentimer_softc), 552 }; 553 554 DRIVER_MODULE(xentimer, xenpv, xentimer_driver, 0, 0); 555 MODULE_DEPEND(xentimer, xenpv, 1, 1, 1); 556