xref: /freebsd/sys/amd64/vmm/io/vhpet.c (revision 2ff63af9b88c7413b7d71715b5532625752a248e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_bhyve_snapshot.h"
34 
35 #include <sys/param.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/systm.h>
41 
42 #include <dev/acpica/acpi_hpet.h>
43 
44 #include <machine/vmm.h>
45 #include <machine/vmm_dev.h>
46 #include <machine/vmm_snapshot.h>
47 
48 #include "vmm_lapic.h"
49 #include "vatpic.h"
50 #include "vioapic.h"
51 #include "vhpet.h"
52 
53 #include "vmm_ktr.h"
54 
55 static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
56 
57 #define	HPET_FREQ	16777216		/* 16.7 (2^24) Mhz */
58 #define	FS_PER_S	1000000000000000ul
59 
60 /* Timer N Configuration and Capabilities Register */
61 #define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
62 				 HPET_TCAP_FSB_INT_DEL	|		\
63 				 HPET_TCAP_SIZE		|		\
64 				 HPET_TCAP_PER_INT)
65 /*
66  * HPET requires at least 3 timers and up to 32 timers per block.
67  */
68 #define	VHPET_NUM_TIMERS	8
69 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
70 
/*
 * Argument passed to vhpet_handler() when a timer's callout fires.  It names
 * the device instance and the index of the timer that expired.
 */
struct vhpet_callout_arg {
	struct vhpet *vhpet;	/* device the timer belongs to */
	int timer_num;		/* index into vhpet->timer[] */
};
75 
76 struct vhpet {
77 	struct vm	*vm;
78 	struct mtx	mtx;
79 	sbintime_t	freq_sbt;
80 
81 	uint64_t	config;		/* Configuration */
82 	uint64_t	isr;		/* Interrupt Status */
83 	uint32_t	countbase;	/* HPET counter base value */
84 	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */
85 
86 	struct {
87 		uint64_t	cap_config;	/* Configuration */
88 		uint64_t	msireg;		/* FSB interrupt routing */
89 		uint32_t	compval;	/* Comparator */
90 		uint32_t	comprate;
91 		struct callout	callout;
92 		sbintime_t	callout_sbt;	/* time when counter==compval */
93 		struct vhpet_callout_arg arg;
94 	} timer[VHPET_NUM_TIMERS];
95 };
96 
97 #define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
98 #define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))
99 
100 static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
101     sbintime_t now);
102 
103 static uint64_t
104 vhpet_capabilities(void)
105 {
106 	uint64_t cap = 0;
107 
108 	cap |= 0x8086 << 16;			/* vendor id */
109 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
110 	cap |= 1;				/* revision */
111 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
112 
113 	cap &= 0xffffffff;
114 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
115 
116 	return (cap);
117 }
118 
119 static __inline bool
120 vhpet_counter_enabled(struct vhpet *vhpet)
121 {
122 
123 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
124 }
125 
126 static __inline bool
127 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
128 {
129 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
130 
131 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
132 		return (true);
133 	else
134 		return (false);
135 }
136 
137 static __inline int
138 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
139 {
140 	/*
141 	 * If the timer is configured to use MSI then treat it as if the
142 	 * timer is not connected to the ioapic.
143 	 */
144 	if (vhpet_timer_msi_enabled(vhpet, n))
145 		return (0);
146 
147 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
148 }
149 
150 static uint32_t
151 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
152 {
153 	uint32_t val;
154 	sbintime_t now, delta;
155 
156 	val = vhpet->countbase;
157 	if (vhpet_counter_enabled(vhpet)) {
158 		now = sbinuptime();
159 		delta = now - vhpet->countbase_sbt;
160 		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
161 		    "%#lx to %#lx", vhpet->countbase_sbt, now));
162 		val += delta / vhpet->freq_sbt;
163 		if (nowptr != NULL)
164 			*nowptr = now;
165 	} else {
166 		/*
167 		 * The sbinuptime corresponding to the 'countbase' is
168 		 * meaningless when the counter is disabled. Make sure
169 		 * that the caller doesn't want to use it.
170 		 */
171 		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
172 	}
173 	return (val);
174 }
175 
176 static void
177 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
178 {
179 	int pin;
180 
181 	if (vhpet->isr & (1 << n)) {
182 		pin = vhpet_timer_ioapic_pin(vhpet, n);
183 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
184 		vioapic_deassert_irq(vhpet->vm, pin);
185 		vhpet->isr &= ~(1 << n);
186 	}
187 }
188 
189 static __inline bool
190 vhpet_periodic_timer(struct vhpet *vhpet, int n)
191 {
192 
193 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
194 }
195 
196 static __inline bool
197 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
198 {
199 
200 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
201 }
202 
203 static __inline bool
204 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
205 {
206 
207 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
208 	    "timer %d is using MSI", n));
209 
210 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
211 		return (true);
212 	else
213 		return (false);
214 }
215 
216 static void
217 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
218 {
219 	int pin;
220 
221 	/* If interrupts are not enabled for this timer then just return. */
222 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
223 		return;
224 
225 	/*
226 	 * If a level triggered interrupt is already asserted then just return.
227 	 */
228 	if ((vhpet->isr & (1 << n)) != 0) {
229 		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
230 		return;
231 	}
232 
233 	if (vhpet_timer_msi_enabled(vhpet, n)) {
234 		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
235 		    vhpet->timer[n].msireg & 0xffffffff);
236 		return;
237 	}
238 
239 	pin = vhpet_timer_ioapic_pin(vhpet, n);
240 	if (pin == 0) {
241 		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
242 		return;
243 	}
244 
245 	if (vhpet_timer_edge_trig(vhpet, n)) {
246 		vioapic_pulse_irq(vhpet->vm, pin);
247 	} else {
248 		vhpet->isr |= 1 << n;
249 		vioapic_assert_irq(vhpet->vm, pin);
250 	}
251 }
252 
253 static void
254 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
255 {
256 	uint32_t compval, comprate, compnext;
257 
258 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
259 
260 	compval = vhpet->timer[n].compval;
261 	comprate = vhpet->timer[n].comprate;
262 
263 	/*
264 	 * Calculate the comparator value to be used for the next periodic
265 	 * interrupt.
266 	 *
267 	 * This function is commonly called from the callout handler.
268 	 * In this scenario the 'counter' is ahead of 'compval'. To find
269 	 * the next value to program into the accumulator we divide the
270 	 * number space between 'compval' and 'counter' into 'comprate'
271 	 * sized units. The 'compval' is rounded up such that is "ahead"
272 	 * of 'counter'.
273 	 */
274 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
275 
276 	vhpet->timer[n].compval = compnext;
277 }
278 
279 static void
280 vhpet_handler(void *a)
281 {
282 	int n;
283 	uint32_t counter;
284 	sbintime_t now;
285 	struct vhpet *vhpet;
286 	struct callout *callout;
287 	struct vhpet_callout_arg *arg;
288 
289 	arg = a;
290 	vhpet = arg->vhpet;
291 	n = arg->timer_num;
292 	callout = &vhpet->timer[n].callout;
293 
294 	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
295 
296 	VHPET_LOCK(vhpet);
297 
298 	if (callout_pending(callout))		/* callout was reset */
299 		goto done;
300 
301 	if (!callout_active(callout))		/* callout was stopped */
302 		goto done;
303 
304 	callout_deactivate(callout);
305 
306 	if (!vhpet_counter_enabled(vhpet))
307 		panic("vhpet(%p) callout with counter disabled", vhpet);
308 
309 	counter = vhpet_counter(vhpet, &now);
310 	vhpet_start_timer(vhpet, n, counter, now);
311 	vhpet_timer_interrupt(vhpet, n);
312 done:
313 	VHPET_UNLOCK(vhpet);
314 	return;
315 }
316 
317 static void
318 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
319 {
320 
321 	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
322 	callout_stop(&vhpet->timer[n].callout);
323 
324 	/*
325 	 * If the callout was scheduled to expire in the past but hasn't
326 	 * had a chance to execute yet then trigger the timer interrupt
327 	 * here. Failing to do so will result in a missed timer interrupt
328 	 * in the guest. This is especially bad in one-shot mode because
329 	 * the next interrupt has to wait for the counter to wrap around.
330 	 */
331 	if (vhpet->timer[n].callout_sbt < now) {
332 		VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
333 		    "stopping timer", n);
334 		vhpet_timer_interrupt(vhpet, n);
335 	}
336 }
337 
338 static void
339 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
340 {
341 	sbintime_t delta, precision;
342 
343 	if (vhpet->timer[n].comprate != 0)
344 		vhpet_adjust_compval(vhpet, n, counter);
345 	else {
346 		/*
347 		 * In one-shot mode it is the guest's responsibility to make
348 		 * sure that the comparator value is not in the "past". The
349 		 * hardware doesn't have any belt-and-suspenders to deal with
350 		 * this so we don't either.
351 		 */
352 	}
353 
354 	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
355 	precision = delta >> tc_precexp;
356 	vhpet->timer[n].callout_sbt = now + delta;
357 	callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
358 	    precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
359 }
360 
361 static void
362 vhpet_start_counting(struct vhpet *vhpet)
363 {
364 	int i;
365 
366 	vhpet->countbase_sbt = sbinuptime();
367 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
368 		/*
369 		 * Restart the timers based on the value of the main counter
370 		 * when it stopped counting.
371 		 */
372 		vhpet_start_timer(vhpet, i, vhpet->countbase,
373 		    vhpet->countbase_sbt);
374 	}
375 }
376 
377 static void
378 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
379 {
380 	int i;
381 
382 	vhpet->countbase = counter;
383 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
384 		vhpet_stop_timer(vhpet, i, now);
385 }
386 
/*
 * Replace the bits of '*regptr' selected by 'mask' with the corresponding
 * bits of 'data'.  Bits outside of 'mask' are left untouched.
 */
static __inline void
update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
{
	uint64_t kept, written;

	kept = *regptr & ~mask;
	written = data & mask;
	*regptr = kept | written;
}
394 
395 static void
396 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
397     uint64_t mask)
398 {
399 	bool clear_isr;
400 	int old_pin, new_pin;
401 	uint32_t allowed_irqs;
402 	uint64_t oldval, newval;
403 
404 	if (vhpet_timer_msi_enabled(vhpet, n) ||
405 	    vhpet_timer_edge_trig(vhpet, n)) {
406 		if (vhpet->isr & (1 << n))
407 			panic("vhpet timer %d isr should not be asserted", n);
408 	}
409 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
410 	oldval = vhpet->timer[n].cap_config;
411 
412 	newval = oldval;
413 	update_register(&newval, data, mask);
414 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
415 	newval |= oldval & HPET_TCAP_RO_MASK;
416 
417 	if (newval == oldval)
418 		return;
419 
420 	vhpet->timer[n].cap_config = newval;
421 	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
422 
423 	/*
424 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
425 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
426 	 * it to the default value of 0.
427 	 */
428 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
429 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
430 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
431 		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
432 		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
433 		new_pin = 0;
434 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
435 	}
436 
437 	if (!vhpet_periodic_timer(vhpet, n))
438 		vhpet->timer[n].comprate = 0;
439 
440 	/*
441 	 * If the timer's ISR bit is set then clear it in the following cases:
442 	 * - interrupt is disabled
443 	 * - interrupt type is changed from level to edge or fsb.
444 	 * - interrupt routing is changed
445 	 *
446 	 * This is to ensure that this timer's level triggered interrupt does
447 	 * not remain asserted forever.
448 	 */
449 	if (vhpet->isr & (1 << n)) {
450 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
451 		    n, old_pin));
452 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
453 			clear_isr = true;
454 		else if (vhpet_timer_msi_enabled(vhpet, n))
455 			clear_isr = true;
456 		else if (vhpet_timer_edge_trig(vhpet, n))
457 			clear_isr = true;
458 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
459 			clear_isr = true;
460 		else
461 			clear_isr = false;
462 
463 		if (clear_isr) {
464 			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
465 			    "configuration change", n);
466 			vioapic_deassert_irq(vhpet->vm, old_pin);
467 			vhpet->isr &= ~(1 << n);
468 		}
469 	}
470 }
471 
472 int
473 vhpet_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t val, int size,
474     void *arg)
475 {
476 	struct vhpet *vhpet;
477 	uint64_t data, mask, oldval, val64;
478 	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
479 	sbintime_t now, *nowptr;
480 	int i, offset;
481 
482 	vhpet = vm_hpet(vcpu_vm(vcpu));
483 	offset = gpa - VHPET_BASE;
484 
485 	VHPET_LOCK(vhpet);
486 
487 	/* Accesses to the HPET should be 4 or 8 bytes wide */
488 	switch (size) {
489 	case 8:
490 		mask = 0xffffffffffffffff;
491 		data = val;
492 		break;
493 	case 4:
494 		mask = 0xffffffff;
495 		data = val;
496 		if ((offset & 0x4) != 0) {
497 			mask <<= 32;
498 			data <<= 32;
499 		}
500 		break;
501 	default:
502 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
503 		    "offset 0x%08x, size %d", offset, size);
504 		goto done;
505 	}
506 
507 	/* Access to the HPET should be naturally aligned to its width */
508 	if (offset & (size - 1)) {
509 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
510 		    "offset 0x%08x, size %d", offset, size);
511 		goto done;
512 	}
513 
514 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
515 		/*
516 		 * Get the most recent value of the counter before updating
517 		 * the 'config' register. If the HPET is going to be disabled
518 		 * then we need to update 'countbase' with the value right
519 		 * before it is disabled.
520 		 */
521 		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
522 		counter = vhpet_counter(vhpet, nowptr);
523 		oldval = vhpet->config;
524 		update_register(&vhpet->config, data, mask);
525 
526 		/*
527 		 * LegacyReplacement Routing is not supported so clear the
528 		 * bit explicitly.
529 		 */
530 		vhpet->config &= ~HPET_CNF_LEG_RT;
531 
532 		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
533 			if (vhpet_counter_enabled(vhpet)) {
534 				vhpet_start_counting(vhpet);
535 				VM_CTR0(vhpet->vm, "hpet enabled");
536 			} else {
537 				vhpet_stop_counting(vhpet, counter, now);
538 				VM_CTR0(vhpet->vm, "hpet disabled");
539 			}
540 		}
541 		goto done;
542 	}
543 
544 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
545 		isr_clear_mask = vhpet->isr & data;
546 		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
547 			if ((isr_clear_mask & (1 << i)) != 0) {
548 				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
549 				vhpet_timer_clear_isr(vhpet, i);
550 			}
551 		}
552 		goto done;
553 	}
554 
555 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
556 		/* Zero-extend the counter to 64-bits before updating it */
557 		val64 = vhpet_counter(vhpet, NULL);
558 		update_register(&val64, data, mask);
559 		vhpet->countbase = val64;
560 		if (vhpet_counter_enabled(vhpet))
561 			vhpet_start_counting(vhpet);
562 		goto done;
563 	}
564 
565 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
566 		if (offset == HPET_TIMER_CAP_CNF(i) ||
567 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
568 			vhpet_timer_update_config(vhpet, i, data, mask);
569 			break;
570 		}
571 
572 		if (offset == HPET_TIMER_COMPARATOR(i) ||
573 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
574 			old_compval = vhpet->timer[i].compval;
575 			old_comprate = vhpet->timer[i].comprate;
576 			if (vhpet_periodic_timer(vhpet, i)) {
577 				/*
578 				 * In periodic mode writes to the comparator
579 				 * change the 'compval' register only if the
580 				 * HPET_TCNF_VAL_SET bit is set in the config
581 				 * register.
582 				 */
583 				val64 = vhpet->timer[i].comprate;
584 				update_register(&val64, data, mask);
585 				vhpet->timer[i].comprate = val64;
586 				if ((vhpet->timer[i].cap_config &
587 				    HPET_TCNF_VAL_SET) != 0) {
588 					vhpet->timer[i].compval = val64;
589 				}
590 			} else {
591 				KASSERT(vhpet->timer[i].comprate == 0,
592 				    ("vhpet one-shot timer %d has invalid "
593 				    "rate %u", i, vhpet->timer[i].comprate));
594 				val64 = vhpet->timer[i].compval;
595 				update_register(&val64, data, mask);
596 				vhpet->timer[i].compval = val64;
597 			}
598 			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
599 
600 			if (vhpet->timer[i].compval != old_compval ||
601 			    vhpet->timer[i].comprate != old_comprate) {
602 				if (vhpet_counter_enabled(vhpet)) {
603 					counter = vhpet_counter(vhpet, &now);
604 					vhpet_start_timer(vhpet, i, counter,
605 					    now);
606 				}
607 			}
608 			break;
609 		}
610 
611 		if (offset == HPET_TIMER_FSB_VAL(i) ||
612 		    offset == HPET_TIMER_FSB_ADDR(i)) {
613 			update_register(&vhpet->timer[i].msireg, data, mask);
614 			break;
615 		}
616 	}
617 done:
618 	VHPET_UNLOCK(vhpet);
619 	return (0);
620 }
621 
622 int
623 vhpet_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size,
624     void *arg)
625 {
626 	int i, offset;
627 	struct vhpet *vhpet;
628 	uint64_t data;
629 
630 	vhpet = vm_hpet(vcpu_vm(vcpu));
631 	offset = gpa - VHPET_BASE;
632 
633 	VHPET_LOCK(vhpet);
634 
635 	/* Accesses to the HPET should be 4 or 8 bytes wide */
636 	if (size != 4 && size != 8) {
637 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
638 		    "offset 0x%08x, size %d", offset, size);
639 		data = 0;
640 		goto done;
641 	}
642 
643 	/* Access to the HPET should be naturally aligned to its width */
644 	if (offset & (size - 1)) {
645 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
646 		    "offset 0x%08x, size %d", offset, size);
647 		data = 0;
648 		goto done;
649 	}
650 
651 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
652 		data = vhpet_capabilities();
653 		goto done;
654 	}
655 
656 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
657 		data = vhpet->config;
658 		goto done;
659 	}
660 
661 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
662 		data = vhpet->isr;
663 		goto done;
664 	}
665 
666 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
667 		data = vhpet_counter(vhpet, NULL);
668 		goto done;
669 	}
670 
671 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
672 		if (offset == HPET_TIMER_CAP_CNF(i) ||
673 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
674 			data = vhpet->timer[i].cap_config;
675 			break;
676 		}
677 
678 		if (offset == HPET_TIMER_COMPARATOR(i) ||
679 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
680 			data = vhpet->timer[i].compval;
681 			break;
682 		}
683 
684 		if (offset == HPET_TIMER_FSB_VAL(i) ||
685 		    offset == HPET_TIMER_FSB_ADDR(i)) {
686 			data = vhpet->timer[i].msireg;
687 			break;
688 		}
689 	}
690 
691 	if (i >= VHPET_NUM_TIMERS)
692 		data = 0;
693 done:
694 	VHPET_UNLOCK(vhpet);
695 
696 	if (size == 4) {
697 		if (offset & 0x4)
698 			data >>= 32;
699 	}
700 	*rval = data;
701 	return (0);
702 }
703 
704 struct vhpet *
705 vhpet_init(struct vm *vm)
706 {
707 	int i, pincount;
708 	struct vhpet *vhpet;
709 	uint64_t allowed_irqs;
710 	struct vhpet_callout_arg *arg;
711 	struct bintime bt;
712 
713 	vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
714         vhpet->vm = vm;
715 	mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
716 
717 	FREQ2BT(HPET_FREQ, &bt);
718 	vhpet->freq_sbt = bttosbt(bt);
719 
720 	pincount = vioapic_pincount(vm);
721 	if (pincount >= 32)
722 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
723 	else if (pincount >= 20)
724 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
725 	else
726 		allowed_irqs = 0;
727 
728 	/*
729 	 * Initialize HPET timer hardware state.
730 	 */
731 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
732 		vhpet->timer[i].cap_config = allowed_irqs << 32;
733 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
734 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
735 
736 		vhpet->timer[i].compval = 0xffffffff;
737 		callout_init(&vhpet->timer[i].callout, 1);
738 
739 		arg = &vhpet->timer[i].arg;
740 		arg->vhpet = vhpet;
741 		arg->timer_num = i;
742 	}
743 
744 	return (vhpet);
745 }
746 
747 void
748 vhpet_cleanup(struct vhpet *vhpet)
749 {
750 	int i;
751 
752 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
753 		callout_drain(&vhpet->timer[i].callout);
754 
755 	mtx_destroy(&vhpet->mtx);
756 	free(vhpet, M_VHPET);
757 }
758 
759 int
760 vhpet_getcap(struct vm_hpet_cap *cap)
761 {
762 
763 	cap->capabilities = vhpet_capabilities();
764 	return (0);
765 }
766 
767 #ifdef BHYVE_SNAPSHOT
768 int
769 vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
770 {
771 	int i, ret;
772 	uint32_t countbase;
773 
774 	SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
775 	SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
776 	SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
777 
778 	/* at restore time the countbase should have the value it had when the
779 	 * snapshot was created; since the value is not directly kept in
780 	 * vhpet->countbase, but rather computed relative to the current system
781 	 * uptime using countbase_sbt, save the value retured by vhpet_counter
782 	 */
783 	if (meta->op == VM_SNAPSHOT_SAVE)
784 		countbase = vhpet_counter(vhpet, NULL);
785 	SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
786 	if (meta->op == VM_SNAPSHOT_RESTORE)
787 		vhpet->countbase = countbase;
788 
789 	for (i = 0; i < nitems(vhpet->timer); i++) {
790 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
791 				      meta, ret, done);
792 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
793 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
794 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
795 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
796 				      meta, ret, done);
797 	}
798 
799 done:
800 	return (ret);
801 }
802 
/*
 * Resume timekeeping: if the main counter is enabled, restart it (and all
 * timers) from the current 'countbase'.  Always returns 0.
 */
int
vhpet_restore_time(struct vhpet *vhpet)
{

	if (!vhpet_counter_enabled(vhpet))
		return (0);

	vhpet_start_counting(vhpet);
	return (0);
}
811 #endif
812