xref: /freebsd/sys/amd64/vmm/io/vhpet.c (revision 52c81be11a107cdedb865a274b5567b0c95c0308)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_bhyve_snapshot.h"
36 
37 #include <sys/param.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/systm.h>
43 
44 #include <dev/acpica/acpi_hpet.h>
45 
46 #include <machine/vmm.h>
47 #include <machine/vmm_dev.h>
48 #include <machine/vmm_snapshot.h>
49 
50 #include "vmm_lapic.h"
51 #include "vatpic.h"
52 #include "vioapic.h"
53 #include "vhpet.h"
54 
55 #include "vmm_ktr.h"
56 
/* Malloc type used for the vhpet allocation made in vhpet_init(). */
static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");

#define	HPET_FREQ	16777216		/* 16.7 (2^24) MHz */
#define	FS_PER_S	1000000000000000ul	/* femtoseconds per second */

/*
 * Timer N Configuration and Capabilities Register: the bits below are
 * preserved from the old register value when the guest writes the register
 * (see vhpet_timer_update_config()).
 */
#define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
				 HPET_TCAP_FSB_INT_DEL	|		\
				 HPET_TCAP_SIZE		|		\
				 HPET_TCAP_PER_INT)
/*
 * HPET requires at least 3 timers and up to 32 timers per block.
 */
#define	VHPET_NUM_TIMERS	8
CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
72 
/*
 * Argument passed to vhpet_handler() when a timer's callout fires. It
 * identifies the owning vhpet and the index of the timer within it.
 */
struct vhpet_callout_arg {
	struct vhpet *vhpet;
	int timer_num;
};
77 
/*
 * State of one virtual HPET: the global registers, the base used to derive
 * the main counter from system uptime, and the per-timer registers together
 * with the callout that drives each timer.
 */
struct vhpet {
	struct vm	*vm;
	struct mtx	mtx;
	/* Length of one HPET tick as an sbintime_t (set in vhpet_init()). */
	sbintime_t	freq_sbt;

	uint64_t	config;		/* Configuration */
	uint64_t	isr;		/* Interrupt Status */
	uint32_t	countbase;	/* HPET counter base value */
	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */

	struct {
		uint64_t	cap_config;	/* Configuration */
		uint64_t	msireg;		/* FSB interrupt routing */
		uint32_t	compval;	/* Comparator */
		/* Period in ticks for periodic mode; 0 for one-shot. */
		uint32_t	comprate;
		struct callout	callout;
		sbintime_t	callout_sbt;	/* time when counter==compval */
		struct vhpet_callout_arg arg;
	} timer[VHPET_NUM_TIMERS];
};
98 
/* Acquire and release the per-device mutex embedded in the vhpet. */
#define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
#define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))

/* Forward declaration: vhpet_handler() re-arms the timer before its definition. */
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
    sbintime_t now);
104 
105 static uint64_t
106 vhpet_capabilities(void)
107 {
108 	uint64_t cap = 0;
109 
110 	cap |= 0x8086 << 16;			/* vendor id */
111 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
112 	cap |= 1;				/* revision */
113 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
114 
115 	cap &= 0xffffffff;
116 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
117 
118 	return (cap);
119 }
120 
121 static __inline bool
122 vhpet_counter_enabled(struct vhpet *vhpet)
123 {
124 
125 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
126 }
127 
128 static __inline bool
129 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
130 {
131 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
132 
133 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
134 		return (true);
135 	else
136 		return (false);
137 }
138 
139 static __inline int
140 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
141 {
142 	/*
143 	 * If the timer is configured to use MSI then treat it as if the
144 	 * timer is not connected to the ioapic.
145 	 */
146 	if (vhpet_timer_msi_enabled(vhpet, n))
147 		return (0);
148 
149 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
150 }
151 
152 static uint32_t
153 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
154 {
155 	uint32_t val;
156 	sbintime_t now, delta;
157 
158 	val = vhpet->countbase;
159 	if (vhpet_counter_enabled(vhpet)) {
160 		now = sbinuptime();
161 		delta = now - vhpet->countbase_sbt;
162 		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
163 		    "%#lx to %#lx", vhpet->countbase_sbt, now));
164 		val += delta / vhpet->freq_sbt;
165 		if (nowptr != NULL)
166 			*nowptr = now;
167 	} else {
168 		/*
169 		 * The sbinuptime corresponding to the 'countbase' is
170 		 * meaningless when the counter is disabled. Make sure
171 		 * that the caller doesn't want to use it.
172 		 */
173 		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
174 	}
175 	return (val);
176 }
177 
178 static void
179 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
180 {
181 	int pin;
182 
183 	if (vhpet->isr & (1 << n)) {
184 		pin = vhpet_timer_ioapic_pin(vhpet, n);
185 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
186 		vioapic_deassert_irq(vhpet->vm, pin);
187 		vhpet->isr &= ~(1 << n);
188 	}
189 }
190 
191 static __inline bool
192 vhpet_periodic_timer(struct vhpet *vhpet, int n)
193 {
194 
195 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
196 }
197 
198 static __inline bool
199 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
200 {
201 
202 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
203 }
204 
205 static __inline bool
206 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
207 {
208 
209 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
210 	    "timer %d is using MSI", n));
211 
212 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
213 		return (true);
214 	else
215 		return (false);
216 }
217 
218 static void
219 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
220 {
221 	int pin;
222 
223 	/* If interrupts are not enabled for this timer then just return. */
224 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
225 		return;
226 
227 	/*
228 	 * If a level triggered interrupt is already asserted then just return.
229 	 */
230 	if ((vhpet->isr & (1 << n)) != 0) {
231 		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
232 		return;
233 	}
234 
235 	if (vhpet_timer_msi_enabled(vhpet, n)) {
236 		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
237 		    vhpet->timer[n].msireg & 0xffffffff);
238 		return;
239 	}
240 
241 	pin = vhpet_timer_ioapic_pin(vhpet, n);
242 	if (pin == 0) {
243 		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
244 		return;
245 	}
246 
247 	if (vhpet_timer_edge_trig(vhpet, n)) {
248 		vioapic_pulse_irq(vhpet->vm, pin);
249 	} else {
250 		vhpet->isr |= 1 << n;
251 		vioapic_assert_irq(vhpet->vm, pin);
252 	}
253 }
254 
255 static void
256 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
257 {
258 	uint32_t compval, comprate, compnext;
259 
260 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
261 
262 	compval = vhpet->timer[n].compval;
263 	comprate = vhpet->timer[n].comprate;
264 
265 	/*
266 	 * Calculate the comparator value to be used for the next periodic
267 	 * interrupt.
268 	 *
269 	 * This function is commonly called from the callout handler.
270 	 * In this scenario the 'counter' is ahead of 'compval'. To find
271 	 * the next value to program into the accumulator we divide the
272 	 * number space between 'compval' and 'counter' into 'comprate'
273 	 * sized units. The 'compval' is rounded up such that is "ahead"
274 	 * of 'counter'.
275 	 */
276 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
277 
278 	vhpet->timer[n].compval = compnext;
279 }
280 
281 static void
282 vhpet_handler(void *a)
283 {
284 	int n;
285 	uint32_t counter;
286 	sbintime_t now;
287 	struct vhpet *vhpet;
288 	struct callout *callout;
289 	struct vhpet_callout_arg *arg;
290 
291 	arg = a;
292 	vhpet = arg->vhpet;
293 	n = arg->timer_num;
294 	callout = &vhpet->timer[n].callout;
295 
296 	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
297 
298 	VHPET_LOCK(vhpet);
299 
300 	if (callout_pending(callout))		/* callout was reset */
301 		goto done;
302 
303 	if (!callout_active(callout))		/* callout was stopped */
304 		goto done;
305 
306 	callout_deactivate(callout);
307 
308 	if (!vhpet_counter_enabled(vhpet))
309 		panic("vhpet(%p) callout with counter disabled", vhpet);
310 
311 	counter = vhpet_counter(vhpet, &now);
312 	vhpet_start_timer(vhpet, n, counter, now);
313 	vhpet_timer_interrupt(vhpet, n);
314 done:
315 	VHPET_UNLOCK(vhpet);
316 	return;
317 }
318 
319 static void
320 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
321 {
322 
323 	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
324 	callout_stop(&vhpet->timer[n].callout);
325 
326 	/*
327 	 * If the callout was scheduled to expire in the past but hasn't
328 	 * had a chance to execute yet then trigger the timer interrupt
329 	 * here. Failing to do so will result in a missed timer interrupt
330 	 * in the guest. This is especially bad in one-shot mode because
331 	 * the next interrupt has to wait for the counter to wrap around.
332 	 */
333 	if (vhpet->timer[n].callout_sbt < now) {
334 		VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
335 		    "stopping timer", n);
336 		vhpet_timer_interrupt(vhpet, n);
337 	}
338 }
339 
340 static void
341 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
342 {
343 	sbintime_t delta, precision;
344 
345 	if (vhpet->timer[n].comprate != 0)
346 		vhpet_adjust_compval(vhpet, n, counter);
347 	else {
348 		/*
349 		 * In one-shot mode it is the guest's responsibility to make
350 		 * sure that the comparator value is not in the "past". The
351 		 * hardware doesn't have any belt-and-suspenders to deal with
352 		 * this so we don't either.
353 		 */
354 	}
355 
356 	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
357 	precision = delta >> tc_precexp;
358 	vhpet->timer[n].callout_sbt = now + delta;
359 	callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
360 	    precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
361 }
362 
363 static void
364 vhpet_start_counting(struct vhpet *vhpet)
365 {
366 	int i;
367 
368 	vhpet->countbase_sbt = sbinuptime();
369 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
370 		/*
371 		 * Restart the timers based on the value of the main counter
372 		 * when it stopped counting.
373 		 */
374 		vhpet_start_timer(vhpet, i, vhpet->countbase,
375 		    vhpet->countbase_sbt);
376 	}
377 }
378 
379 static void
380 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
381 {
382 	int i;
383 
384 	vhpet->countbase = counter;
385 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
386 		vhpet_stop_timer(vhpet, i, now);
387 }
388 
/*
 * Replace the bits of '*regptr' selected by 'mask' with the corresponding
 * bits of 'data'; bits outside 'mask' keep their value.
 */
static __inline void
update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
{

	*regptr = (*regptr & ~mask) | (data & mask);
}
396 
397 static void
398 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
399     uint64_t mask)
400 {
401 	bool clear_isr;
402 	int old_pin, new_pin;
403 	uint32_t allowed_irqs;
404 	uint64_t oldval, newval;
405 
406 	if (vhpet_timer_msi_enabled(vhpet, n) ||
407 	    vhpet_timer_edge_trig(vhpet, n)) {
408 		if (vhpet->isr & (1 << n))
409 			panic("vhpet timer %d isr should not be asserted", n);
410 	}
411 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
412 	oldval = vhpet->timer[n].cap_config;
413 
414 	newval = oldval;
415 	update_register(&newval, data, mask);
416 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
417 	newval |= oldval & HPET_TCAP_RO_MASK;
418 
419 	if (newval == oldval)
420 		return;
421 
422 	vhpet->timer[n].cap_config = newval;
423 	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
424 
425 	/*
426 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
427 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
428 	 * it to the default value of 0.
429 	 */
430 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
431 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
432 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
433 		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
434 		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
435 		new_pin = 0;
436 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
437 	}
438 
439 	if (!vhpet_periodic_timer(vhpet, n))
440 		vhpet->timer[n].comprate = 0;
441 
442 	/*
443 	 * If the timer's ISR bit is set then clear it in the following cases:
444 	 * - interrupt is disabled
445 	 * - interrupt type is changed from level to edge or fsb.
446 	 * - interrupt routing is changed
447 	 *
448 	 * This is to ensure that this timer's level triggered interrupt does
449 	 * not remain asserted forever.
450 	 */
451 	if (vhpet->isr & (1 << n)) {
452 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
453 		    n, old_pin));
454 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
455 			clear_isr = true;
456 		else if (vhpet_timer_msi_enabled(vhpet, n))
457 			clear_isr = true;
458 		else if (vhpet_timer_edge_trig(vhpet, n))
459 			clear_isr = true;
460 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
461 			clear_isr = true;
462 		else
463 			clear_isr = false;
464 
465 		if (clear_isr) {
466 			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
467 			    "configuration change", n);
468 			vioapic_deassert_irq(vhpet->vm, old_pin);
469 			vhpet->isr &= ~(1 << n);
470 		}
471 	}
472 }
473 
474 int
475 vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
476     void *arg)
477 {
478 	struct vhpet *vhpet;
479 	uint64_t data, mask, oldval, val64;
480 	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
481 	sbintime_t now, *nowptr;
482 	int i, offset;
483 
484 	vhpet = vm_hpet(vm);
485 	offset = gpa - VHPET_BASE;
486 
487 	VHPET_LOCK(vhpet);
488 
489 	/* Accesses to the HPET should be 4 or 8 bytes wide */
490 	switch (size) {
491 	case 8:
492 		mask = 0xffffffffffffffff;
493 		data = val;
494 		break;
495 	case 4:
496 		mask = 0xffffffff;
497 		data = val;
498 		if ((offset & 0x4) != 0) {
499 			mask <<= 32;
500 			data <<= 32;
501 		}
502 		break;
503 	default:
504 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
505 		    "offset 0x%08x, size %d", offset, size);
506 		goto done;
507 	}
508 
509 	/* Access to the HPET should be naturally aligned to its width */
510 	if (offset & (size - 1)) {
511 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
512 		    "offset 0x%08x, size %d", offset, size);
513 		goto done;
514 	}
515 
516 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
517 		/*
518 		 * Get the most recent value of the counter before updating
519 		 * the 'config' register. If the HPET is going to be disabled
520 		 * then we need to update 'countbase' with the value right
521 		 * before it is disabled.
522 		 */
523 		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
524 		counter = vhpet_counter(vhpet, nowptr);
525 		oldval = vhpet->config;
526 		update_register(&vhpet->config, data, mask);
527 
528 		/*
529 		 * LegacyReplacement Routing is not supported so clear the
530 		 * bit explicitly.
531 		 */
532 		vhpet->config &= ~HPET_CNF_LEG_RT;
533 
534 		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
535 			if (vhpet_counter_enabled(vhpet)) {
536 				vhpet_start_counting(vhpet);
537 				VM_CTR0(vhpet->vm, "hpet enabled");
538 			} else {
539 				vhpet_stop_counting(vhpet, counter, now);
540 				VM_CTR0(vhpet->vm, "hpet disabled");
541 			}
542 		}
543 		goto done;
544 	}
545 
546 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
547 		isr_clear_mask = vhpet->isr & data;
548 		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
549 			if ((isr_clear_mask & (1 << i)) != 0) {
550 				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
551 				vhpet_timer_clear_isr(vhpet, i);
552 			}
553 		}
554 		goto done;
555 	}
556 
557 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
558 		/* Zero-extend the counter to 64-bits before updating it */
559 		val64 = vhpet_counter(vhpet, NULL);
560 		update_register(&val64, data, mask);
561 		vhpet->countbase = val64;
562 		if (vhpet_counter_enabled(vhpet))
563 			vhpet_start_counting(vhpet);
564 		goto done;
565 	}
566 
567 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
568 		if (offset == HPET_TIMER_CAP_CNF(i) ||
569 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
570 			vhpet_timer_update_config(vhpet, i, data, mask);
571 			break;
572 		}
573 
574 		if (offset == HPET_TIMER_COMPARATOR(i) ||
575 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
576 			old_compval = vhpet->timer[i].compval;
577 			old_comprate = vhpet->timer[i].comprate;
578 			if (vhpet_periodic_timer(vhpet, i)) {
579 				/*
580 				 * In periodic mode writes to the comparator
581 				 * change the 'compval' register only if the
582 				 * HPET_TCNF_VAL_SET bit is set in the config
583 				 * register.
584 				 */
585 				val64 = vhpet->timer[i].comprate;
586 				update_register(&val64, data, mask);
587 				vhpet->timer[i].comprate = val64;
588 				if ((vhpet->timer[i].cap_config &
589 				    HPET_TCNF_VAL_SET) != 0) {
590 					vhpet->timer[i].compval = val64;
591 				}
592 			} else {
593 				KASSERT(vhpet->timer[i].comprate == 0,
594 				    ("vhpet one-shot timer %d has invalid "
595 				    "rate %u", i, vhpet->timer[i].comprate));
596 				val64 = vhpet->timer[i].compval;
597 				update_register(&val64, data, mask);
598 				vhpet->timer[i].compval = val64;
599 			}
600 			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
601 
602 			if (vhpet->timer[i].compval != old_compval ||
603 			    vhpet->timer[i].comprate != old_comprate) {
604 				if (vhpet_counter_enabled(vhpet)) {
605 					counter = vhpet_counter(vhpet, &now);
606 					vhpet_start_timer(vhpet, i, counter,
607 					    now);
608 				}
609 			}
610 			break;
611 		}
612 
613 		if (offset == HPET_TIMER_FSB_VAL(i) ||
614 		    offset == HPET_TIMER_FSB_ADDR(i)) {
615 			update_register(&vhpet->timer[i].msireg, data, mask);
616 			break;
617 		}
618 	}
619 done:
620 	VHPET_UNLOCK(vhpet);
621 	return (0);
622 }
623 
624 int
625 vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
626     void *arg)
627 {
628 	int i, offset;
629 	struct vhpet *vhpet;
630 	uint64_t data;
631 
632 	vhpet = vm_hpet(vm);
633 	offset = gpa - VHPET_BASE;
634 
635 	VHPET_LOCK(vhpet);
636 
637 	/* Accesses to the HPET should be 4 or 8 bytes wide */
638 	if (size != 4 && size != 8) {
639 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
640 		    "offset 0x%08x, size %d", offset, size);
641 		data = 0;
642 		goto done;
643 	}
644 
645 	/* Access to the HPET should be naturally aligned to its width */
646 	if (offset & (size - 1)) {
647 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
648 		    "offset 0x%08x, size %d", offset, size);
649 		data = 0;
650 		goto done;
651 	}
652 
653 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
654 		data = vhpet_capabilities();
655 		goto done;
656 	}
657 
658 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
659 		data = vhpet->config;
660 		goto done;
661 	}
662 
663 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
664 		data = vhpet->isr;
665 		goto done;
666 	}
667 
668 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
669 		data = vhpet_counter(vhpet, NULL);
670 		goto done;
671 	}
672 
673 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
674 		if (offset == HPET_TIMER_CAP_CNF(i) ||
675 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
676 			data = vhpet->timer[i].cap_config;
677 			break;
678 		}
679 
680 		if (offset == HPET_TIMER_COMPARATOR(i) ||
681 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
682 			data = vhpet->timer[i].compval;
683 			break;
684 		}
685 
686 		if (offset == HPET_TIMER_FSB_VAL(i) ||
687 		    offset == HPET_TIMER_FSB_ADDR(i)) {
688 			data = vhpet->timer[i].msireg;
689 			break;
690 		}
691 	}
692 
693 	if (i >= VHPET_NUM_TIMERS)
694 		data = 0;
695 done:
696 	VHPET_UNLOCK(vhpet);
697 
698 	if (size == 4) {
699 		if (offset & 0x4)
700 			data >>= 32;
701 	}
702 	*rval = data;
703 	return (0);
704 }
705 
706 struct vhpet *
707 vhpet_init(struct vm *vm)
708 {
709 	int i, pincount;
710 	struct vhpet *vhpet;
711 	uint64_t allowed_irqs;
712 	struct vhpet_callout_arg *arg;
713 	struct bintime bt;
714 
715 	vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
716         vhpet->vm = vm;
717 	mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
718 
719 	FREQ2BT(HPET_FREQ, &bt);
720 	vhpet->freq_sbt = bttosbt(bt);
721 
722 	pincount = vioapic_pincount(vm);
723 	if (pincount >= 32)
724 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
725 	else if (pincount >= 20)
726 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
727 	else
728 		allowed_irqs = 0;
729 
730 	/*
731 	 * Initialize HPET timer hardware state.
732 	 */
733 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
734 		vhpet->timer[i].cap_config = allowed_irqs << 32;
735 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
736 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
737 
738 		vhpet->timer[i].compval = 0xffffffff;
739 		callout_init(&vhpet->timer[i].callout, 1);
740 
741 		arg = &vhpet->timer[i].arg;
742 		arg->vhpet = vhpet;
743 		arg->timer_num = i;
744 	}
745 
746 	return (vhpet);
747 }
748 
749 void
750 vhpet_cleanup(struct vhpet *vhpet)
751 {
752 	int i;
753 
754 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
755 		callout_drain(&vhpet->timer[i].callout);
756 
757 	free(vhpet, M_VHPET);
758 }
759 
760 int
761 vhpet_getcap(struct vm_hpet_cap *cap)
762 {
763 
764 	cap->capabilities = vhpet_capabilities();
765 	return (0);
766 }
767 
768 #ifdef BHYVE_SNAPSHOT
769 int
770 vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
771 {
772 	int i, ret;
773 	uint32_t countbase;
774 
775 	SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
776 	SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
777 	SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
778 
779 	/* at restore time the countbase should have the value it had when the
780 	 * snapshot was created; since the value is not directly kept in
781 	 * vhpet->countbase, but rather computed relative to the current system
782 	 * uptime using countbase_sbt, save the value retured by vhpet_counter
783 	 */
784 	if (meta->op == VM_SNAPSHOT_SAVE)
785 		countbase = vhpet_counter(vhpet, NULL);
786 	SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
787 	if (meta->op == VM_SNAPSHOT_RESTORE)
788 		vhpet->countbase = countbase;
789 
790 	for (i = 0; i < nitems(vhpet->timer); i++) {
791 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
792 				      meta, ret, done);
793 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
794 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
795 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
796 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
797 				      meta, ret, done);
798 	}
799 
800 done:
801 	return (ret);
802 }
803 
/*
 * Restart time keeping for the vhpet: if the main counter is enabled,
 * rebase it on the current uptime and re-arm the timers. Always returns 0.
 */
int
vhpet_restore_time(struct vhpet *vhpet)
{

	if (!vhpet_counter_enabled(vhpet))
		return (0);

	vhpet_start_counting(vhpet);
	return (0);
}
812 #endif
813