xref: /freebsd/sys/amd64/vmm/io/vhpet.c (revision 7899f917b1c0ea178f1d2be0cfb452086d079d23)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include "opt_bhyve_snapshot.h"
32 
33 #include <sys/param.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/systm.h>
39 
40 #include <machine/vmm.h>
41 #include <machine/vmm_dev.h>
42 #include <machine/vmm_snapshot.h>
43 
44 #include <dev/acpica/acpi_hpet.h>
45 #include <dev/vmm/vmm_ktr.h>
46 
47 #include "vmm_lapic.h"
48 #include "vatpic.h"
49 #include "vioapic.h"
50 #include "vhpet.h"
51 
52 static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
53 
54 #define	HPET_FREQ	16777216		/* 16.777216 MHz (2^24 Hz) */
55 #define	FS_PER_S	1000000000000000ul
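/*
 * With these values the counter tick period advertised to the guest is
 * FS_PER_S / HPET_FREQ = 10^15 / 2^24 = 59604644 femtoseconds (~59.6 ns).
 */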
56 
57 /* Timer N Configuration and Capabilities Register */
58 #define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
59 				 HPET_TCAP_FSB_INT_DEL	|		\
60 				 HPET_TCAP_SIZE		|		\
61 				 HPET_TCAP_PER_INT)
62 /*
63  * An HPET timer block must have at least 3 and at most 32 timers.
64  */
65 #define	VHPET_NUM_TIMERS	8
66 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
67 
68 struct vhpet_callout_arg {
69 	struct vhpet *vhpet;
70 	int timer_num;
71 };
72 
73 struct vhpet {
74 	struct vm	*vm;
75 	struct mtx	mtx;
76 	sbintime_t	freq_sbt;
77 
78 	uint64_t	config;		/* Configuration */
79 	uint64_t	isr;		/* Interrupt Status */
80 	uint32_t	countbase;	/* HPET counter base value */
81 	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */
82 
83 	struct {
84 		uint64_t	cap_config;	/* Configuration */
85 		uint64_t	msireg;		/* FSB interrupt routing */
86 		uint32_t	compval;	/* Comparator */
87 		uint32_t	comprate;
88 		struct callout	callout;
89 		sbintime_t	callout_sbt;	/* time when counter==compval */
90 		struct vhpet_callout_arg arg;
91 	} timer[VHPET_NUM_TIMERS];
92 };
93 
94 #define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
95 #define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))
96 
97 static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
98     sbintime_t now);
99 
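/*
 * Construct the General Capabilities and ID register: revision 1, the
 * number of timers minus one, a 32-bit main counter, the 0x8086 vendor
 * id and the counter tick period (in femtoseconds) in the upper 32 bits.
 */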
100 static uint64_t
101 vhpet_capabilities(void)
102 {
103 	uint64_t cap = 0;
104 
105 	cap |= 0x8086 << 16;			/* vendor id */
106 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
107 	cap |= 1;				/* revision */
108 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
109 
110 	cap &= 0xffffffff;
111 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
112 
113 	return (cap);
114 }
115 
116 static __inline bool
117 vhpet_counter_enabled(struct vhpet *vhpet)
118 {
119 
120 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
121 }
122 
123 static __inline bool
124 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
125 {
126 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
127 
128 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
129 		return (true);
130 	else
131 		return (false);
132 }
133 
134 static __inline int
135 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
136 {
137 	/*
138 	 * If the timer is configured to use MSI then treat it as if the
139 	 * timer is not connected to the ioapic.
140 	 */
141 	if (vhpet_timer_msi_enabled(vhpet, n))
142 		return (0);
143 
144 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
145 }
146 
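/*
 * Return the current value of the main counter. When counting is enabled
 * the value is 'countbase' plus the ticks elapsed since 'countbase_sbt';
 * the sampled uptime is returned via 'nowptr' if it is not NULL.
 */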
147 static uint32_t
148 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
149 {
150 	uint32_t val;
151 	sbintime_t now, delta;
152 
153 	val = vhpet->countbase;
154 	if (vhpet_counter_enabled(vhpet)) {
155 		now = sbinuptime();
156 		delta = now - vhpet->countbase_sbt;
157 		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
158 		    "%#lx to %#lx", vhpet->countbase_sbt, now));
159 		val += delta / vhpet->freq_sbt;
160 		if (nowptr != NULL)
161 			*nowptr = now;
162 	} else {
163 		/*
164 		 * The sbinuptime corresponding to the 'countbase' is
165 		 * meaningless when the counter is disabled. Make sure
166 		 * that the caller doesn't want to use it.
167 		 */
168 		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
169 	}
170 	return (val);
171 }
172 
173 static void
174 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
175 {
176 	int pin;
177 
178 	if (vhpet->isr & (1 << n)) {
179 		pin = vhpet_timer_ioapic_pin(vhpet, n);
180 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
181 		vioapic_deassert_irq(vhpet->vm, pin);
182 		vhpet->isr &= ~(1 << n);
183 	}
184 }
185 
186 static __inline bool
187 vhpet_periodic_timer(struct vhpet *vhpet, int n)
188 {
189 
190 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
191 }
192 
193 static __inline bool
194 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
195 {
196 
197 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
198 }
199 
200 static __inline bool
201 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
202 {
203 
204 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
205 	    "timer %d is using MSI", n));
206 
207 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
208 		return (true);
209 	else
210 		return (false);
211 }
212 
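/*
 * Deliver timer 'n's interrupt to the guest if it is enabled and not
 * already pending: via MSI when FSB delivery is configured, otherwise via
 * its ioapic pin (pulsed when edge-triggered, asserted and recorded in
 * 'isr' when level-triggered).
 */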
213 static void
214 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
215 {
216 	int pin;
217 
218 	/* If interrupts are not enabled for this timer then just return. */
219 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
220 		return;
221 
222 	/*
223 	 * If a level triggered interrupt is already asserted then just return.
224 	 */
225 	if ((vhpet->isr & (1 << n)) != 0) {
226 		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
227 		return;
228 	}
229 
230 	if (vhpet_timer_msi_enabled(vhpet, n)) {
231 		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
232 		    vhpet->timer[n].msireg & 0xffffffff);
233 		return;
234 	}
235 
236 	pin = vhpet_timer_ioapic_pin(vhpet, n);
237 	if (pin == 0) {
238 		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
239 		return;
240 	}
241 
242 	if (vhpet_timer_edge_trig(vhpet, n)) {
243 		vioapic_pulse_irq(vhpet->vm, pin);
244 	} else {
245 		vhpet->isr |= 1 << n;
246 		vioapic_assert_irq(vhpet->vm, pin);
247 	}
248 }
249 
250 static void
251 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
252 {
253 	uint32_t compval, comprate, compnext;
254 
255 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
256 
257 	compval = vhpet->timer[n].compval;
258 	comprate = vhpet->timer[n].comprate;
259 
260 	/*
261 	 * Calculate the comparator value to be used for the next periodic
262 	 * interrupt.
263 	 *
264 	 * This function is commonly called from the callout handler.
265 	 * In this scenario the 'counter' is ahead of 'compval'. To find
266 	 * the next value to program into the accumulator we divide the
267 	 * number space between 'compval' and 'counter' into 'comprate'
268  * sized units. The 'compval' is rounded up such that it is "ahead"
269 	 * of 'counter'.
270 	 */
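	/*
	 * For example, with compval = 100, comprate = 50 and counter = 180
	 * the expression below yields 100 + (80 / 50 + 1) * 50 = 200, the
	 * first comparator value past 'counter' on the periodic schedule.
	 */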
271 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
272 
273 	vhpet->timer[n].compval = compnext;
274 }
275 
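/*
 * Callout handler that runs when a timer's deadline passes: unless the
 * callout was reset or stopped in the meantime, re-arm the timer and
 * deliver its interrupt to the guest.
 */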
276 static void
277 vhpet_handler(void *a)
278 {
279 	int n;
280 	uint32_t counter;
281 	sbintime_t now;
282 	struct vhpet *vhpet;
283 	struct callout *callout;
284 	struct vhpet_callout_arg *arg;
285 
286 	arg = a;
287 	vhpet = arg->vhpet;
288 	n = arg->timer_num;
289 	callout = &vhpet->timer[n].callout;
290 
291 	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
292 
293 	VHPET_LOCK(vhpet);
294 
295 	if (callout_pending(callout))		/* callout was reset */
296 		goto done;
297 
298 	if (!callout_active(callout))		/* callout was stopped */
299 		goto done;
300 
301 	callout_deactivate(callout);
302 
303 	if (!vhpet_counter_enabled(vhpet))
304 		panic("vhpet(%p) callout with counter disabled", vhpet);
305 
306 	counter = vhpet_counter(vhpet, &now);
307 	vhpet_start_timer(vhpet, n, counter, now);
308 	vhpet_timer_interrupt(vhpet, n);
309 done:
310 	VHPET_UNLOCK(vhpet);
311 	return;
312 }
313 
314 static void
315 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
316 {
317 
318 	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
319 	callout_stop(&vhpet->timer[n].callout);
320 
321 	/*
322 	 * If the callout was scheduled to expire in the past but hasn't
323 	 * had a chance to execute yet then trigger the timer interrupt
324 	 * here. Failing to do so will result in a missed timer interrupt
325 	 * in the guest. This is especially bad in one-shot mode because
326 	 * the next interrupt has to wait for the counter to wrap around.
327 	 */
328 	if (vhpet->timer[n].callout_sbt < now) {
329 		VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
330 		    "stopping timer", n);
331 		vhpet_timer_interrupt(vhpet, n);
332 	}
333 }
334 
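/*
 * (Re)arm timer 'n' given the main counter value 'counter' sampled at
 * uptime 'now': in periodic mode the comparator is first advanced past
 * 'counter', then the callout is scheduled for the uptime at which the
 * counter will reach the comparator value.
 */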
335 static void
336 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
337 {
338 	sbintime_t delta, precision;
339 
340 	if (vhpet->timer[n].comprate != 0)
341 		vhpet_adjust_compval(vhpet, n, counter);
342 	else {
343 		/*
344 		 * In one-shot mode it is the guest's responsibility to make
345 		 * sure that the comparator value is not in the "past". The
346 		 * hardware doesn't have any belt-and-suspenders to deal with
347 		 * this so we don't either.
348 		 */
349 	}
350 
351 	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
352 	precision = delta >> tc_precexp;
353 	vhpet->timer[n].callout_sbt = now + delta;
354 	callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
355 	    precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
356 }
357 
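/*
 * Resume counting from 'countbase' at the current uptime and re-arm the
 * callout of every timer against the restarted counter.
 */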
358 static void
359 vhpet_start_counting(struct vhpet *vhpet)
360 {
361 	int i;
362 
363 	vhpet->countbase_sbt = sbinuptime();
364 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
365 		/*
366 		 * Restart the timers based on the value of the main counter
367 		 * when it stopped counting.
368 		 */
369 		vhpet_start_timer(vhpet, i, vhpet->countbase,
370 		    vhpet->countbase_sbt);
371 	}
372 }
373 
374 static void
375 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
376 {
377 	int i;
378 
379 	vhpet->countbase = counter;
380 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
381 		vhpet_stop_timer(vhpet, i, now);
382 }
383 
384 static __inline void
385 update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
386 {
387 
388 	*regptr &= ~mask;
389 	*regptr |= (data & mask);
390 }
391 
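/*
 * Handle a guest write to timer 'n's Configuration and Capabilities
 * register: preserve the read-only bits, fall back to interrupt route 0
 * if the requested ioapic pin is not allowed by HPET_TCAP_INT_ROUTE, and
 * clear a latched level-triggered interrupt that the new configuration
 * can no longer deliver.
 */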
392 static void
393 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
394     uint64_t mask)
395 {
396 	bool clear_isr;
397 	int old_pin, new_pin;
398 	uint32_t allowed_irqs;
399 	uint64_t oldval, newval;
400 
401 	if (vhpet_timer_msi_enabled(vhpet, n) ||
402 	    vhpet_timer_edge_trig(vhpet, n)) {
403 		if (vhpet->isr & (1 << n))
404 			panic("vhpet timer %d isr should not be asserted", n);
405 	}
406 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
407 	oldval = vhpet->timer[n].cap_config;
408 
409 	newval = oldval;
410 	update_register(&newval, data, mask);
411 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
412 	newval |= oldval & HPET_TCAP_RO_MASK;
413 
414 	if (newval == oldval)
415 		return;
416 
417 	vhpet->timer[n].cap_config = newval;
418 	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016lx", n, newval);
419 
420 	/*
421 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
422 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
423 	 * it to the default value of 0.
424 	 */
425 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
426 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
427 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
428 		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
429 		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
430 		new_pin = 0;
431 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
432 	}
433 
434 	if (!vhpet_periodic_timer(vhpet, n))
435 		vhpet->timer[n].comprate = 0;
436 
437 	/*
438 	 * If the timer's ISR bit is set then clear it in the following cases:
439 	 * - interrupt is disabled
440 	 * - interrupt type is changed from level to edge or fsb.
441 	 * - interrupt routing is changed
442 	 *
443 	 * This is to ensure that this timer's level triggered interrupt does
444 	 * not remain asserted forever.
445 	 */
446 	if (vhpet->isr & (1 << n)) {
447 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
448 		    n, old_pin));
449 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
450 			clear_isr = true;
451 		else if (vhpet_timer_msi_enabled(vhpet, n))
452 			clear_isr = true;
453 		else if (vhpet_timer_edge_trig(vhpet, n))
454 			clear_isr = true;
455 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
456 			clear_isr = true;
457 		else
458 			clear_isr = false;
459 
460 		if (clear_isr) {
461 			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
462 			    "configuration change", n);
463 			vioapic_deassert_irq(vhpet->vm, old_pin);
464 			vhpet->isr &= ~(1 << n);
465 		}
466 	}
467 }
468 
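/*
 * Handle a guest MMIO write to the HPET register block. Only naturally
 * aligned 4- and 8-byte accesses are accepted; a 4-byte access is widened
 * into a masked update of the containing 64-bit register.
 */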
469 int
470 vhpet_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t val, int size,
471     void *arg)
472 {
473 	struct vhpet *vhpet;
474 	uint64_t data, mask, oldval, val64;
475 	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
476 	sbintime_t now, *nowptr;
477 	int i, offset;
478 
479 	vhpet = vm_hpet(vcpu_vm(vcpu));
480 	offset = gpa - VHPET_BASE;
481 
482 	VHPET_LOCK(vhpet);
483 
484 	/* Accesses to the HPET should be 4 or 8 bytes wide */
485 	switch (size) {
486 	case 8:
487 		mask = 0xffffffffffffffff;
488 		data = val;
489 		break;
490 	case 4:
491 		mask = 0xffffffff;
492 		data = val;
493 		if ((offset & 0x4) != 0) {
494 			mask <<= 32;
495 			data <<= 32;
496 		}
497 		break;
498 	default:
499 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
500 		    "offset 0x%08x, size %d", offset, size);
501 		goto done;
502 	}
503 
504 	/* Accesses to the HPET should be naturally aligned to their width */
505 	if (offset & (size - 1)) {
506 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
507 		    "offset 0x%08x, size %d", offset, size);
508 		goto done;
509 	}
510 
511 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
512 		/*
513 		 * Get the most recent value of the counter before updating
514 		 * the 'config' register. If the HPET is going to be disabled
515 		 * then we need to update 'countbase' with the value right
516 		 * before it is disabled.
517 		 */
518 		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
519 		counter = vhpet_counter(vhpet, nowptr);
520 		oldval = vhpet->config;
521 		update_register(&vhpet->config, data, mask);
522 
523 		/*
524 		 * LegacyReplacement Routing is not supported so clear the
525 		 * bit explicitly.
526 		 */
527 		vhpet->config &= ~HPET_CNF_LEG_RT;
528 
529 		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
530 			if (vhpet_counter_enabled(vhpet)) {
531 				vhpet_start_counting(vhpet);
532 				VM_CTR0(vhpet->vm, "hpet enabled");
533 			} else {
534 				vhpet_stop_counting(vhpet, counter, now);
535 				VM_CTR0(vhpet->vm, "hpet disabled");
536 			}
537 		}
538 		goto done;
539 	}
540 
541 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
542 		isr_clear_mask = vhpet->isr & data;
543 		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
544 			if ((isr_clear_mask & (1 << i)) != 0) {
545 				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
546 				vhpet_timer_clear_isr(vhpet, i);
547 			}
548 		}
549 		goto done;
550 	}
551 
552 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
553 		/* Zero-extend the counter to 64-bits before updating it */
554 		val64 = vhpet_counter(vhpet, NULL);
555 		update_register(&val64, data, mask);
556 		vhpet->countbase = val64;
557 		if (vhpet_counter_enabled(vhpet))
558 			vhpet_start_counting(vhpet);
559 		goto done;
560 	}
561 
562 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
563 		if (offset == HPET_TIMER_CAP_CNF(i) ||
564 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
565 			vhpet_timer_update_config(vhpet, i, data, mask);
566 			break;
567 		}
568 
569 		if (offset == HPET_TIMER_COMPARATOR(i) ||
570 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
571 			old_compval = vhpet->timer[i].compval;
572 			old_comprate = vhpet->timer[i].comprate;
573 			if (vhpet_periodic_timer(vhpet, i)) {
574 				/*
575 				 * In periodic mode writes to the comparator
576 				 * change the 'compval' register only if the
577 				 * HPET_TCNF_VAL_SET bit is set in the config
578 				 * register.
579 				 */
580 				val64 = vhpet->timer[i].comprate;
581 				update_register(&val64, data, mask);
582 				vhpet->timer[i].comprate = val64;
583 				if ((vhpet->timer[i].cap_config &
584 				    HPET_TCNF_VAL_SET) != 0) {
585 					vhpet->timer[i].compval = val64;
586 				}
587 			} else {
588 				KASSERT(vhpet->timer[i].comprate == 0,
589 				    ("vhpet one-shot timer %d has invalid "
590 				    "rate %u", i, vhpet->timer[i].comprate));
591 				val64 = vhpet->timer[i].compval;
592 				update_register(&val64, data, mask);
593 				vhpet->timer[i].compval = val64;
594 			}
595 			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
596 
597 			if (vhpet->timer[i].compval != old_compval ||
598 			    vhpet->timer[i].comprate != old_comprate) {
599 				if (vhpet_counter_enabled(vhpet)) {
600 					counter = vhpet_counter(vhpet, &now);
601 					vhpet_start_timer(vhpet, i, counter,
602 					    now);
603 				}
604 			}
605 			break;
606 		}
607 
608 		if (offset == HPET_TIMER_FSB_VAL(i) ||
609 		    offset == HPET_TIMER_FSB_ADDR(i)) {
610 			update_register(&vhpet->timer[i].msireg, data, mask);
611 			break;
612 		}
613 	}
614 done:
615 	VHPET_UNLOCK(vhpet);
616 	return (0);
617 }
618 
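/*
 * Handle a guest MMIO read from the HPET register block. Unimplemented
 * and malformed accesses read as zero; for a 4-byte access the requested
 * half of the 64-bit register is returned.
 */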
619 int
620 vhpet_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size,
621     void *arg)
622 {
623 	int i, offset;
624 	struct vhpet *vhpet;
625 	uint64_t data;
626 
627 	vhpet = vm_hpet(vcpu_vm(vcpu));
628 	offset = gpa - VHPET_BASE;
629 
630 	VHPET_LOCK(vhpet);
631 
632 	/* Accesses to the HPET should be 4 or 8 bytes wide */
633 	if (size != 4 && size != 8) {
634 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
635 		    "offset 0x%08x, size %d", offset, size);
636 		data = 0;
637 		goto done;
638 	}
639 
640 	/* Accesses to the HPET should be naturally aligned to their width */
641 	if (offset & (size - 1)) {
642 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
643 		    "offset 0x%08x, size %d", offset, size);
644 		data = 0;
645 		goto done;
646 	}
647 
648 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
649 		data = vhpet_capabilities();
650 		goto done;
651 	}
652 
653 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
654 		data = vhpet->config;
655 		goto done;
656 	}
657 
658 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
659 		data = vhpet->isr;
660 		goto done;
661 	}
662 
663 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
664 		data = vhpet_counter(vhpet, NULL);
665 		goto done;
666 	}
667 
668 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
669 		if (offset == HPET_TIMER_CAP_CNF(i) ||
670 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
671 			data = vhpet->timer[i].cap_config;
672 			break;
673 		}
674 
675 		if (offset == HPET_TIMER_COMPARATOR(i) ||
676 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
677 			data = vhpet->timer[i].compval;
678 			break;
679 		}
680 
681 		if (offset == HPET_TIMER_FSB_VAL(i) ||
682 		    offset == HPET_TIMER_FSB_ADDR(i)) {
683 			data = vhpet->timer[i].msireg;
684 			break;
685 		}
686 	}
687 
688 	if (i >= VHPET_NUM_TIMERS)
689 		data = 0;
690 done:
691 	VHPET_UNLOCK(vhpet);
692 
693 	if (size == 4) {
694 		if (offset & 0x4)
695 			data >>= 32;
696 	}
697 	*rval = data;
698 	return (0);
699 }
700 
701 struct vhpet *
702 vhpet_init(struct vm *vm)
703 {
704 	int i, pincount;
705 	struct vhpet *vhpet;
706 	uint64_t allowed_irqs;
707 	struct vhpet_callout_arg *arg;
708 	struct bintime bt;
709 
710 	vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
711 	vhpet->vm = vm;
712 	mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
713 
714 	FREQ2BT(HPET_FREQ, &bt);
715 	vhpet->freq_sbt = bttosbt(bt);
716 
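	/*
	 * Allow timer interrupts to be routed only to the upper ioapic pins:
	 * pins 24-31 when the ioapic has 32 or more pins, the top 4 pins when
	 * it has at least 20, and no pins at all otherwise.
	 */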
717 	pincount = vioapic_pincount(vm);
718 	if (pincount >= 32)
719 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
720 	else if (pincount >= 20)
721 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
722 	else
723 		allowed_irqs = 0;
724 
725 	/*
726 	 * Initialize HPET timer hardware state.
727 	 */
728 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
729 		vhpet->timer[i].cap_config = allowed_irqs << 32;
730 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
731 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
732 
733 		vhpet->timer[i].compval = 0xffffffff;
734 		callout_init(&vhpet->timer[i].callout, 1);
735 
736 		arg = &vhpet->timer[i].arg;
737 		arg->vhpet = vhpet;
738 		arg->timer_num = i;
739 	}
740 
741 	return (vhpet);
742 }
743 
744 void
745 vhpet_cleanup(struct vhpet *vhpet)
746 {
747 	int i;
748 
749 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
750 		callout_drain(&vhpet->timer[i].callout);
751 
752 	mtx_destroy(&vhpet->mtx);
753 	free(vhpet, M_VHPET);
754 }
755 
756 int
757 vhpet_getcap(struct vm_hpet_cap *cap)
758 {
759 
760 	cap->capabilities = vhpet_capabilities();
761 	return (0);
762 }
763 
764 #ifdef BHYVE_SNAPSHOT
765 int
766 vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
767 {
768 	int i, ret;
769 	uint32_t countbase;
770 
771 	SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
772 	SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
773 	SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
774 
775 	/*
776 	 * At restore time 'countbase' should have the value it had when the
777 	 * snapshot was created. Since that value is derived at run time from
778 	 * 'countbase_sbt' and the current uptime, save the vhpet_counter() value.
779 	 */
780 	if (meta->op == VM_SNAPSHOT_SAVE)
781 		countbase = vhpet_counter(vhpet, NULL);
782 	SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
783 	if (meta->op == VM_SNAPSHOT_RESTORE)
784 		vhpet->countbase = countbase;
785 
786 	for (i = 0; i < nitems(vhpet->timer); i++) {
787 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
788 				      meta, ret, done);
789 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
790 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
791 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
792 		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
793 				      meta, ret, done);
794 	}
795 
796 done:
797 	return (ret);
798 }
799 
800 int
801 vhpet_restore_time(struct vhpet *vhpet)
802 {
803 	if (vhpet_counter_enabled(vhpet))
804 		vhpet_start_counting(vhpet);
805 
806 	return (0);
807 }
808 #endif
809