xref: /freebsd/sys/amd64/vmm/io/vhpet.c (revision 9bc300465e48e19d794d88d0c158a2adb92c7197)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include "opt_bhyve_snapshot.h"
32 
33 #include <sys/param.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/systm.h>
39 
40 #include <dev/acpica/acpi_hpet.h>
41 
42 #include <machine/vmm.h>
43 #include <machine/vmm_dev.h>
44 #include <machine/vmm_snapshot.h>
45 
46 #include "vmm_lapic.h"
47 #include "vatpic.h"
48 #include "vioapic.h"
49 #include "vhpet.h"
50 
51 #include "vmm_ktr.h"
52 
/* Malloc type used for the 'struct vhpet' allocated in vhpet_init(). */
static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");

/* Frequency of the emulated main counter in Hz. */
#define	HPET_FREQ	16777216		/* 16.7 (2^24) MHz */
/* Femtoseconds per second; used to report the tick period in fs. */
#define	FS_PER_S	1000000000000000ul

/*
 * Timer N Configuration and Capabilities Register
 *
 * Bits covered by this mask are carried over from the previous register
 * value whenever the guest writes the register (see
 * vhpet_timer_update_config()).
 */
#define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
				 HPET_TCAP_FSB_INT_DEL	|		\
				 HPET_TCAP_SIZE		|		\
				 HPET_TCAP_PER_INT)
/*
 * HPET requires at least 3 timers and up to 32 timers per block.
 */
#define	VHPET_NUM_TIMERS	8
CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
68 
/*
 * Argument handed to vhpet_handler() by a timer's callout. It identifies
 * which timer of which vhpet instance expired.
 */
struct vhpet_callout_arg {
	struct vhpet *vhpet;	/* owning vhpet instance */
	int timer_num;		/* index into vhpet->timer[] */
};
73 
/*
 * Software state of one emulated HPET block.
 *
 * The main counter is not stored as a running value: it is derived on
 * demand from 'countbase', 'countbase_sbt' and the current uptime (see
 * vhpet_counter()).
 */
struct vhpet {
	struct vm	*vm;		/* VM this HPET belongs to */
	struct mtx	mtx;		/* taken via VHPET_LOCK()/VHPET_UNLOCK() */
	sbintime_t	freq_sbt;	/* duration of one counter tick */

	uint64_t	config;		/* Configuration */
	uint64_t	isr;		/* Interrupt Status */
	uint32_t	countbase;	/* HPET counter base value */
	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */

	/* Per-timer state, indexed by timer number. */
	struct {
		uint64_t	cap_config;	/* Configuration */
		uint64_t	msireg;		/* FSB interrupt routing */
		uint32_t	compval;	/* Comparator */
		uint32_t	comprate;	/* period in ticks; 0 if one-shot */
		struct callout	callout;	/* fires vhpet_handler() */
		sbintime_t	callout_sbt;	/* time when counter==compval */
		struct vhpet_callout_arg arg;	/* argument for the callout */
	} timer[VHPET_NUM_TIMERS];
};
94 
/* Acquire/release the mutex embedded in a 'struct vhpet'. */
#define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
#define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))
97 
/*
 * Forward declaration: vhpet_handler() re-arms the timer through
 * vhpet_start_timer(), which is defined after it.
 */
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
    sbintime_t now);
100 
101 static uint64_t
102 vhpet_capabilities(void)
103 {
104 	uint64_t cap = 0;
105 
106 	cap |= 0x8086 << 16;			/* vendor id */
107 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
108 	cap |= 1;				/* revision */
109 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
110 
111 	cap &= 0xffffffff;
112 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
113 
114 	return (cap);
115 }
116 
117 static __inline bool
118 vhpet_counter_enabled(struct vhpet *vhpet)
119 {
120 
121 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
122 }
123 
124 static __inline bool
125 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
126 {
127 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
128 
129 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
130 		return (true);
131 	else
132 		return (false);
133 }
134 
135 static __inline int
136 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
137 {
138 	/*
139 	 * If the timer is configured to use MSI then treat it as if the
140 	 * timer is not connected to the ioapic.
141 	 */
142 	if (vhpet_timer_msi_enabled(vhpet, n))
143 		return (0);
144 
145 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
146 }
147 
148 static uint32_t
149 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
150 {
151 	uint32_t val;
152 	sbintime_t now, delta;
153 
154 	val = vhpet->countbase;
155 	if (vhpet_counter_enabled(vhpet)) {
156 		now = sbinuptime();
157 		delta = now - vhpet->countbase_sbt;
158 		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
159 		    "%#lx to %#lx", vhpet->countbase_sbt, now));
160 		val += delta / vhpet->freq_sbt;
161 		if (nowptr != NULL)
162 			*nowptr = now;
163 	} else {
164 		/*
165 		 * The sbinuptime corresponding to the 'countbase' is
166 		 * meaningless when the counter is disabled. Make sure
167 		 * that the caller doesn't want to use it.
168 		 */
169 		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
170 	}
171 	return (val);
172 }
173 
174 static void
175 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
176 {
177 	int pin;
178 
179 	if (vhpet->isr & (1 << n)) {
180 		pin = vhpet_timer_ioapic_pin(vhpet, n);
181 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
182 		vioapic_deassert_irq(vhpet->vm, pin);
183 		vhpet->isr &= ~(1 << n);
184 	}
185 }
186 
187 static __inline bool
188 vhpet_periodic_timer(struct vhpet *vhpet, int n)
189 {
190 
191 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
192 }
193 
194 static __inline bool
195 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
196 {
197 
198 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
199 }
200 
201 static __inline bool
202 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
203 {
204 
205 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
206 	    "timer %d is using MSI", n));
207 
208 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
209 		return (true);
210 	else
211 		return (false);
212 }
213 
214 static void
215 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
216 {
217 	int pin;
218 
219 	/* If interrupts are not enabled for this timer then just return. */
220 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
221 		return;
222 
223 	/*
224 	 * If a level triggered interrupt is already asserted then just return.
225 	 */
226 	if ((vhpet->isr & (1 << n)) != 0) {
227 		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
228 		return;
229 	}
230 
231 	if (vhpet_timer_msi_enabled(vhpet, n)) {
232 		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
233 		    vhpet->timer[n].msireg & 0xffffffff);
234 		return;
235 	}
236 
237 	pin = vhpet_timer_ioapic_pin(vhpet, n);
238 	if (pin == 0) {
239 		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
240 		return;
241 	}
242 
243 	if (vhpet_timer_edge_trig(vhpet, n)) {
244 		vioapic_pulse_irq(vhpet->vm, pin);
245 	} else {
246 		vhpet->isr |= 1 << n;
247 		vioapic_assert_irq(vhpet->vm, pin);
248 	}
249 }
250 
251 static void
252 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
253 {
254 	uint32_t compval, comprate, compnext;
255 
256 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
257 
258 	compval = vhpet->timer[n].compval;
259 	comprate = vhpet->timer[n].comprate;
260 
261 	/*
262 	 * Calculate the comparator value to be used for the next periodic
263 	 * interrupt.
264 	 *
265 	 * This function is commonly called from the callout handler.
266 	 * In this scenario the 'counter' is ahead of 'compval'. To find
267 	 * the next value to program into the accumulator we divide the
268 	 * number space between 'compval' and 'counter' into 'comprate'
269 	 * sized units. The 'compval' is rounded up such that is "ahead"
270 	 * of 'counter'.
271 	 */
272 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
273 
274 	vhpet->timer[n].compval = compnext;
275 }
276 
277 static void
278 vhpet_handler(void *a)
279 {
280 	int n;
281 	uint32_t counter;
282 	sbintime_t now;
283 	struct vhpet *vhpet;
284 	struct callout *callout;
285 	struct vhpet_callout_arg *arg;
286 
287 	arg = a;
288 	vhpet = arg->vhpet;
289 	n = arg->timer_num;
290 	callout = &vhpet->timer[n].callout;
291 
292 	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
293 
294 	VHPET_LOCK(vhpet);
295 
296 	if (callout_pending(callout))		/* callout was reset */
297 		goto done;
298 
299 	if (!callout_active(callout))		/* callout was stopped */
300 		goto done;
301 
302 	callout_deactivate(callout);
303 
304 	if (!vhpet_counter_enabled(vhpet))
305 		panic("vhpet(%p) callout with counter disabled", vhpet);
306 
307 	counter = vhpet_counter(vhpet, &now);
308 	vhpet_start_timer(vhpet, n, counter, now);
309 	vhpet_timer_interrupt(vhpet, n);
310 done:
311 	VHPET_UNLOCK(vhpet);
312 	return;
313 }
314 
315 static void
316 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
317 {
318 
319 	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
320 	callout_stop(&vhpet->timer[n].callout);
321 
322 	/*
323 	 * If the callout was scheduled to expire in the past but hasn't
324 	 * had a chance to execute yet then trigger the timer interrupt
325 	 * here. Failing to do so will result in a missed timer interrupt
326 	 * in the guest. This is especially bad in one-shot mode because
327 	 * the next interrupt has to wait for the counter to wrap around.
328 	 */
329 	if (vhpet->timer[n].callout_sbt < now) {
330 		VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
331 		    "stopping timer", n);
332 		vhpet_timer_interrupt(vhpet, n);
333 	}
334 }
335 
336 static void
337 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
338 {
339 	sbintime_t delta, precision;
340 
341 	if (vhpet->timer[n].comprate != 0)
342 		vhpet_adjust_compval(vhpet, n, counter);
343 	else {
344 		/*
345 		 * In one-shot mode it is the guest's responsibility to make
346 		 * sure that the comparator value is not in the "past". The
347 		 * hardware doesn't have any belt-and-suspenders to deal with
348 		 * this so we don't either.
349 		 */
350 	}
351 
352 	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
353 	precision = delta >> tc_precexp;
354 	vhpet->timer[n].callout_sbt = now + delta;
355 	callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
356 	    precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
357 }
358 
359 static void
360 vhpet_start_counting(struct vhpet *vhpet)
361 {
362 	int i;
363 
364 	vhpet->countbase_sbt = sbinuptime();
365 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
366 		/*
367 		 * Restart the timers based on the value of the main counter
368 		 * when it stopped counting.
369 		 */
370 		vhpet_start_timer(vhpet, i, vhpet->countbase,
371 		    vhpet->countbase_sbt);
372 	}
373 }
374 
375 static void
376 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
377 {
378 	int i;
379 
380 	vhpet->countbase = counter;
381 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
382 		vhpet_stop_timer(vhpet, i, now);
383 }
384 
/*
 * Replace the bits of '*regptr' selected by 'mask' with the corresponding
 * bits of 'data'; bits outside of 'mask' are left untouched.
 */
static __inline void
update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
{
	uint64_t kept, changed;

	kept = *regptr & ~mask;
	changed = data & mask;
	*regptr = kept | changed;
}
392 
393 static void
394 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
395     uint64_t mask)
396 {
397 	bool clear_isr;
398 	int old_pin, new_pin;
399 	uint32_t allowed_irqs;
400 	uint64_t oldval, newval;
401 
402 	if (vhpet_timer_msi_enabled(vhpet, n) ||
403 	    vhpet_timer_edge_trig(vhpet, n)) {
404 		if (vhpet->isr & (1 << n))
405 			panic("vhpet timer %d isr should not be asserted", n);
406 	}
407 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
408 	oldval = vhpet->timer[n].cap_config;
409 
410 	newval = oldval;
411 	update_register(&newval, data, mask);
412 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
413 	newval |= oldval & HPET_TCAP_RO_MASK;
414 
415 	if (newval == oldval)
416 		return;
417 
418 	vhpet->timer[n].cap_config = newval;
419 	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
420 
421 	/*
422 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
423 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
424 	 * it to the default value of 0.
425 	 */
426 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
427 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
428 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
429 		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
430 		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
431 		new_pin = 0;
432 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
433 	}
434 
435 	if (!vhpet_periodic_timer(vhpet, n))
436 		vhpet->timer[n].comprate = 0;
437 
438 	/*
439 	 * If the timer's ISR bit is set then clear it in the following cases:
440 	 * - interrupt is disabled
441 	 * - interrupt type is changed from level to edge or fsb.
442 	 * - interrupt routing is changed
443 	 *
444 	 * This is to ensure that this timer's level triggered interrupt does
445 	 * not remain asserted forever.
446 	 */
447 	if (vhpet->isr & (1 << n)) {
448 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
449 		    n, old_pin));
450 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
451 			clear_isr = true;
452 		else if (vhpet_timer_msi_enabled(vhpet, n))
453 			clear_isr = true;
454 		else if (vhpet_timer_edge_trig(vhpet, n))
455 			clear_isr = true;
456 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
457 			clear_isr = true;
458 		else
459 			clear_isr = false;
460 
461 		if (clear_isr) {
462 			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
463 			    "configuration change", n);
464 			vioapic_deassert_irq(vhpet->vm, old_pin);
465 			vhpet->isr &= ~(1 << n);
466 		}
467 	}
468 }
469 
/*
 * Handle a guest write of 'size' bytes of 'val' to the HPET register at
 * guest physical address 'gpa'.
 *
 * The vhpet instance is looked up from 'vcpu'; 'arg' is not used. Writes
 * with an unsupported size or alignment, and writes to offsets that match
 * no emulated register, are ignored. The function always returns 0.
 */
int
vhpet_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t val, int size,
    void *arg)
{
	struct vhpet *vhpet;
	uint64_t data, mask, oldval, val64;
	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
	sbintime_t now, *nowptr;
	int i, offset;

	vhpet = vm_hpet(vcpu_vm(vcpu));
	offset = gpa - VHPET_BASE;

	VHPET_LOCK(vhpet);

	/*
	 * Express the access as a ('data', 'mask') pair relative to the
	 * 64-bit register that contains it.
	 */
	/* Accesses to the HPET should be 4 or 8 bytes wide */
	switch (size) {
	case 8:
		mask = 0xffffffffffffffff;
		data = val;
		break;
	case 4:
		mask = 0xffffffff;
		data = val;
		/* A 4-byte access to the upper half targets bits 63:32. */
		if ((offset & 0x4) != 0) {
			mask <<= 32;
			data <<= 32;
		}
		break;
	default:
		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
		    "offset 0x%08x, size %d", offset, size);
		goto done;
	}

	/* Access to the HPET should be naturally aligned to its width */
	if (offset & (size - 1)) {
		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
		    "offset 0x%08x, size %d", offset, size);
		goto done;
	}

	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
		/*
		 * Get the most recent value of the counter before updating
		 * the 'config' register. If the HPET is going to be disabled
		 * then we need to update 'countbase' with the value right
		 * before it is disabled.
		 */
		/* 'now' is only written when the counter is currently enabled. */
		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
		counter = vhpet_counter(vhpet, nowptr);
		oldval = vhpet->config;
		update_register(&vhpet->config, data, mask);

		/*
		 * LegacyReplacement Routing is not supported so clear the
		 * bit explicitly.
		 */
		vhpet->config &= ~HPET_CNF_LEG_RT;

		/* Act only if the enable bit actually changed. */
		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
			if (vhpet_counter_enabled(vhpet)) {
				vhpet_start_counting(vhpet);
				VM_CTR0(vhpet->vm, "hpet enabled");
			} else {
				/*
				 * The counter was enabled before this write,
				 * so 'now' was filled in above.
				 */
				vhpet_stop_counting(vhpet, counter, now);
				VM_CTR0(vhpet->vm, "hpet disabled");
			}
		}
		goto done;
	}

	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
		/* Only status bits that are set and written as 1 are cleared. */
		isr_clear_mask = vhpet->isr & data;
		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
			if ((isr_clear_mask & (1 << i)) != 0) {
				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
				vhpet_timer_clear_isr(vhpet, i);
			}
		}
		goto done;
	}

	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
		/* Zero-extend the counter to 64-bits before updating it */
		val64 = vhpet_counter(vhpet, NULL);
		update_register(&val64, data, mask);
		/* 'countbase' is 32 bits wide; the upper half is dropped. */
		vhpet->countbase = val64;
		/* Re-arm the timers relative to the new counter value. */
		if (vhpet_counter_enabled(vhpet))
			vhpet_start_counting(vhpet);
		goto done;
	}

	/* Per-timer registers: find the timer that owns 'offset'. */
	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
		if (offset == HPET_TIMER_CAP_CNF(i) ||
		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
			vhpet_timer_update_config(vhpet, i, data, mask);
			break;
		}

		if (offset == HPET_TIMER_COMPARATOR(i) ||
		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
			/* Remember the old values to detect a real change. */
			old_compval = vhpet->timer[i].compval;
			old_comprate = vhpet->timer[i].comprate;
			if (vhpet_periodic_timer(vhpet, i)) {
				/*
				 * In periodic mode writes to the comparator
				 * change the 'compval' register only if the
				 * HPET_TCNF_VAL_SET bit is set in the config
				 * register.
				 */
				val64 = vhpet->timer[i].comprate;
				update_register(&val64, data, mask);
				vhpet->timer[i].comprate = val64;
				if ((vhpet->timer[i].cap_config &
				    HPET_TCNF_VAL_SET) != 0) {
					vhpet->timer[i].compval = val64;
				}
			} else {
				KASSERT(vhpet->timer[i].comprate == 0,
				    ("vhpet one-shot timer %d has invalid "
				    "rate %u", i, vhpet->timer[i].comprate));
				val64 = vhpet->timer[i].compval;
				update_register(&val64, data, mask);
				vhpet->timer[i].compval = val64;
			}
			/* HPET_TCNF_VAL_SET is cleared by any comparator write. */
			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;

			/* Re-arm the timer if its programming changed. */
			if (vhpet->timer[i].compval != old_compval ||
			    vhpet->timer[i].comprate != old_comprate) {
				if (vhpet_counter_enabled(vhpet)) {
					counter = vhpet_counter(vhpet, &now);
					vhpet_start_timer(vhpet, i, counter,
					    now);
				}
			}
			break;
		}

		if (offset == HPET_TIMER_FSB_VAL(i) ||
		    offset == HPET_TIMER_FSB_ADDR(i)) {
			update_register(&vhpet->timer[i].msireg, data, mask);
			break;
		}
	}
done:
	VHPET_UNLOCK(vhpet);
	return (0);
}
619 
620 int
621 vhpet_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size,
622     void *arg)
623 {
624 	int i, offset;
625 	struct vhpet *vhpet;
626 	uint64_t data;
627 
628 	vhpet = vm_hpet(vcpu_vm(vcpu));
629 	offset = gpa - VHPET_BASE;
630 
631 	VHPET_LOCK(vhpet);
632 
633 	/* Accesses to the HPET should be 4 or 8 bytes wide */
634 	if (size != 4 && size != 8) {
635 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
636 		    "offset 0x%08x, size %d", offset, size);
637 		data = 0;
638 		goto done;
639 	}
640 
641 	/* Access to the HPET should be naturally aligned to its width */
642 	if (offset & (size - 1)) {
643 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
644 		    "offset 0x%08x, size %d", offset, size);
645 		data = 0;
646 		goto done;
647 	}
648 
649 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
650 		data = vhpet_capabilities();
651 		goto done;
652 	}
653 
654 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
655 		data = vhpet->config;
656 		goto done;
657 	}
658 
659 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
660 		data = vhpet->isr;
661 		goto done;
662 	}
663 
664 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
665 		data = vhpet_counter(vhpet, NULL);
666 		goto done;
667 	}
668 
669 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
670 		if (offset == HPET_TIMER_CAP_CNF(i) ||
671 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
672 			data = vhpet->timer[i].cap_config;
673 			break;
674 		}
675 
676 		if (offset == HPET_TIMER_COMPARATOR(i) ||
677 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
678 			data = vhpet->timer[i].compval;
679 			break;
680 		}
681 
682 		if (offset == HPET_TIMER_FSB_VAL(i) ||
683 		    offset == HPET_TIMER_FSB_ADDR(i)) {
684 			data = vhpet->timer[i].msireg;
685 			break;
686 		}
687 	}
688 
689 	if (i >= VHPET_NUM_TIMERS)
690 		data = 0;
691 done:
692 	VHPET_UNLOCK(vhpet);
693 
694 	if (size == 4) {
695 		if (offset & 0x4)
696 			data >>= 32;
697 	}
698 	*rval = data;
699 	return (0);
700 }
701 
702 struct vhpet *
703 vhpet_init(struct vm *vm)
704 {
705 	int i, pincount;
706 	struct vhpet *vhpet;
707 	uint64_t allowed_irqs;
708 	struct vhpet_callout_arg *arg;
709 	struct bintime bt;
710 
711 	vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
712         vhpet->vm = vm;
713 	mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
714 
715 	FREQ2BT(HPET_FREQ, &bt);
716 	vhpet->freq_sbt = bttosbt(bt);
717 
718 	pincount = vioapic_pincount(vm);
719 	if (pincount >= 32)
720 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
721 	else if (pincount >= 20)
722 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
723 	else
724 		allowed_irqs = 0;
725 
726 	/*
727 	 * Initialize HPET timer hardware state.
728 	 */
729 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
730 		vhpet->timer[i].cap_config = allowed_irqs << 32;
731 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
732 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
733 
734 		vhpet->timer[i].compval = 0xffffffff;
735 		callout_init(&vhpet->timer[i].callout, 1);
736 
737 		arg = &vhpet->timer[i].arg;
738 		arg->vhpet = vhpet;
739 		arg->timer_num = i;
740 	}
741 
742 	return (vhpet);
743 }
744 
745 void
746 vhpet_cleanup(struct vhpet *vhpet)
747 {
748 	int i;
749 
750 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
751 		callout_drain(&vhpet->timer[i].callout);
752 
753 	mtx_destroy(&vhpet->mtx);
754 	free(vhpet, M_VHPET);
755 }
756 
757 int
758 vhpet_getcap(struct vm_hpet_cap *cap)
759 {
760 
761 	cap->capabilities = vhpet_capabilities();
762 	return (0);
763 }
764 
765 #ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual HPET, depending on 'meta->op'.
 *
 * Returns the value left in 'ret' by the last snapshot macro that ran.
 *
 * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined outside this file; it
 * presumably transfers the named variable to or from 'meta' and, on
 * failure, jumps to 'done' with the error in 'ret' -- confirm against its
 * definition.
 */
int
vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
{
	int i, ret;
	uint32_t countbase;

	SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);

	/* at restore time the countbase should have the value it had when the
	 * snapshot was created; since the value is not directly kept in
	 * vhpet->countbase, but rather computed relative to the current system
	 * uptime using countbase_sbt, save the value returned by vhpet_counter
	 */
	/* Saving: sample the live counter into the local 'countbase'. */
	if (meta->op == VM_SNAPSHOT_SAVE)
		countbase = vhpet_counter(vhpet, NULL);
	SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
	/* Restoring: install the saved value as the new counter base. */
	if (meta->op == VM_SNAPSHOT_RESTORE)
		vhpet->countbase = countbase;

	/* Per-timer register state; the callouts themselves are not saved. */
	for (i = 0; i < nitems(vhpet->timer); i++) {
		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
				      meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
				      meta, ret, done);
	}

done:
	return (ret);
}
800 
/*
 * Resume timekeeping: if the main counter is enabled, restart counting
 * from the current uptime, which also re-arms all timers. Always returns
 * 0.
 */
int
vhpet_restore_time(struct vhpet *vhpet)
{
	if (!vhpet_counter_enabled(vhpet))
		return (0);

	vhpet_start_counting(vhpet);
	return (0);
}
809 #endif
810