xref: /freebsd/sys/amd64/vmm/io/vlapic.c (revision 730cecb05aaf016ac52ef7cfc691ccec3a0408cd)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/systm.h>
36 #include <sys/smp.h>
37 
38 #include <machine/clock.h>
39 #include <x86/specialreg.h>
40 #include <x86/apicreg.h>
41 
42 #include <machine/vmm.h>
43 
44 #include "vmm_lapic.h"
45 #include "vmm_ktr.h"
46 #include "vdev.h"
47 #include "vlapic.h"
48 
/* Tracing helpers: tag each record with this vlapic's vm and vcpuid. */
#define	VLAPIC_CTR0(vlapic, format)					\
	VMM_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)

#define	VLAPIC_CTR1(vlapic, format, p1)					\
	VMM_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)

/*
 * Trace all eight IRR registers.  The IRRx dwords are spaced 16 bytes
 * (4 dwords) apart in 'struct LAPIC', hence the 'n << 2' index scaling.
 */
#define	VLAPIC_CTR_IRR(vlapic, msg)					\
do {									\
	uint32_t *irrptr = &(vlapic)->apic.irr0;			\
	irrptr[0] = irrptr[0];	/* silence compiler */			\
	VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]);	\
} while (0)

/* Trace all eight ISR registers (same 16-byte spacing as the IRR). */
#define	VLAPIC_CTR_ISR(vlapic, msg)					\
do {									\
	uint32_t *isrptr = &(vlapic)->apic.isr0;			\
	isrptr[0] = isrptr[0];	/* silence compiler */			\
	VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]);	\
	VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);	\
} while (0)

static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");

/* Interrupt priority class: the upper nibble of the vector number. */
#define	PRIO(x)			((x) >> 4)

#define VLAPIC_VERSION		(16)
#define VLAPIC_MAXLVT_ENTRIES	(5)

/* Nonzero when the x2apic enable bit is set in the APIC base MSR image. */
#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
91 
/*
 * AP startup state machine: an INIT IPI moves a vcpu from BS_INIT to
 * BS_SIPI, and a subsequent STARTUP IPI moves it to BS_RUNNING (see
 * lapic_process_icr()).
 */
enum boot_state {
	BS_INIT,
	BS_SIPI,
	BS_RUNNING
};

/* Software state of one virtual local APIC (one instance per vcpu). */
struct vlapic {
	struct vm		*vm;		/* owning virtual machine */
	int			vcpuid;

	struct io_region	*mmio;		/* 4KB register window */
	struct vdev_ops		*ops;		/* vdev dispatch table */
	struct LAPIC		 apic;		/* architectural register file */

	int			 esr_update;

	int			 divisor;	/* timer divide, from the DCR */
	int			 ccr_ticks;	/* host 'ticks' at last timer update */

	/*
	 * The 'isrvec_stk' is a stack of vectors injected by the local apic.
	 * A vector is popped from the stack when the processor does an EOI.
	 * The vector on the top of the stack is used to compute the
	 * Processor Priority in conjunction with the TPR.
	 */
	uint8_t			 isrvec_stk[ISRVEC_STK_SIZE];
	int			 isrvec_stk_top;

	uint64_t		msr_apicbase;	/* IA32_APIC_BASE MSR image */
	enum boot_state		boot_state;
};
123 
124 #define VLAPIC_BUS_FREQ	tsc_freq
125 
126 static int
127 vlapic_timer_divisor(uint32_t dcr)
128 {
129 	switch (dcr & 0xB) {
130 	case APIC_TDCR_2:
131 		return (2);
132 	case APIC_TDCR_4:
133 		return (4);
134 	case APIC_TDCR_8:
135 		return (8);
136 	case APIC_TDCR_16:
137 		return (16);
138 	case APIC_TDCR_32:
139 		return (32);
140 	case APIC_TDCR_64:
141 		return (64);
142 	case APIC_TDCR_128:
143 		return (128);
144 	default:
145 		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
146 	}
147 }
148 
149 static void
150 vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
151 {
152 	int i;
153 	for (i = 0; i < num_lvt; i++) {
154 		*lvts |= APIC_LVT_M;
155 		lvts += 4;
156 	}
157 }
158 
159 #if 0
160 static inline void
161 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
162 {
163 	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
164 	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
165 	    *lvt & APIC_LVTT_M);
166 }
167 #endif
168 
169 static uint64_t
170 vlapic_get_ccr(struct vlapic *vlapic)
171 {
172 	struct LAPIC    *lapic = &vlapic->apic;
173 	return lapic->ccr_timer;
174 }
175 
176 static void
177 vlapic_update_errors(struct vlapic *vlapic)
178 {
179 	struct LAPIC    *lapic = &vlapic->apic;
180 	lapic->esr = 0; // XXX
181 }
182 
183 static void
184 vlapic_init_ipi(struct vlapic *vlapic)
185 {
186 	struct LAPIC    *lapic = &vlapic->apic;
187 	lapic->version = VLAPIC_VERSION;
188 	lapic->version |= (VLAPIC_MAXLVT_ENTRIES < MAXLVTSHIFT);
189 	lapic->dfr = 0xffffffff;
190 	lapic->svr = APIC_SVR_VECTOR;
191 	vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
192 }
193 
194 static int
195 vlapic_op_reset(void* dev)
196 {
197 	struct vlapic 	*vlapic = (struct vlapic*)dev;
198 	struct LAPIC	*lapic = &vlapic->apic;
199 
200 	memset(lapic, 0, sizeof(*lapic));
201 	lapic->apr = vlapic->vcpuid;
202 	vlapic_init_ipi(vlapic);
203 	vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
204 
205 	if (vlapic->vcpuid == 0)
206 		vlapic->boot_state = BS_RUNNING;	/* BSP */
207 	else
208 		vlapic->boot_state = BS_INIT;		/* AP */
209 
210 	return 0;
211 
212 }
213 
214 static int
215 vlapic_op_init(void* dev)
216 {
217 	struct vlapic *vlapic = (struct vlapic*)dev;
218 	vdev_register_region(vlapic->ops, vlapic, vlapic->mmio);
219 	return vlapic_op_reset(dev);
220 }
221 
/* vdev halt hook: tear down the MMIO region registered at init time. */
static int
vlapic_op_halt(void* dev)
{
	struct vlapic *vlapic = (struct vlapic*)dev;
	/*
	 * NOTE(review): vdev_register_region() was called with
	 * (ops, vlapic, mmio) but the unregister below passes only
	 * (vlapic, mmio) -- confirm against the vdev interface.
	 */
	vdev_unregister_region(vlapic, vlapic->mmio);
	return 0;

}
230 
231 void
232 vlapic_set_intr_ready(struct vlapic *vlapic, int vector)
233 {
234 	struct LAPIC	*lapic = &vlapic->apic;
235 	uint32_t	*irrptr;
236 	int		idx;
237 
238 	if (vector < 0 || vector >= 256)
239 		panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
240 
241 	idx = (vector / 32) * 4;
242 	irrptr = &lapic->irr0;
243 	atomic_set_int(&irrptr[idx], 1 << (vector % 32));
244 	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
245 }
246 
/*
 * (Re)arm the countdown from the initial-count register, crediting
 * 'elapsed' timer ticks that have already passed.
 */
static void
vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
{
	uint32_t icr_timer;

	icr_timer = vlapic->apic.icr_timer;

	/* Remember the host tick count at which this countdown started. */
	vlapic->ccr_ticks = ticks;
	if (elapsed < icr_timer)
		vlapic->apic.ccr_timer = icr_timer - elapsed;
	else {
		/*
		 * This can happen when the guest is trying to run its local
		 * apic timer higher than the setting of 'hz' in the host.
		 *
		 * We deal with this by running the guest local apic timer
		 * at the rate of the host's 'hz' setting.
		 */
		vlapic->apic.ccr_timer = 0;
	}
}
268 
269 static __inline uint32_t *
270 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
271 {
272 	struct LAPIC	*lapic = &vlapic->apic;
273 	int 		 i;
274 
275 	if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
276 		panic("vlapic_get_lvt: invalid LVT\n");
277 	}
278 	i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
279 	return ((&lapic->lvt_timer) + i);;
280 }
281 
282 #if 1
283 static void
284 dump_isrvec_stk(struct vlapic *vlapic)
285 {
286 	int i;
287 	uint32_t *isrptr;
288 
289 	isrptr = &vlapic->apic.isr0;
290 	for (i = 0; i < 8; i++)
291 		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
292 
293 	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
294 		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
295 }
296 #endif
297 
298 /*
299  * Algorithm adopted from section "Interrupt, Task and Processor Priority"
300  * in Intel Architecture Manual Vol 3a.
301  */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic.tpr;

#if 1
	{
		/* Debug-only consistency checks on the isrvec stack. */
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic.isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	/*
	 * PPR is the higher of the task priority and the priority class
	 * of the highest vector currently in service.
	 */
	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic.ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
366 
/*
 * Handle a write to the EOI register: retire the highest-priority
 * in-service vector, pop the isrvec stack and recompute the PPR.
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = &vlapic->apic;
	uint32_t	*isrptr;
	int		i, idx, bitpos;

	isrptr = &lapic->isr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;	/* ISRx registers are 4 dwords apart */
		bitpos = fls(isrptr[idx]);
		if (bitpos != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				      vlapic->isrvec_stk_top);
			}
			/* Clear the in-service bit and pop the stack. */
			isrptr[idx] &= ~(1 << (bitpos - 1));
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			return;
		}
	}
}
396 
/* Extract the field selected by 'mask' from an LVT register image. */
static __inline int
vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
{
	uint32_t val;

	val = *lvt;
	return (val & mask);
}
402 
403 static __inline int
404 vlapic_periodic_timer(struct vlapic *vlapic)
405 {
406 	uint32_t *lvt;
407 
408 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
409 
410 	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
411 }
412 
413 static void
414 vlapic_fire_timer(struct vlapic *vlapic)
415 {
416 	int vector;
417 	uint32_t *lvt;
418 
419 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
420 
421 	if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
422 		vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
423 		vlapic_set_intr_ready(vlapic, vector);
424 	}
425 }
426 
/*
 * Emulate a write to the interrupt command register.  Fixed and NMI
 * IPIs are delivered entirely in the kernel; INIT and STARTUP IPIs
 * drive the AP boot-state machine.  Returns 0 when handled here and 1
 * when the operation must be completed in userland.
 */
static int
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
{
	int i;
	cpuset_t dmask;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;

	/*
	 * In x2apic mode the destination is the entire high dword; in
	 * xAPIC mode it is the top 8 bits of the combined 64-bit ICR.
	 */
	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		/* Build the target vcpu set from the destination shorthand. */
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			CPU_SETOF(dest, &dmask);
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		}

		/* Deliver to each target vcpu in turn. */
		while ((i = cpusetobj_ffs(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED)
				lapic_set_intr(vlapic->vm, i, vec);
			else
				vm_inject_nmi(vlapic->vm, i);
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		/* Only vcpu 0 (the BSP) may INIT another valid vcpu. */
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			/*
			 * Hand the AP spin-up off to userland; the SIPI
			 * vector selects the page the AP starts executing at.
			 */
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			/*
			 * XXX this assumes that the startup IPI always succeeds
			 */
			vlapic2->boot_state = BS_RUNNING;
			vm_activate_cpu(vlapic2->vm, dest);

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}
518 
/*
 * Return the highest-priority deliverable vector in the IRR, or -1 if
 * no pending interrupt exceeds the current processor priority.
 */
int
vlapic_pending_intr(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = &vlapic->apic;
	int	  	 idx, i, bitpos, vector;
	uint32_t	*irrptr, val;

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;	/* IRRx registers are 4 dwords apart */
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			/*
			 * Deliverable only if its priority class is above
			 * the current processor priority.
			 */
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				return (vector);
			} else
				break;
		}
	}
	VLAPIC_CTR0(vlapic, "no pending intr");
	return (-1);
}
548 
/*
 * Transition 'vector' from pending (IRR) to in-service (ISR), push it
 * on the isrvec stack and recompute the processor priority.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC	*lapic = &vlapic->apic;
	uint32_t	*irrptr, *isrptr;
	int		idx, stk_top;

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}
582 
583 int
584 vlapic_op_mem_read(void* dev, uint64_t gpa, opsize_t size, uint64_t *data)
585 {
586 	struct vlapic 	*vlapic = (struct vlapic*)dev;
587 	struct LAPIC	*lapic = &vlapic->apic;
588 	uint64_t	 offset = gpa & ~(PAGE_SIZE);
589 	uint32_t	*reg;
590 	int		 i;
591 
592 	if (offset > sizeof(*lapic)) {
593 		*data = 0;
594 		return 0;
595 	}
596 
597 	offset &= ~3;
598 	switch(offset)
599 	{
600 		case APIC_OFFSET_ID:
601 			if (x2apic(vlapic))
602 				*data = vlapic->vcpuid;
603 			else
604 				*data = vlapic->vcpuid << 24;
605 			break;
606 		case APIC_OFFSET_VER:
607 			*data = lapic->version;
608 			break;
609 		case APIC_OFFSET_TPR:
610 			*data = lapic->tpr;
611 			break;
612 		case APIC_OFFSET_APR:
613 			*data = lapic->apr;
614 			break;
615 		case APIC_OFFSET_PPR:
616 			*data = lapic->ppr;
617 			break;
618 		case APIC_OFFSET_EOI:
619 			*data = lapic->eoi;
620 			break;
621 		case APIC_OFFSET_LDR:
622 			*data = lapic->ldr;
623 			break;
624 		case APIC_OFFSET_DFR:
625 			*data = lapic->dfr;
626 			break;
627 		case APIC_OFFSET_SVR:
628 			*data = lapic->svr;
629 			break;
630 		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
631 			i = (offset - APIC_OFFSET_ISR0) >> 2;
632 			reg = &lapic->isr0;
633 			*data = *(reg + i);
634 			break;
635 		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
636 			i = (offset - APIC_OFFSET_TMR0) >> 2;
637 			reg = &lapic->tmr0;
638 			*data = *(reg + i);
639 			break;
640 		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
641 			i = (offset - APIC_OFFSET_IRR0) >> 2;
642 			reg = &lapic->irr0;
643 			*data = atomic_load_acq_int(reg + i);
644 			break;
645 		case APIC_OFFSET_ESR:
646 			*data = lapic->esr;
647 			break;
648 		case APIC_OFFSET_ICR_LOW:
649 			*data = lapic->icr_lo;
650 			break;
651 		case APIC_OFFSET_ICR_HI:
652 			*data = lapic->icr_hi;
653 			break;
654 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
655 			reg = vlapic_get_lvt(vlapic, offset);
656 			*data = *(reg);
657 			break;
658 		case APIC_OFFSET_ICR:
659 			*data = lapic->icr_timer;
660 			break;
661 		case APIC_OFFSET_CCR:
662 			*data = vlapic_get_ccr(vlapic);
663 			break;
664 		case APIC_OFFSET_DCR:
665 			*data = lapic->dcr_timer;
666 			break;
667 		case APIC_OFFSET_RRR:
668 		default:
669 			*data = 0;
670 			break;
671 	}
672 	return 0;
673 }
674 
675 int
676 vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
677 {
678 	struct vlapic 	*vlapic = (struct vlapic*)dev;
679 	struct LAPIC	*lapic = &vlapic->apic;
680 	uint64_t	 offset = gpa & ~(PAGE_SIZE);
681 	uint32_t	*reg;
682 	int		retval;
683 
684 	if (offset > sizeof(*lapic)) {
685 		return 0;
686 	}
687 
688 	retval = 0;
689 	offset &= ~3;
690 	switch(offset)
691 	{
692 		case APIC_OFFSET_ID:
693 			break;
694 		case APIC_OFFSET_TPR:
695 			lapic->tpr = data & 0xff;
696 			vlapic_update_ppr(vlapic);
697 			break;
698 		case APIC_OFFSET_EOI:
699 			vlapic_process_eoi(vlapic);
700 			break;
701 		case APIC_OFFSET_LDR:
702 			break;
703 		case APIC_OFFSET_DFR:
704 			break;
705 		case APIC_OFFSET_SVR:
706 			lapic->svr = data;
707 			break;
708 		case APIC_OFFSET_ICR_LOW:
709 			if (!x2apic(vlapic)) {
710 				data &= 0xffffffff;
711 				data |= (uint64_t)lapic->icr_hi << 32;
712 			}
713 			retval = lapic_process_icr(vlapic, data);
714 			break;
715 		case APIC_OFFSET_ICR_HI:
716 			if (!x2apic(vlapic)) {
717 				retval = 0;
718 				lapic->icr_hi = data;
719 			}
720 			break;
721 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
722 			reg = vlapic_get_lvt(vlapic, offset);
723 			if (!(lapic->svr & APIC_SVR_ENABLE)) {
724 				data |= APIC_LVT_M;
725 			}
726 			*reg = data;
727 			// vlapic_dump_lvt(offset, reg);
728 			break;
729 		case APIC_OFFSET_ICR:
730 			lapic->icr_timer = data;
731 			vlapic_start_timer(vlapic, 0);
732 			break;
733 
734 		case APIC_OFFSET_DCR:
735 			lapic->dcr_timer = data;
736 			vlapic->divisor = vlapic_timer_divisor(data);
737 			break;
738 
739 		case APIC_OFFSET_ESR:
740 			vlapic_update_errors(vlapic);
741 			break;
742 		case APIC_OFFSET_VER:
743 		case APIC_OFFSET_APR:
744 		case APIC_OFFSET_PPR:
745 		case APIC_OFFSET_RRR:
746 		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
747 		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
748 		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
749 		case APIC_OFFSET_CCR:
750 		default:
751 			// Read only.
752 			break;
753 	}
754 
755 	return (retval);
756 }
757 
/*
 * Advance the virtual timer by the host ticks elapsed since the last
 * call.  Returns -1 when the timer is disabled or has already expired
 * (one-shot), 0 when the timer fired during this call, otherwise an
 * estimate of the host ticks remaining until the next expiry.
 */
int
vlapic_timer_tick(struct vlapic *vlapic)
{
	int curticks, delta, periodic, fired;
	uint32_t ccr;
	uint32_t decrement, leftover;

restart:
	curticks = ticks;
	delta = curticks - vlapic->ccr_ticks;

	/* Local APIC timer is disabled */
	if (vlapic->apic.icr_timer == 0)
		return (-1);

	/* One-shot mode and timer has already counted down to zero */
	periodic = vlapic_periodic_timer(vlapic);
	if (!periodic && vlapic->apic.ccr_timer == 0)
		return (-1);
	/*
	 * The 'curticks' and 'ccr_ticks' are out of sync by more than
	 * 2^31 ticks. We deal with this by restarting the timer.
	 */
	if (delta < 0) {
		vlapic_start_timer(vlapic, 0);
		goto restart;
	}

	fired = 0;
	/*
	 * Timer counts consumed per host tick.
	 * NOTE(review): this is zero if bus_freq / divisor < hz, which
	 * would make the division at the bottom fault -- confirm the
	 * expected ranges of tsc_freq, divisor and hz.
	 */
	decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;

	vlapic->ccr_ticks = curticks;
	ccr = vlapic->apic.ccr_timer;

	while (delta-- > 0) {
		if (ccr > decrement) {
			ccr -= decrement;
			continue;
		}

		/* Trigger the local apic timer interrupt */
		vlapic_fire_timer(vlapic);
		if (periodic) {
			/* Reload, crediting the counts beyond expiry. */
			leftover = decrement - ccr;
			vlapic_start_timer(vlapic, leftover);
			ccr = vlapic->apic.ccr_timer;
		} else {
			/*
			 * One-shot timer has counted down to zero.
			 */
			ccr = 0;
		}
		fired = 1;
		break;
	}

	vlapic->apic.ccr_timer = ccr;

	if (!fired)
		return ((ccr / decrement) + 1);
	else
		return (0);
}
821 
/* Dispatch table hooking the vlapic into the generic vdev framework. */
struct vdev_ops vlapic_dev_ops = {
	.name = "vlapic",
	.init = vlapic_op_init,
	.reset = vlapic_op_reset,
	.halt = vlapic_op_halt,
	.memread = vlapic_op_mem_read,
	.memwrite = vlapic_op_mem_write,
};
/* One statically allocated MMIO region descriptor per possible vcpu. */
static struct io_region vlapic_mmio[VM_MAXCPU];
831 
/*
 * Allocate and initialize the virtual local APIC for 'vcpuid', set up
 * its MMIO window at the default APIC base and register it with the
 * vdev framework.  Released with vlapic_cleanup().
 */
struct vlapic *
vlapic_init(struct vm *vm, int vcpuid)
{
	struct vlapic 		*vlapic;

	vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
	vlapic->vm = vm;
	vlapic->vcpuid = vcpuid;

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	/* vcpu 0 is the bootstrap processor. */
	if (vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic->ops = &vlapic_dev_ops;

	/* Claim this vcpu's slot in the static MMIO region table. */
	vlapic->mmio = vlapic_mmio + vcpuid;
	vlapic->mmio->base = DEFAULT_APIC_BASE;
	vlapic->mmio->len = PAGE_SIZE;
	vlapic->mmio->attr = MMIO_READ|MMIO_WRITE;
	vlapic->mmio->vcpu = vcpuid;

	vdev_register(&vlapic_dev_ops, vlapic);

	vlapic_op_init(vlapic);

	return (vlapic);
}
860 
/* Release everything acquired by vlapic_init(), in reverse order. */
void
vlapic_cleanup(struct vlapic *vlapic)
{
	vlapic_op_halt(vlapic);
	vdev_unregister(vlapic);
	free(vlapic, M_VLAPIC);
}
868 
/* Return the current value of the vcpu's IA32_APIC_BASE MSR image. */
uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}
875 
876 void
877 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
878 {
879 	int err;
880 	enum x2apic_state state;
881 
882 	err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
883 	if (err)
884 		panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
885 
886 	if (state == X2APIC_DISABLED)
887 		val &= ~APICBASE_X2APIC;
888 
889 	vlapic->msr_apicbase = val;
890 }
891 
892 void
893 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
894 {
895 	struct vlapic *vlapic;
896 
897 	vlapic = vm_lapic(vm, vcpuid);
898 
899 	if (state == X2APIC_DISABLED)
900 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
901 }
902