xref: /illumos-gate/usr/src/uts/intel/io/vmm/io/vlapic.c (revision fdad6fbf87b201fdb96a704fc41fa8be1e4efbc8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  * Copyright (c) 2019 Joyent, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * This file and its contents are supplied under the terms of the
31  * Common Development and Distribution License ("CDDL"), version 1.0.
32  * You may only use this file in accordance with the terms of version
33  * 1.0 of the CDDL.
34  *
35  * A full copy of the text of the CDDL should have accompanied this
36  * source.  A copy of the CDDL is also available via the Internet at
37  * http://www.illumos.org/license/CDDL.
38  */
39 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
40 
41 /*
42  * Copyright 2014 Pluribus Networks Inc.
43  * Copyright 2018 Joyent, Inc.
44  * Copyright 2024 Oxide Computer Company
45  */
46 
47 #include <sys/cdefs.h>
48 
49 #include <sys/param.h>
50 #include <sys/kernel.h>
51 #include <sys/kmem.h>
52 #include <sys/mutex.h>
53 #include <sys/systm.h>
54 #include <sys/cpuset.h>
55 
56 #include <x86/specialreg.h>
57 #include <x86/apicreg.h>
58 
59 #include <machine/clock.h>
60 
61 #include <machine/vmm.h>
62 #include <sys/vmm_kernel.h>
63 
64 #include "vmm_lapic.h"
65 #include "vmm_stat.h"
66 
67 #include "vlapic.h"
68 #include "vlapic_priv.h"
69 #include "vioapic.h"
70 
71 
72 /*
73  * The 4 high bits of a given interrupt vector represent its priority.  The same
74  * is true for the contents of the TPR when it is used to calculate the ultimate
75  * PPR of an APIC - the 4 high bits hold the priority.
76  */
77 #define	PRIO(x)			((x) & 0xf0)
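/*
 * For example, PRIO(0x45) yields priority class 0x40, and a TPR of 0x4f
 * also maps to class 0x40; the low nibble never affects a priority
 * comparison.
 */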
78 
79 #define	VLAPIC_VERSION		(0x14)
80 
81 /*
82  * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
83  * vlapic_callout_handler() and vcpu accesses to:
84  * - timer_freq_bt, timer_period_bt, timer_fire_bt
85  * - timer LVT register
86  */
87 #define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
88 #define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
89 #define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))
90 
91 /*
92  * APIC timer frequency:
93  * - arbitrary but chosen to be in the ballpark of contemporary hardware.
94  * - power-of-two to avoid loss of precision when calculating times
95  */
96 #define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
97 
98 #define	APICBASE_ADDR_MASK	0xfffffffffffff000UL
99 
100 #define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
101 		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
102 		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
103 		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)
104 
105 static void vlapic_set_error(struct vlapic *, uint32_t, bool);
106 static void vlapic_callout_handler(void *arg);
107 
108 static __inline bool
109 vlapic_x2mode(const struct vlapic *vlapic)
110 {
111 	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
112 }
113 
114 bool
115 vlapic_hw_disabled(const struct vlapic *vlapic)
116 {
117 	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
118 }
119 
120 static __inline bool
121 vlapic_sw_disabled(const struct vlapic *vlapic)
122 {
123 	const struct LAPIC *lapic = vlapic->apic_page;
124 
125 	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
126 }
127 
128 static __inline bool
129 vlapic_enabled(const struct vlapic *vlapic)
130 {
131 	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
132 }
133 
134 static __inline uint32_t
135 vlapic_get_id(const struct vlapic *vlapic)
136 {
137 
138 	if (vlapic_x2mode(vlapic))
139 		return (vlapic->vcpuid);
140 	else
141 		return (vlapic->vcpuid << 24);
142 }
143 
144 static uint32_t
145 x2apic_ldr(const struct vlapic *vlapic)
146 {
147 	int apicid;
148 	uint32_t ldr;
149 
150 	apicid = vlapic_get_id(vlapic);
151 	ldr = 1 << (apicid & 0xf);
152 	ldr |= (apicid & 0xffff0) << 12;
153 	return (ldr);
154 }
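/*
 * For example, an x2APIC ID of 0x25 (cluster 2, intra-cluster ID 5) yields
 * an LDR of 0x00020020: bit 5 set in the low 16-bit logical ID field and
 * cluster 2 in the upper 16 bits.
 */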
155 
156 void
157 vlapic_dfr_write_handler(struct vlapic *vlapic)
158 {
159 	struct LAPIC *lapic;
160 
161 	lapic = vlapic->apic_page;
162 	if (vlapic_x2mode(vlapic)) {
163 		/* Ignore write to DFR in x2APIC mode */
164 		lapic->dfr = 0;
165 		return;
166 	}
167 
168 	lapic->dfr &= APIC_DFR_MODEL_MASK;
169 	lapic->dfr |= APIC_DFR_RESERVED;
170 }
171 
172 void
173 vlapic_ldr_write_handler(struct vlapic *vlapic)
174 {
175 	struct LAPIC *lapic;
176 
177 	lapic = vlapic->apic_page;
178 
179 	/* LDR is read-only in x2apic mode */
180 	if (vlapic_x2mode(vlapic)) {
181 		/* Ignore write to LDR in x2APIC mode */
182 		lapic->ldr = x2apic_ldr(vlapic);
183 	} else {
184 		lapic->ldr &= ~APIC_LDR_RESERVED;
185 	}
186 }
187 
188 void
189 vlapic_id_write_handler(struct vlapic *vlapic)
190 {
191 	struct LAPIC *lapic;
192 
193 	/*
194 	 * We don't allow the ID register to be modified, so reset it back to
195 	 * its default value.
196 	 */
197 	lapic = vlapic->apic_page;
198 	lapic->id = vlapic_get_id(vlapic);
199 }
200 
201 static int
202 vlapic_timer_divisor(uint32_t dcr)
203 {
204 	switch (dcr & 0xB) {
205 	case APIC_TDCR_1:
206 		return (1);
207 	case APIC_TDCR_2:
208 		return (2);
209 	case APIC_TDCR_4:
210 		return (4);
211 	case APIC_TDCR_8:
212 		return (8);
213 	case APIC_TDCR_16:
214 		return (16);
215 	case APIC_TDCR_32:
216 		return (32);
217 	case APIC_TDCR_64:
218 		return (64);
219 	case APIC_TDCR_128:
220 		return (128);
221 	default:
222 		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
223 	}
224 }
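/*
 * The divisor is encoded in DCR bits 0, 1, and 3 (hence the 0xB mask
 * above); for example, an encoding of 0x0 selects divide-by-2 while 0xB
 * selects divide-by-1.
 */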
225 
226 static uint32_t
227 vlapic_get_ccr(struct vlapic *vlapic)
228 {
229 	struct LAPIC *lapic;
230 	uint32_t ccr;
231 
232 	ccr = 0;
233 	lapic = vlapic->apic_page;
234 
235 	VLAPIC_TIMER_LOCK(vlapic);
236 	if (callout_active(&vlapic->callout)) {
237 		/*
238 		 * If the timer is scheduled to expire in the future then
239 		 * compute the value of 'ccr' based on the remaining time.
240 		 */
241 
242 		const hrtime_t now = gethrtime();
243 		if (vlapic->timer_fire_when > now) {
244 			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
245 			    vlapic->timer_cur_freq);
246 		}
247 	}
248 
249 	/*
250 	 * Clamp CCR value to that programmed in ICR - its theoretical maximum.
251 	 * Normal operation should never result in this being necessary.  Only
252 	 * strange circumstances due to state importation as part of instance
253 	 * save/restore or live-migration require such wariness.
254 	 */
255 	if (ccr > lapic->icr_timer) {
256 		ccr = lapic->icr_timer;
257 		vlapic->stats.vs_clamp_ccr++;
258 	}
259 	VLAPIC_TIMER_UNLOCK(vlapic);
260 	return (ccr);
261 }
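/*
 * As an illustration of the CCR computation: with the timer due to fire
 * 500ms from now and timer_cur_freq at its divide-by-2 value of 64MiHz,
 * hrt_freq_count() yields roughly 0x2000000 remaining ticks, which is then
 * clamped against icr_timer.
 */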
262 
263 static void
264 vlapic_update_divider(struct vlapic *vlapic)
265 {
266 	struct LAPIC *lapic = vlapic->apic_page;
267 
268 	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
269 
270 	vlapic->timer_cur_freq =
271 	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
272 	vlapic->timer_period =
273 	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
274 }
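/*
 * For example, with the divisor at its divide-by-2 reset value, the
 * effective frequency is 64MiHz, so an icr_timer count of 0x4000000
 * (64 * 1024 * 1024) presumably corresponds to a one-second timer_period.
 */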
275 
276 void
277 vlapic_dcr_write_handler(struct vlapic *vlapic)
278 {
279 	/*
280 	 * Update the timer frequency and the timer period.
281 	 *
282 	 * XXX changes to the frequency divider will not take effect until
283 	 * the timer is reloaded.
284 	 */
285 	VLAPIC_TIMER_LOCK(vlapic);
286 	vlapic_update_divider(vlapic);
287 	VLAPIC_TIMER_UNLOCK(vlapic);
288 }
289 
290 void
291 vlapic_esr_write_handler(struct vlapic *vlapic)
292 {
293 	struct LAPIC *lapic;
294 
295 	lapic = vlapic->apic_page;
296 	lapic->esr = vlapic->esr_pending;
297 	vlapic->esr_pending = 0;
298 }
299 
300 vcpu_notify_t
301 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
302 {
303 	struct LAPIC *lapic;
304 	uint32_t *irrptr, *tmrptr, mask, tmr;
305 	int idx;
306 
307 	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
308 
309 	lapic = vlapic->apic_page;
310 	if (!(lapic->svr & APIC_SVR_ENABLE)) {
311 		/* ignore interrupt on software-disabled APIC */
312 		return (VCPU_NOTIFY_NONE);
313 	}
314 
315 	if (vector < 16) {
316 		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
317 		    false);
318 
319 		/*
320 		 * If the error LVT is configured to interrupt the vCPU, it will
321 		 * have delivered a notification through that mechanism.
322 		 */
323 		return (VCPU_NOTIFY_NONE);
324 	}
325 
326 	if (vlapic->ops.set_intr_ready) {
327 		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
328 	}
329 
330 	idx = (vector / 32) * 4;
331 	mask = 1 << (vector % 32);
332 	tmrptr = &lapic->tmr0;
333 	irrptr = &lapic->irr0;
334 
335 	/*
336 	 * Update TMR for requested vector, if necessary.
337 	 * This must be done prior to asserting the bit in IRR so that the
338 	 * proper TMR state is always visible before the to-be-queued interrupt
339 	 * can be injected.
340 	 */
341 	tmr = atomic_load_acq_32(&tmrptr[idx]);
342 	if ((tmr & mask) != (level ? mask : 0)) {
343 		if (level) {
344 			atomic_set_int(&tmrptr[idx], mask);
345 		} else {
346 			atomic_clear_int(&tmrptr[idx], mask);
347 		}
348 	}
349 
350 	/* Now set the bit in IRR */
351 	atomic_set_int(&irrptr[idx], mask);
352 
353 	return (VCPU_NOTIFY_EXIT);
354 }
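/*
 * As an indexing example for the above: vector 0x41 falls in the third
 * 32-bit IRR word (idx = (0x41 / 32) * 4 = 8, the stride of 4 reflecting
 * the 16-byte register spacing in the APIC page) with mask 1 << 1 = 0x2.
 */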
355 
356 static __inline uint32_t *
357 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
358 {
359 	struct LAPIC	*lapic = vlapic->apic_page;
360 	int		i;
361 
362 	switch (offset) {
363 	case APIC_OFFSET_CMCI_LVT:
364 		return (&lapic->lvt_cmci);
365 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
366 		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
367 		return ((&lapic->lvt_timer) + i);
368 	default:
369 		panic("vlapic_get_lvt: invalid LVT\n");
370 	}
371 }
372 
373 static __inline int
374 lvt_off_to_idx(uint32_t offset)
375 {
376 	int index;
377 
378 	switch (offset) {
379 	case APIC_OFFSET_CMCI_LVT:
380 		index = APIC_LVT_CMCI;
381 		break;
382 	case APIC_OFFSET_TIMER_LVT:
383 		index = APIC_LVT_TIMER;
384 		break;
385 	case APIC_OFFSET_THERM_LVT:
386 		index = APIC_LVT_THERMAL;
387 		break;
388 	case APIC_OFFSET_PERF_LVT:
389 		index = APIC_LVT_PMC;
390 		break;
391 	case APIC_OFFSET_LINT0_LVT:
392 		index = APIC_LVT_LINT0;
393 		break;
394 	case APIC_OFFSET_LINT1_LVT:
395 		index = APIC_LVT_LINT1;
396 		break;
397 	case APIC_OFFSET_ERROR_LVT:
398 		index = APIC_LVT_ERROR;
399 		break;
400 	default:
401 		index = -1;
402 		break;
403 	}
404 	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
405 	    "invalid lvt index %d for offset %x", index, offset));
406 
407 	return (index);
408 }
409 
410 static __inline uint32_t
411 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
412 {
413 	int idx;
414 	uint32_t val;
415 
416 	idx = lvt_off_to_idx(offset);
417 	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
418 	return (val);
419 }
420 
421 void
422 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
423 {
424 	uint32_t *lvtptr, mask, val;
425 	struct LAPIC *lapic;
426 	int idx;
427 
428 	lapic = vlapic->apic_page;
429 	lvtptr = vlapic_get_lvtptr(vlapic, offset);
430 	val = *lvtptr;
431 	idx = lvt_off_to_idx(offset);
432 
433 	if (!(lapic->svr & APIC_SVR_ENABLE))
434 		val |= APIC_LVT_M;
435 	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
436 	switch (offset) {
437 	case APIC_OFFSET_TIMER_LVT:
438 		mask |= APIC_LVTT_TM;
439 		break;
440 	case APIC_OFFSET_ERROR_LVT:
441 		break;
442 	case APIC_OFFSET_LINT0_LVT:
443 	case APIC_OFFSET_LINT1_LVT:
444 		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
445 		/* FALLTHROUGH */
446 	default:
447 		mask |= APIC_LVT_DM;
448 		break;
449 	}
450 	val &= mask;
451 	*lvtptr = val;
452 	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
453 }
454 
455 static void
456 vlapic_refresh_lvts(struct vlapic *vlapic)
457 {
458 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
459 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
460 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
461 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
462 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
463 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
464 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
465 }
466 
467 static void
468 vlapic_mask_lvts(struct vlapic *vlapic)
469 {
470 	struct LAPIC *lapic = vlapic->apic_page;
471 
472 	lapic->lvt_cmci |= APIC_LVT_M;
473 	lapic->lvt_timer |= APIC_LVT_M;
474 	lapic->lvt_thermal |= APIC_LVT_M;
475 	lapic->lvt_pcint |= APIC_LVT_M;
476 	lapic->lvt_lint0 |= APIC_LVT_M;
477 	lapic->lvt_lint1 |= APIC_LVT_M;
478 	lapic->lvt_error |= APIC_LVT_M;
479 	vlapic_refresh_lvts(vlapic);
480 }
481 
482 static int
483 vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
484 {
485 	uint32_t mode, reg, vec;
486 	vcpu_notify_t notify;
487 
488 	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);
489 
490 	if (reg & APIC_LVT_M)
491 		return (0);
492 	vec = reg & APIC_LVT_VECTOR;
493 	mode = reg & APIC_LVT_DM;
494 
495 	switch (mode) {
496 	case APIC_LVT_DM_FIXED:
497 		if (vec < 16) {
498 			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
499 			    lvt == APIC_LVT_ERROR);
500 			return (0);
501 		}
502 		notify = vlapic_set_intr_ready(vlapic, vec, false);
503 		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
504 		break;
505 	case APIC_LVT_DM_NMI:
506 		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
507 		break;
508 	case APIC_LVT_DM_EXTINT:
509 		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
510 		break;
511 	default:
512 		/* Other modes ignored */
513 		return (0);
514 	}
515 	return (1);
516 }
517 
518 static uint_t
519 vlapic_active_isr(struct vlapic *vlapic)
520 {
521 	int i;
522 	uint32_t *isrp;
523 
524 	isrp = &vlapic->apic_page->isr7;
525 
526 	for (i = 7; i >= 0; i--, isrp -= 4) {
527 		uint32_t reg = *isrp;
528 
529 		if (reg != 0) {
530 			uint_t vec = (i * 32) + bsrl(reg);
531 
532 			if (vec < 16) {
533 				/*
534 				 * Truncate the illegal low vectors to a value of
535 				 * 0, indicating that no active ISR was found.
536 				 */
537 				return (0);
538 			}
539 			return (vec);
540 		}
541 	}
542 
543 	return (0);
544 }
545 
546 /*
547  * After events which might arbitrarily change the value of PPR, such as a TPR
548  * write or an EOI, calculate that new PPR value and store it in the APIC page.
549  */
550 static void
551 vlapic_update_ppr(struct vlapic *vlapic)
552 {
553 	int isrvec, tpr, ppr;
554 
555 	isrvec = vlapic_active_isr(vlapic);
556 	tpr = vlapic->apic_page->tpr;
557 
558 	/*
559 	 * Algorithm adopted from section "Interrupt, Task and Processor
560 	 * Priority" in Intel Architecture Manual Vol 3a.
561 	 */
562 	if (PRIO(tpr) >= PRIO(isrvec)) {
563 		ppr = tpr;
564 	} else {
565 		ppr = PRIO(isrvec);
566 	}
567 
568 	vlapic->apic_page->ppr = ppr;
569 }
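/*
 * For example, with TPR = 0x30 and vector 0x45 in-service, PRIO(0x45) =
 * 0x40 exceeds PRIO(0x30), so the PPR becomes 0x40.  Had the TPR been
 * 0x50 instead, the PPR would take the full TPR value of 0x50.
 */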
570 
571 /*
572  * When a vector is asserted in ISR as in-service, the PPR must be raised to the
573  * priority of that vector, as the vCPU would have been at a lower priority in
574  * order for the vector to be accepted.
575  */
576 static void
577 vlapic_raise_ppr(struct vlapic *vlapic, int vec)
578 {
579 	struct LAPIC *lapic = vlapic->apic_page;
580 	int ppr;
581 
582 	ppr = PRIO(vec);
583 
584 	lapic->ppr = ppr;
585 }
586 
587 void
588 vlapic_sync_tpr(struct vlapic *vlapic)
589 {
590 	vlapic_update_ppr(vlapic);
591 }
592 
593 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");
594 
595 static void
596 vlapic_process_eoi(struct vlapic *vlapic)
597 {
598 	struct LAPIC	*lapic = vlapic->apic_page;
599 	uint32_t	*isrptr, *tmrptr;
600 	int		i;
601 	uint_t		idx, bitpos, vector;
602 
603 	isrptr = &lapic->isr0;
604 	tmrptr = &lapic->tmr0;
605 
606 	for (i = 7; i >= 0; i--) {
607 		idx = i * 4;
608 		if (isrptr[idx] != 0) {
609 			bitpos = bsrl(isrptr[idx]);
610 			vector = i * 32 + bitpos;
611 
612 			isrptr[idx] &= ~(1 << bitpos);
613 			vlapic_update_ppr(vlapic);
614 			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
615 				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
616 				    vector);
617 			}
618 			return;
619 		}
620 	}
621 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
622 }
623 
624 static __inline int
625 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
626 {
627 
628 	return (lvt & mask);
629 }
630 
631 static __inline int
632 vlapic_periodic_timer(struct vlapic *vlapic)
633 {
634 	uint32_t lvt;
635 
636 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
637 
638 	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
639 }
640 
641 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
642 
643 static void
644 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
645 {
646 
647 	vlapic->esr_pending |= mask;
648 
649 	/*
650 	 * Avoid infinite recursion if the error LVT itself is configured with
651 	 * an illegal vector.
652 	 */
653 	if (lvt_error)
654 		return;
655 
656 	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
657 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
658 	}
659 }
660 
661 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
662 
663 static void
664 vlapic_fire_timer(struct vlapic *vlapic)
665 {
666 	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
667 
668 	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
669 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
670 	}
671 }
672 
673 static VMM_STAT(VLAPIC_INTR_CMC,
674 	"corrected machine check interrupts generated by vlapic");
675 
676 void
677 vlapic_fire_cmci(struct vlapic *vlapic)
678 {
679 
680 	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
681 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
682 	}
683 }
684 
685 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
686 	"lvts triggered");
687 
688 int
689 vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
690 {
691 	if (!vlapic_enabled(vlapic)) {
692 		/*
693 		 * When the local APIC is global/hardware disabled,
694 		 * LINT[1:0] pins are configured as INTR and NMI pins,
695 		 * respectively.
696 		 */
697 		switch (vector) {
698 			case APIC_LVT_LINT0:
699 				(void) vm_inject_extint(vlapic->vm,
700 				    vlapic->vcpuid);
701 				break;
702 			case APIC_LVT_LINT1:
703 				(void) vm_inject_nmi(vlapic->vm,
704 				    vlapic->vcpuid);
705 				break;
706 			default:
707 				break;
708 		}
709 		return (0);
710 	}
711 
712 	switch (vector) {
713 	case APIC_LVT_LINT0:
714 	case APIC_LVT_LINT1:
715 	case APIC_LVT_TIMER:
716 	case APIC_LVT_ERROR:
717 	case APIC_LVT_PMC:
718 	case APIC_LVT_THERMAL:
719 	case APIC_LVT_CMCI:
720 		if (vlapic_fire_lvt(vlapic, vector)) {
721 			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
722 			    LVTS_TRIGGERRED, vector, 1);
723 		}
724 		break;
725 	default:
726 		return (EINVAL);
727 	}
728 	return (0);
729 }
730 
731 static void
732 vlapic_callout_reset(struct vlapic *vlapic)
733 {
734 	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
735 	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
736 }
737 
738 static void
739 vlapic_callout_handler(void *arg)
740 {
741 	struct vlapic *vlapic = arg;
742 
743 	VLAPIC_TIMER_LOCK(vlapic);
744 	if (callout_pending(&vlapic->callout))	/* callout was reset */
745 		goto done;
746 
747 	if (!callout_active(&vlapic->callout))	/* callout was stopped */
748 		goto done;
749 
750 	callout_deactivate(&vlapic->callout);
751 
752 	vlapic_fire_timer(vlapic);
753 
754 	/*
755 	 * We should not end up here with timer_period == 0, but to prevent a
756 	 * runaway periodic timer, it is checked anyway.
757 	 */
758 	if (vlapic_periodic_timer(vlapic) && vlapic->timer_period != 0) {
759 		/*
760 		 * Compute the delta between when the timer was supposed to
761 		 * fire and the present time.  We can depend on the fact that
762 	 * cyclics (which underlie these callouts) will never be called
763 		 * early.
764 		 */
765 		const hrtime_t now = gethrtime();
766 		const hrtime_t delta = now - vlapic->timer_fire_when;
767 		if (delta >= vlapic->timer_period) {
768 			/*
769 			 * If we are so behind that we have missed an entire
770 			 * timer period, reset the time base rather than
771 			 * attempting to catch up.
772 			 */
773 			vlapic->timer_fire_when = now + vlapic->timer_period;
774 		} else {
775 			vlapic->timer_fire_when += vlapic->timer_period;
776 		}
777 		vlapic_callout_reset(vlapic);
778 	} else {
779 		/*
780 		 * Clear the target time so that logic can distinguish a timer
781 		 * which has fired (where the value is zero) from one
782 		 * which is held pending due to the instance being paused (where
783 		 * the value is non-zero, but the callout is not pending).
784 		 */
785 		vlapic->timer_fire_when = 0;
786 	}
787 done:
788 	VLAPIC_TIMER_UNLOCK(vlapic);
789 }
790 
791 void
792 vlapic_icrtmr_write_handler(struct vlapic *vlapic)
793 {
794 	struct LAPIC *lapic = vlapic->apic_page;
795 
796 	VLAPIC_TIMER_LOCK(vlapic);
797 	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
798 	    lapic->icr_timer);
799 	if (vlapic->timer_period != 0) {
800 		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
801 		vlapic_callout_reset(vlapic);
802 	} else {
803 		vlapic->timer_fire_when = 0;
804 		callout_stop(&vlapic->callout);
805 	}
806 	VLAPIC_TIMER_UNLOCK(vlapic);
807 }
808 
809 /*
810  * This function populates 'dmask' with the set of vcpus that match the
811  * addressing specified by the (dest, phys, lowprio) tuple.
812  *
813  * 'x2apic_dest' specifies whether 'dest' is interpreted as an x2APIC (32-bit)
814  * or an xAPIC (8-bit) destination field.
815  */
816 void
817 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
818     bool lowprio, bool x2apic_dest)
819 {
820 	struct vlapic *vlapic;
821 	uint32_t dfr, ldr, ldest, cluster;
822 	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
823 	cpuset_t amask;
824 	int vcpuid;
825 
826 	if ((x2apic_dest && dest == 0xffffffff) ||
827 	    (!x2apic_dest && dest == 0xff)) {
828 		/*
829 		 * Broadcast in both logical and physical modes.
830 		 */
831 		*dmask = vm_active_cpus(vm);
832 		return;
833 	}
834 
835 	if (phys) {
836 		/*
837 		 * Physical mode: destination is APIC ID.
838 		 */
839 		CPU_ZERO(dmask);
840 		vcpuid = vm_apicid2vcpuid(vm, dest);
841 		amask = vm_active_cpus(vm);
842 		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
843 			CPU_SET(vcpuid, dmask);
844 	} else {
845 		/*
846 		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
847 		 * bitmask. This model is only available in the xAPIC mode.
848 		 */
849 		mda_flat_ldest = dest & 0xff;
850 
851 		/*
852 		 * In the "Cluster Model" the MDA is used to identify a
853 		 * specific cluster and a set of APICs in that cluster.
854 		 */
855 		if (x2apic_dest) {
856 			mda_cluster_id = dest >> 16;
857 			mda_cluster_ldest = dest & 0xffff;
858 		} else {
859 			mda_cluster_id = (dest >> 4) & 0xf;
860 			mda_cluster_ldest = dest & 0xf;
861 		}
862 
863 		/*
864 		 * Logical mode: match each APIC that has a bit set
865 		 * in its LDR that matches a bit in the ldest.
866 		 */
867 		CPU_ZERO(dmask);
868 		amask = vm_active_cpus(vm);
869 		while ((vcpuid = CPU_FFS(&amask)) != 0) {
870 			vcpuid--;
871 			CPU_CLR(vcpuid, &amask);
872 
873 			vlapic = vm_lapic(vm, vcpuid);
874 			dfr = vlapic->apic_page->dfr;
875 			ldr = vlapic->apic_page->ldr;
876 
877 			if ((dfr & APIC_DFR_MODEL_MASK) ==
878 			    APIC_DFR_MODEL_FLAT) {
879 				ldest = ldr >> 24;
880 				mda_ldest = mda_flat_ldest;
881 			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
882 			    APIC_DFR_MODEL_CLUSTER) {
883 				if (vlapic_x2mode(vlapic)) {
884 					cluster = ldr >> 16;
885 					ldest = ldr & 0xffff;
886 				} else {
887 					cluster = ldr >> 28;
888 					ldest = (ldr >> 24) & 0xf;
889 				}
890 				if (cluster != mda_cluster_id)
891 					continue;
892 				mda_ldest = mda_cluster_ldest;
893 			} else {
894 				/*
895 				 * Guest has configured a bad logical
896 				 * model for this vcpu - skip it.
897 				 */
898 				continue;
899 			}
900 
901 			if ((mda_ldest & ldest) != 0) {
902 				CPU_SET(vcpuid, dmask);
903 				if (lowprio)
904 					break;
905 			}
906 		}
907 	}
908 }
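/*
 * In the flat model above, matching is a bitwise intersection: an MDA of
 * 0x05 selects every APIC whose LDR high byte has bit 0 or bit 2 set, so
 * LDR values of 0x01000000 and 0x04000000 both match.
 */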
909 
910 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
911 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");
912 
913 static void
914 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
915 {
916 	struct LAPIC *lapic = vlapic->apic_page;
917 
918 	if (lapic->tpr != val) {
919 		lapic->tpr = val;
920 		vlapic_update_ppr(vlapic);
921 	}
922 }
923 
924 void
925 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
926 {
927 	uint8_t tpr;
928 
929 	if (val & ~0xf) {
930 		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
931 		return;
932 	}
933 
934 	tpr = val << 4;
935 	vlapic_set_tpr(vlapic, tpr);
936 }
937 
938 uint64_t
939 vlapic_get_cr8(const struct vlapic *vlapic)
940 {
941 	const struct LAPIC *lapic = vlapic->apic_page;
942 
943 	return (lapic->tpr >> 4);
944 }
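/*
 * CR8 carries only the high nibble of the TPR: writing CR8 = 0x3 produces
 * TPR = 0x30, and reading CR8 recovers that nibble via the shift above.
 */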
945 
946 static bool
947 vlapic_is_icr_valid(uint64_t icrval)
948 {
949 	uint32_t mode = icrval & APIC_DELMODE_MASK;
950 	uint32_t level = icrval & APIC_LEVEL_MASK;
951 	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
952 	uint32_t shorthand = icrval & APIC_DEST_MASK;
953 
954 	switch (mode) {
955 	case APIC_DELMODE_FIXED:
956 		if (trigger == APIC_TRIGMOD_EDGE)
957 			return (true);
958 		/*
959 		 * AMD allows a level assert IPI and Intel converts a level
960 		 * assert IPI into an edge IPI.
961 		 */
962 		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
963 			return (true);
964 		break;
965 	case APIC_DELMODE_LOWPRIO:
966 	case APIC_DELMODE_SMI:
967 	case APIC_DELMODE_NMI:
968 	case APIC_DELMODE_INIT:
969 		if (trigger == APIC_TRIGMOD_EDGE &&
970 		    (shorthand == APIC_DEST_DESTFLD ||
971 		    shorthand == APIC_DEST_ALLESELF)) {
972 			return (true);
973 		}
974 		/*
975 		 * AMD allows a level assert IPI and Intel converts a level
976 		 * assert IPI into an edge IPI.
977 		 */
978 		if (trigger == APIC_TRIGMOD_LEVEL &&
979 		    level == APIC_LEVEL_ASSERT &&
980 		    (shorthand == APIC_DEST_DESTFLD ||
981 		    shorthand == APIC_DEST_ALLESELF)) {
982 			return (true);
983 		}
984 		 * A level-triggered deassert INIT is defined in the Intel
985 		 * Multiprocessor Specification and the Intel Software Developer
986 		 * Manual. Per the MPS, it is required to send a level assert
987 		 * INIT to a cpu and then a level deassert INIT. Some operating
988 		 * systems, e.g. FreeBSD or Linux, use that algorithm. According
989 		 * to the SDM, a level deassert INIT is only supported by Pentium
990 		 * and P6 processors. It is always sent to all cpus regardless of
991 		 * the destination or shorthand field. It resets the arbitration
992 		 * id register. This register is not software accessible and
993 		 * only required for the APIC bus arbitration. So, the level
994 		 * deassert INIT doesn't need any emulation and we should ignore
995 		 * it. The SDM also defines that newer processors don't support
996 		 * the level deassert INIT and it is no longer valid. As it is
997 		 * defined for older systems, it can't be invalid per se.
998 		 * Otherwise, backward compatibility would be broken. However,
999 		 * when returning false here, it'll be ignored, which is the
1000 		 * when returning false here, it'll be ignored which is the
1001 		 * desired behaviour.
1002 		 */
1003 		if (mode == APIC_DELMODE_INIT &&
1004 		    trigger == APIC_TRIGMOD_LEVEL &&
1005 		    level == APIC_LEVEL_DEASSERT) {
1006 			return (false);
1007 		}
1008 		break;
1009 	case APIC_DELMODE_STARTUP:
1010 		if (shorthand == APIC_DEST_DESTFLD ||
1011 		    shorthand == APIC_DEST_ALLESELF) {
1012 			return (true);
1013 		}
1014 		break;
1015 	case APIC_DELMODE_RR:
1016 		/* Only available on AMD! */
1017 		if (trigger == APIC_TRIGMOD_EDGE &&
1018 		    shorthand == APIC_DEST_DESTFLD) {
1019 			return (true);
1020 		}
1021 		break;
1022 	case APIC_DELMODE_RESV:
1023 		return (false);
1024 	default:
1025 		panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
1026 	}
1027 
1028 	return (false);
1029 }
1030 
1031 void
1032 vlapic_icrlo_write_handler(struct vlapic *vlapic)
1033 {
1034 	int i;
1035 	cpuset_t dmask;
1036 	uint64_t icrval;
1037 	uint32_t dest, vec, mode, dsh;
1038 	struct LAPIC *lapic;
1039 
1040 	lapic = vlapic->apic_page;
1041 	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
1042 	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
1043 
1044 	/*
1045 	 * Ignore invalid combinations of the icr.
1046 	 */
1047 	if (!vlapic_is_icr_valid(icrval))
1048 		return;
1049 
1050 	if (vlapic_x2mode(vlapic))
1051 		dest = icrval >> 32;
1052 	else
1053 		dest = icrval >> (32 + 24);
1054 	vec = icrval & APIC_VECTOR_MASK;
1055 	mode = icrval & APIC_DELMODE_MASK;
1056 	dsh = icrval & APIC_DEST_MASK;
1057 
1058 	if (mode == APIC_DELMODE_FIXED && vec < 16) {
1059 		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
1060 		return;
1061 	}
1062 
1063 	if (mode == APIC_DELMODE_INIT &&
1064 	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
1065 		/* No work required to deassert INIT */
1066 		return;
1067 	}
1068 
1069 	switch (dsh) {
1070 	case APIC_DEST_DESTFLD:
1071 		vlapic_calcdest(vlapic->vm, &dmask, dest,
1072 		    (icrval & APIC_DESTMODE_LOG) == 0, false,
1073 		    vlapic_x2mode(vlapic));
1074 		break;
1075 	case APIC_DEST_SELF:
1076 		CPU_SETOF(vlapic->vcpuid, &dmask);
1077 		break;
1078 	case APIC_DEST_ALLISELF:
1079 		dmask = vm_active_cpus(vlapic->vm);
1080 		break;
1081 	case APIC_DEST_ALLESELF:
1082 		dmask = vm_active_cpus(vlapic->vm);
1083 		CPU_CLR(vlapic->vcpuid, &dmask);
1084 		break;
1085 	default:
1086 		/*
1087 		 * All possible delivery shorthands are covered above.
1088 		 * We should never end up here.
1089 		 */
1090 		panic("unknown delivery shorthand: %x", dsh);
1091 	}
1092 
1093 	while ((i = CPU_FFS(&dmask)) != 0) {
1094 		i--;
1095 		CPU_CLR(i, &dmask);
1096 		switch (mode) {
1097 		case APIC_DELMODE_FIXED:
1098 			(void) lapic_intr_edge(vlapic->vm, i, vec);
1099 			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
1100 			    VLAPIC_IPI_SEND, 1);
1101 			vmm_stat_incr(vlapic->vm, i,
1102 			    VLAPIC_IPI_RECV, 1);
1103 			break;
1104 		case APIC_DELMODE_NMI:
1105 			(void) vm_inject_nmi(vlapic->vm, i);
1106 			break;
1107 		case APIC_DELMODE_INIT:
1108 			(void) vm_inject_init(vlapic->vm, i);
1109 			break;
1110 		case APIC_DELMODE_STARTUP:
1111 			(void) vm_inject_sipi(vlapic->vm, i, vec);
1112 			break;
1113 		case APIC_DELMODE_LOWPRIO:
1114 		case APIC_DELMODE_SMI:
1115 		default:
1116 			/* Unhandled IPI modes (for now) */
1117 			break;
1118 		}
1119 	}
1120 }
1121 
1122 void
1123 vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
1124 {
1125 	const int vec = val & 0xff;
1126 
1127 	/* self-IPI is only exposed via x2APIC */
1128 	ASSERT(vlapic_x2mode(vlapic));
1129 
1130 	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
1131 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
1132 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
1133 }
1134 
1135 int
1136 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
1137 {
1138 	struct LAPIC	*lapic = vlapic->apic_page;
1139 	int		 idx, i, bitpos, vector;
1140 	uint32_t	*irrptr, val;
1141 
1142 	if (vlapic->ops.sync_state) {
1143 		(*vlapic->ops.sync_state)(vlapic);
1144 	}
1145 
1146 	irrptr = &lapic->irr0;
1147 
1148 	for (i = 7; i >= 0; i--) {
1149 		idx = i * 4;
1150 		val = atomic_load_acq_int(&irrptr[idx]);
1151 		bitpos = fls(val);
1152 		if (bitpos != 0) {
1153 			vector = i * 32 + (bitpos - 1);
1154 			if (PRIO(vector) > PRIO(lapic->ppr)) {
1155 				if (vecptr != NULL)
1156 					*vecptr = vector;
1157 				return (1);
1158 			} else
1159 				break;
1160 		}
1161 	}
1162 	return (0);
1163 }
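/*
 * The PRIO() comparison above gates delivery on priority: a pending vector
 * 0x45 (class 0x40) is reported while the PPR is below 0x40, but is held
 * once the PPR reaches 0x40, since PRIO(0x45) > PRIO(0x40) is false.
 */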
1164 
1165 void
1166 vlapic_intr_accepted(struct vlapic *vlapic, int vector)
1167 {
1168 	struct LAPIC	*lapic = vlapic->apic_page;
1169 	uint32_t	*irrptr, *isrptr;
1170 	int		idx;
1171 
1172 	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));
1173 
1174 	if (vlapic->ops.intr_accepted)
1175 		return ((*vlapic->ops.intr_accepted)(vlapic, vector));
1176 
1177 	/*
1178 	 * Clear the ready bit in IRR for the vector being accepted and
1179 	 * set the vector as in-service in ISR.
1180 	 */
1181 	idx = (vector / 32) * 4;
1182 
1183 	irrptr = &lapic->irr0;
1184 	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
1185 
1186 	isrptr = &lapic->isr0;
1187 	isrptr[idx] |= 1 << (vector % 32);
1188 
1189 	/*
1190 	 * The only way a fresh vector could be accepted into ISR is if it was
1191 	 * of a higher priority than the current PPR.  With that vector now
1192 	 * in-service, the PPR must be raised.
1193 	 */
1194 	vlapic_raise_ppr(vlapic, vector);
1195 }
1196 
1197 void
1198 vlapic_svr_write_handler(struct vlapic *vlapic)
1199 {
1200 	struct LAPIC *lapic;
1201 	uint32_t old, new, changed;
1202 
1203 	lapic = vlapic->apic_page;
1204 
1205 	new = lapic->svr;
1206 	old = vlapic->svr_last;
1207 	vlapic->svr_last = new;
1208 
1209 	changed = old ^ new;
1210 	if ((changed & APIC_SVR_ENABLE) != 0) {
1211 		if ((new & APIC_SVR_ENABLE) == 0) {
1212 			/*
1213 			 * The apic is now disabled so stop the apic timer
1214 			 * and mask all the LVT entries.
1215 			 */
1216 			VLAPIC_TIMER_LOCK(vlapic);
1217 			callout_stop(&vlapic->callout);
1218 			VLAPIC_TIMER_UNLOCK(vlapic);
1219 			vlapic_mask_lvts(vlapic);
1220 		} else {
1221 			/*
1222 			 * The apic is now enabled so restart the apic timer
1223 			 * if it is configured in periodic mode.
1224 			 */
1225 			if (vlapic_periodic_timer(vlapic))
1226 				vlapic_icrtmr_write_handler(vlapic);
1227 		}
1228 	}
1229 }
1230 
1231 static bool
1232 vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
1233 {
1234 	struct LAPIC *lapic = vlapic->apic_page;
1235 	uint32_t *reg;
1236 	int i;
1237 
1238 	ASSERT3U(offset & 0x3, ==, 0);
1239 	ASSERT3U(offset, <, PAGESIZE);
1240 	ASSERT3P(outp, !=, NULL);
1241 
1242 	uint32_t data = 0;
1243 	switch (offset) {
1244 	case APIC_OFFSET_ID:
1245 		data = lapic->id;
1246 		break;
1247 	case APIC_OFFSET_VER:
1248 		data = lapic->version;
1249 		break;
1250 	case APIC_OFFSET_TPR:
1251 		data = lapic->tpr;
1252 		break;
1253 	case APIC_OFFSET_APR:
1254 		data = lapic->apr;
1255 		break;
1256 	case APIC_OFFSET_PPR:
1257 		data = lapic->ppr;
1258 		break;
1259 	case APIC_OFFSET_LDR:
1260 		data = lapic->ldr;
1261 		break;
1262 	case APIC_OFFSET_DFR:
1263 		data = lapic->dfr;
1264 		break;
1265 	case APIC_OFFSET_SVR:
1266 		data = lapic->svr;
1267 		break;
1268 	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1269 		i = (offset - APIC_OFFSET_ISR0) >> 2;
1270 		reg = &lapic->isr0;
1271 		data = *(reg + i);
1272 		break;
1273 	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1274 		i = (offset - APIC_OFFSET_TMR0) >> 2;
1275 		reg = &lapic->tmr0;
1276 		data = *(reg + i);
1277 		break;
1278 	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1279 		i = (offset - APIC_OFFSET_IRR0) >> 2;
1280 		reg = &lapic->irr0;
1281 		data = atomic_load_acq_int(reg + i);
1282 		break;
1283 	case APIC_OFFSET_ESR:
1284 		data = lapic->esr;
1285 		break;
1286 	case APIC_OFFSET_ICR_LOW:
1287 		data = lapic->icr_lo;
1288 		break;
1289 	case APIC_OFFSET_ICR_HI:
1290 		data = lapic->icr_hi;
1291 		break;
1292 	case APIC_OFFSET_CMCI_LVT:
1293 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1294 		data = vlapic_get_lvt(vlapic, offset);
1295 #ifdef INVARIANTS
1296 		reg = vlapic_get_lvtptr(vlapic, offset);
1297 		ASSERT3U(data, ==, *reg);
1298 #endif
1299 		break;
1300 	case APIC_OFFSET_TIMER_ICR:
1301 		data = lapic->icr_timer;
1302 		break;
1303 	case APIC_OFFSET_TIMER_CCR:
1304 		data = vlapic_get_ccr(vlapic);
1305 		break;
1306 	case APIC_OFFSET_TIMER_DCR:
1307 		data = lapic->dcr_timer;
1308 		break;
1309 	case APIC_OFFSET_RRR:
1310 		data = 0;
1311 		break;
1312 
1313 	case APIC_OFFSET_SELF_IPI:
1314 	case APIC_OFFSET_EOI:
1315 		/* Write-only register */
1316 		*outp = 0;
1317 		return (false);
1318 
1319 	default:
1320 		/* Invalid register */
1321 		*outp = 0;
1322 		return (false);
1323 	}
1324 
1325 	*outp = data;
1326 	return (true);
1327 }
1328 
1329 static bool
1330 vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
1331 {
1332 	struct LAPIC	*lapic = vlapic->apic_page;
1333 	uint32_t	*regptr;
1334 
1335 	ASSERT3U(offset & 0xf, ==, 0);
1336 	ASSERT3U(offset, <, PAGESIZE);
1337 
1338 	switch (offset) {
1339 	case APIC_OFFSET_ID:
1340 		lapic->id = data;
1341 		vlapic_id_write_handler(vlapic);
1342 		break;
1343 	case APIC_OFFSET_TPR:
1344 		vlapic_set_tpr(vlapic, data & 0xff);
1345 		break;
1346 	case APIC_OFFSET_EOI:
1347 		vlapic_process_eoi(vlapic);
1348 		break;
1349 	case APIC_OFFSET_LDR:
1350 		lapic->ldr = data;
1351 		vlapic_ldr_write_handler(vlapic);
1352 		break;
1353 	case APIC_OFFSET_DFR:
1354 		lapic->dfr = data;
1355 		vlapic_dfr_write_handler(vlapic);
1356 		break;
1357 	case APIC_OFFSET_SVR:
1358 		lapic->svr = data;
1359 		vlapic_svr_write_handler(vlapic);
1360 		break;
1361 	case APIC_OFFSET_ICR_LOW:
1362 		lapic->icr_lo = data;
1363 		vlapic_icrlo_write_handler(vlapic);
1364 		break;
1365 	case APIC_OFFSET_ICR_HI:
1366 		lapic->icr_hi = data;
1367 		break;
1368 	case APIC_OFFSET_CMCI_LVT:
1369 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1370 		regptr = vlapic_get_lvtptr(vlapic, offset);
1371 		*regptr = data;
1372 		vlapic_lvt_write_handler(vlapic, offset);
1373 		break;
1374 	case APIC_OFFSET_TIMER_ICR:
1375 		lapic->icr_timer = data;
1376 		vlapic_icrtmr_write_handler(vlapic);
1377 		break;
1378 
1379 	case APIC_OFFSET_TIMER_DCR:
1380 		lapic->dcr_timer = data;
1381 		vlapic_dcr_write_handler(vlapic);
1382 		break;
1383 
1384 	case APIC_OFFSET_ESR:
1385 		vlapic_esr_write_handler(vlapic);
1386 		break;
1387 
1388 	case APIC_OFFSET_SELF_IPI:
1389 		if (vlapic_x2mode(vlapic))
1390 			vlapic_self_ipi_handler(vlapic, data);
1391 		break;
1392 
1393 	case APIC_OFFSET_VER:
1394 	case APIC_OFFSET_APR:
1395 	case APIC_OFFSET_PPR:
1396 	case APIC_OFFSET_RRR:
1397 	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1398 	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1399 	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1400 	case APIC_OFFSET_TIMER_CCR:
1401 		/* Read-only register */
1402 		return (false);
1403 
1404 	default:
1405 		/* Invalid register */
1406 		return (false);
1407 	}
1408 
1409 	return (true);
1410 }
1411 
1412 void
1413 vlapic_reset(struct vlapic *vlapic)
1414 {
1415 	struct LAPIC *lapic = vlapic->apic_page;
1416 	uint32_t *isrptr, *tmrptr, *irrptr;
1417 
1418 	/* Reset any timer-related state first */
1419 	VLAPIC_TIMER_LOCK(vlapic);
1420 	callout_stop(&vlapic->callout);
1421 	vlapic->timer_fire_when = 0;
1422 	lapic->icr_timer = 0;
1423 	lapic->ccr_timer = 0;
1424 	lapic->dcr_timer = 0;
1425 	vlapic_update_divider(vlapic);
1426 	VLAPIC_TIMER_UNLOCK(vlapic);
1427 
1428 	/*
1429 	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
1430 	 * it is not leftover after the reset.  This is performed after the APIC
1431 	 * timer has been stopped, in case it happened to fire just prior to
1432 	 * being deactivated.
1433 	 */
1434 	if (vlapic->ops.sync_state) {
1435 		(*vlapic->ops.sync_state)(vlapic);
1436 	}
1437 
1438 	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
1439 	if (vlapic->vcpuid == 0)
1440 		vlapic->msr_apicbase |= APICBASE_BSP;
1441 
1442 	lapic->id = vlapic_get_id(vlapic);
1443 	lapic->version = VLAPIC_VERSION;
1444 	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
1445 
1446 	lapic->tpr = 0;
1447 	lapic->apr = 0;
1448 	lapic->ppr = 0;
1449 
1450 	lapic->eoi = 0;
1451 	lapic->ldr = 0;
1452 	lapic->dfr = 0xffffffff;
1453 	lapic->svr = APIC_SVR_VECTOR;
1454 	vlapic->svr_last = lapic->svr;
1455 
1456 	isrptr = &lapic->isr0;
1457 	tmrptr = &lapic->tmr0;
1458 	irrptr = &lapic->irr0;
1459 	for (uint_t i = 0; i < 8; i++) {
1460 		atomic_store_rel_int(&isrptr[i * 4], 0);
1461 		atomic_store_rel_int(&tmrptr[i * 4], 0);
1462 		atomic_store_rel_int(&irrptr[i * 4], 0);
1463 	}
1464 
1465 	lapic->esr = 0;
1466 	vlapic->esr_pending = 0;
1467 	lapic->icr_lo = 0;
1468 	lapic->icr_hi = 0;
1469 
1470 	lapic->lvt_cmci = 0;
1471 	lapic->lvt_timer = 0;
1472 	lapic->lvt_thermal = 0;
1473 	lapic->lvt_pcint = 0;
1474 	lapic->lvt_lint0 = 0;
1475 	lapic->lvt_lint1 = 0;
1476 	lapic->lvt_error = 0;
1477 	vlapic_mask_lvts(vlapic);
1478 }
1479 
1480 void
1481 vlapic_init(struct vlapic *vlapic)
1482 {
1483 	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
1484 	KASSERT(vlapic->vcpuid >= 0 &&
1485 	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
1486 	    ("vlapic_init: vcpuid is not initialized"));
1487 	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
1488 	    "initialized"));
1489 
1490 	/*
1491 	 * If the vlapic is configured in x2apic mode then it will be
1492 	 * accessed in the critical section via the MSR emulation code.
1493 	 *
1494 	 * Therefore the timer mutex must be a spinlock because blockable
1495 	 * mutexes cannot be acquired in a critical section.
1496 	 */
1497 	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
1498 	callout_init(&vlapic->callout, 1);
1499 
1500 	vlapic_reset(vlapic);
1501 }
1502 
1503 void
1504 vlapic_cleanup(struct vlapic *vlapic)
1505 {
1506 	callout_drain(&vlapic->callout);
1507 	mutex_destroy(&vlapic->timer_lock);
1508 }
1509 
1510 int
1511 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
1512     uint_t size)
1513 {
1514 	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
1515 	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);
1516 
1517 	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
1518 	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
1519 		*valp = UINT64_MAX;
1520 		return (0);
1521 	}
1522 
1523 	const uint16_t off = gpa - DEFAULT_APIC_BASE;
1524 	uint32_t raw = 0;
1525 	(void) vlapic_read(vlapic, off & ~0xf, &raw);
1526 
1527 	/* Shift and mask reads which are small and/or unaligned */
1528 	const uint8_t align = off & 0xf;
1529 	if (align < 4) {
1530 		*valp = (uint64_t)raw << (align * 8);
1531 	} else {
1532 		*valp = 0;
1533 	}
1534 
1535 	return (0);
1536 }
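/*
 * As written, a 1-byte read at offset 0x81 (one byte into the TPR) fetches
 * the whole aligned register and shifts it left by 8 bits, while any
 * access beginning 4 or more bytes into a register yields 0.
 */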
1537 
1538 int
1539 vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
1540     uint_t size)
1541 {
1542 	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
1543 	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);
1544 
1545 	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
1546 	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
1547 		return (0);
1548 	}
1549 
1550 	const uint16_t off = gpa - DEFAULT_APIC_BASE;
1551 	/* Ignore writes which are not both 32 bits wide and 16-byte aligned */
1552 	if ((off & 0xf) != 0 || size != 4) {
1553 		return (0);
1554 	}
1555 
1556 	(void) vlapic_write(vlapic, off, (uint32_t)val);
1557 	return (0);
1558 }
1559 
1560 /* Should attempts to change the APIC base address be rejected with a #GP?  */
1561 int vlapic_gp_on_addr_change = 1;
1562 
1563 static vm_msr_result_t
1564 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
1565 {
1566 	const uint64_t diff = vlapic->msr_apicbase ^ val;
1567 
1568 	/*
1569 	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
1570 	 * modes is more polished, it will remain off-limits from being altered
1571 	 * by the guest.
1572 	 */
1573 	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
1574 	    APICBASE_BSP;
1575 	if ((diff & reserved_bits) != 0) {
1576 		return (VMR_GP);
1577 	}
1578 
1579 	/* We do not presently allow the LAPIC access address to be modified. */
1580 	if ((diff & APICBASE_ADDR_MASK) != 0) {
1581 		/*
1582 		 * Explicitly rebuffing such requests with a #GP is the most
1583 		 * straightforward way to handle the situation, but certain
1584 		 * consumers (such as the KVM unit tests) may balk at the
1585 		 * otherwise unexpected exception.
1586 		 */
1587 		if (vlapic_gp_on_addr_change) {
1588 			return (VMR_GP);
1589 		}
1590 
1591 		/* If silence is required, just ignore the address change. */
1592 		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
1593 	}
1594 
1595 	vlapic->msr_apicbase = val;
1596 	return (VMR_OK);
1597 }
1598 
1599 static __inline uint16_t
1600 vlapic_msr_to_regoff(uint32_t msr)
1601 {
1602 	ASSERT3U(msr, >=, MSR_APIC_000);
1603 	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));
1604 
1605 	return ((msr - MSR_APIC_000) << 4);
1606 }
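/*
 * Each x2APIC MSR maps onto a 16-byte-spaced xAPIC register; for example,
 * MSR 0x808 (the TPR) becomes offset (0x808 - MSR_APIC_000) << 4 = 0x80,
 * which is APIC_OFFSET_TPR.
 */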
1607 
1608 bool
1609 vlapic_owned_msr(uint32_t msr)
1610 {
1611 	if (msr == MSR_APICBASE) {
1612 		return (true);
1613 	}
1614 	if (msr >= MSR_APIC_000 &&
1615 	    msr < (MSR_APIC_000 + 0x100)) {
1616 		return (true);
1617 	}
1618 	return (false);
1619 }
1620 
1621 vm_msr_result_t
1622 vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
1623 {
1624 	ASSERT(vlapic_owned_msr(msr));
1625 	ASSERT3P(valp, !=, NULL);
1626 
1627 	if (msr == MSR_APICBASE) {
1628 		*valp = vlapic->msr_apicbase;
1629 		return (VMR_OK);
1630 	}
1631 
1632 	/* #GP for x2APIC MSR accesses in xAPIC mode */
1633 	if (!vlapic_x2mode(vlapic)) {
1634 		return (VMR_GP);
1635 	}
1636 
1637 	uint64_t out = 0;
1638 	const uint16_t reg = vlapic_msr_to_regoff(msr);
1639 	switch (reg) {
1640 	case APIC_OFFSET_ICR_LOW: {
1641 		/* Read from ICR register gets entire (64-bit) value */
1642 		uint32_t low = 0, high = 0;
1643 		bool valid;
1644 
1645 		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
1646 		VERIFY(valid);
1647 		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
1648 		VERIFY(valid);
1649 
1650 		*valp = ((uint64_t)high << 32) | low;
1651 		return (VMR_OK);
1652 		}
1653 	case APIC_OFFSET_ICR_HI:
1654 		/* Already covered by ICR_LOW */
1655 		return (VMR_GP);
1656 	default:
1657 		break;
1658 	}
1659 	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
1660 		return (VMR_GP);
1661 	}
1662 	*valp = out;
1663 	return (VMR_OK);
1664 }
1665 
1666 vm_msr_result_t
1667 vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
1668 {
1669 	ASSERT(vlapic_owned_msr(msr));
1670 
1671 	if (msr == MSR_APICBASE) {
1672 		return (vlapic_set_apicbase(vlapic, val));
1673 	}
1674 
1675 	/* #GP for x2APIC MSR accesses in xAPIC mode */
1676 	if (!vlapic_x2mode(vlapic)) {
1677 		return (VMR_GP);
1678 	}
1679 
1680 	const uint16_t reg = vlapic_msr_to_regoff(msr);
1681 	switch (reg) {
1682 	case APIC_OFFSET_ICR_LOW: {
1683 		/* Write to ICR register sets entire (64-bit) value */
1684 		bool valid;
1685 
1686 		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
1687 		VERIFY(valid);
1688 		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
1689 		VERIFY(valid);
1690 		return (VMR_OK);
1691 		}
1692 	case APIC_OFFSET_ICR_HI:
1693 		/* Already covered by ICR_LOW */
1694 		return (VMR_GP);
1695 	case APIC_OFFSET_ESR:
1696 		/* Only 0 may be written from x2APIC mode */
1697 		if (val != 0) {
1698 			return (VMR_GP);
1699 		}
1700 		break;
1701 	default:
1702 		break;
1703 	}
1704 	if (!vlapic_write(vlapic, reg, val)) {
1705 		return (VMR_GP);
1706 	}
1707 	return (VMR_OK);
1708 }
1709 
1710 void
1711 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1712 {
1713 	struct vlapic *vlapic;
1714 	struct LAPIC *lapic;
1715 
1716 	vlapic = vm_lapic(vm, vcpuid);
1717 
1718 	if (state == X2APIC_DISABLED)
1719 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
1720 	else
1721 		vlapic->msr_apicbase |= APICBASE_X2APIC;
1722 
1723 	/*
1724 	 * Reset the local APIC registers whose values are mode-dependent.
1725 	 *
1726 	 * XXX this works because the APIC mode can be changed only at vcpu
1727 	 * initialization time.
1728 	 */
1729 	lapic = vlapic->apic_page;
1730 	lapic->id = vlapic_get_id(vlapic);
1731 	if (vlapic_x2mode(vlapic)) {
1732 		lapic->ldr = x2apic_ldr(vlapic);
1733 		lapic->dfr = 0;
1734 	} else {
1735 		lapic->ldr = 0;
1736 		lapic->dfr = 0xffffffff;
1737 	}
1738 
1739 	if (state == X2APIC_ENABLED) {
1740 		if (vlapic->ops.enable_x2apic_mode)
1741 			(*vlapic->ops.enable_x2apic_mode)(vlapic);
1742 	}
1743 }
1744 
1745 void
1746 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
1747     int delmode, int vec)
1748 {
1749 	bool lowprio;
1750 	int vcpuid;
1751 	cpuset_t dmask;
1752 
1753 	if (delmode != IOART_DELFIXED &&
1754 	    delmode != IOART_DELLOPRI &&
1755 	    delmode != IOART_DELEXINT) {
1756 		/* Invalid delivery mode */
1757 		return;
1758 	}
1759 	lowprio = (delmode == IOART_DELLOPRI);
1760 
1761 	/*
1762 	 * We don't provide any virtual interrupt redirection hardware so
1763 	 * all interrupts originating from the ioapic or MSI specify the
1764 	 * 'dest' in the legacy xAPIC format.
1765 	 */
1766 	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
1767 
1768 	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
1769 		vcpuid--;
1770 		CPU_CLR(vcpuid, &dmask);
1771 		if (delmode == IOART_DELEXINT) {
1772 			(void) vm_inject_extint(vm, vcpuid);
1773 		} else {
1774 			(void) lapic_set_intr(vm, vcpuid, vec, level);
1775 		}
1776 	}
1777 }
1778 
1779 void
1780 vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
1781 {
1782 	/*
1783 	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
1784 	 *
1785 	 * This is done by leveraging hardware features like Posted Interrupts
1786 	 * (Intel) or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
1787 	 *
1788 	 * If neither of these features is available, fall back to
1789 	 * sending an IPI to 'hostcpu'.
1790 	 */
1791 	if (vlapic->ops.post_intr)
1792 		(*vlapic->ops.post_intr)(vlapic, hostcpu);
1793 	else
1794 		poke_cpu(hostcpu);
1795 }
1796 
1797 void
1798 vlapic_localize_resources(struct vlapic *vlapic)
1799 {
1800 	vmm_glue_callout_localize(&vlapic->callout);
1801 }
1802 
1803 void
1804 vlapic_pause(struct vlapic *vlapic)
1805 {
1806 	VLAPIC_TIMER_LOCK(vlapic);
1807 	callout_stop(&vlapic->callout);
1808 	VLAPIC_TIMER_UNLOCK(vlapic);
1809 
1810 }
1811 
1812 void
1813 vlapic_resume(struct vlapic *vlapic)
1814 {
1815 	VLAPIC_TIMER_LOCK(vlapic);
1816 	if (vlapic->timer_fire_when != 0) {
1817 		vlapic_callout_reset(vlapic);
1818 	}
1819 	VLAPIC_TIMER_UNLOCK(vlapic);
1820 }
1821 
1822 static int
1823 vlapic_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
1824 {
1825 	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
1826 	VERIFY3U(req->vdr_version, ==, 1);
1827 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));
1828 
1829 	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
1830 	struct vdi_lapic_v1 *out = req->vdr_data;
1831 
1832 	VLAPIC_TIMER_LOCK(vlapic);
1833 
1834 	if (vlapic->ops.sync_state) {
1835 		(*vlapic->ops.sync_state)(vlapic);
1836 	}
1837 
1838 	out->vl_msr_apicbase = vlapic->msr_apicbase;
1839 	out->vl_esr_pending = vlapic->esr_pending;
1840 	if (vlapic->timer_fire_when != 0) {
1841 		out->vl_timer_target =
1842 		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
1843 	} else {
1844 		out->vl_timer_target = 0;
1845 	}
1846 
1847 	const struct LAPIC *lapic = vlapic->apic_page;
1848 	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;
1849 
1850 	/*
1851 	 * While this might appear, at first glance, to be missing some fields,
1852 	 * they are intentionally omitted:
1853 	 * - PPR: its contents are always generated at runtime
1854 	 * - EOI: write-only, and contents are ignored after handling
1855 	 * - RRD: (aka RRR) read-only and always 0
1856 	 * - CCR: calculated from underlying timer data
1857 	 */
1858 	out_page->vlp_id = lapic->id;
1859 	out_page->vlp_version = lapic->version;
1860 	out_page->vlp_tpr = lapic->tpr;
1861 	out_page->vlp_apr = lapic->apr;
1862 	out_page->vlp_ldr = lapic->ldr;
1863 	out_page->vlp_dfr = lapic->dfr;
1864 	out_page->vlp_svr = lapic->svr;
1865 	out_page->vlp_esr = lapic->esr;
1866 	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
1867 	out_page->vlp_icr_timer = lapic->icr_timer;
1868 	out_page->vlp_dcr_timer = lapic->dcr_timer;
1869 
1870 	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
1871 	out_page->vlp_lvt_timer = lapic->lvt_timer;
1872 	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
1873 	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
1874 	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
1875 	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
1876 	out_page->vlp_lvt_error = lapic->lvt_error;
1877 
1878 	const uint32_t *isrptr = &lapic->isr0;
1879 	const uint32_t *tmrptr = &lapic->tmr0;
1880 	const uint32_t *irrptr = &lapic->irr0;
1881 	for (uint_t i = 0; i < 8; i++) {
1882 		out_page->vlp_isr[i] = isrptr[i * 4];
1883 		out_page->vlp_tmr[i] = tmrptr[i * 4];
1884 		out_page->vlp_irr[i] = irrptr[i * 4];
1885 	}
1886 	VLAPIC_TIMER_UNLOCK(vlapic);
1887 
1888 	return (0);
1889 }
1890 
1891 static uint8_t
1892 popc8(uint8_t val)
1893 {
1894 	uint8_t cnt;
1895 
1896 	for (cnt = 0; val != 0; val &= (val - 1)) {
1897 		cnt++;
1898 	}
1899 	return (cnt);
1900 }
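/*
 * This is Kernighan's popcount loop: val &= (val - 1) clears the lowest
 * set bit on each pass, so popc8(0xb0) loops three times and returns 3.
 */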
1901 
1902 /*
1903  * Descriptions for the various failures which can occur when validating
1904  * to-be-written vlapic state.
1905  */
1906 enum vlapic_validation_error {
1907 	VVE_OK,
1908 	VVE_BAD_ID,
1909 	VVE_BAD_VERSION,
1910 	VVE_BAD_MSR_BASE,
1911 	VVE_BAD_ESR,
1912 	VVE_BAD_TPR,
1913 	VVE_LOW_VECTOR,
1914 	VVE_ISR_PRIORITY,
1915 	VVE_TIMER_MISMATCH,
1916 };
1917 
1918 static enum vlapic_validation_error
1919 vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
1920 {
1921 	ASSERT(req->vdr_version == 1 &&
1922 	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
1923 	const struct vdi_lapic_v1 *src = req->vdr_data;
1924 
1925 	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
1926 	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
1927 		return (VVE_BAD_ESR);
1928 	}
1929 
1930 	/* Use the same restrictions as the wrmsr accessor for now */
1931 	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
1932 	    APICBASE_BSP;
1933 	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
1934 	if ((diff & apicbase_reserved) != 0) {
1935 		return (VVE_BAD_MSR_BASE);
1936 	}
1937 
1938 	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
1939 	/*
1940 	 * Demand that ID match for now.  This can be further updated when some
1941 	 * of the x2apic handling is improved.
1942 	 */
1943 	if (page->vlp_id != vlapic_get_id(vlapic)) {
1944 		return (VVE_BAD_ID);
1945 	}
1946 
1947 	if (page->vlp_version != vlapic->apic_page->version) {
1948 		return (VVE_BAD_VERSION);
1949 	}
1950 
1951 	if (page->vlp_tpr > 0xff) {
1952 		return (VVE_BAD_TPR);
1953 	}
1954 
1955 	/* Vectors 0-15 are not expected to be handled by the lapic */
1956 	if ((page->vlp_isr[0] & 0xffff) != 0 ||
1957 	    (page->vlp_irr[0] & 0xffff) != 0 ||
1958 	    (page->vlp_tmr[0] & 0xffff) != 0) {
1959 		return (VVE_LOW_VECTOR);
1960 	}
1961 
1962 	/* Only one interrupt should be in-service for each priority level */
1963 	for (uint_t i = 0; i < 8; i++) {
1964 		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
1965 		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
1966 		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
1967 		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
1968 			return (VVE_ISR_PRIORITY);
1969 		}
1970 	}
1971 
1972 	/* If icr_timer is zero, then a scheduled timer does not make sense */
1973 	if (page->vlp_icr_timer == 0 && src->vl_timer_target != 0) {
1974 		return (VVE_TIMER_MISMATCH);
1975 	}
1976 
1977 	return (VVE_OK);
1978 }
1979 
1980 static int
1981 vlapic_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
1982 {
1983 	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
1984 	VERIFY3U(req->vdr_version, ==, 1);
1985 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));
1986 
1987 	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
1988 	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
1989 		return (EINVAL);
1990 	}
1991 	const struct vdi_lapic_v1 *src = req->vdr_data;
1992 	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
1993 	struct LAPIC *lapic = vlapic->apic_page;
1994 
1995 	VLAPIC_TIMER_LOCK(vlapic);
1996 
1997 	/* Already ensured by vlapic_data_validate() */
1998 	VERIFY3U(page->vlp_version, ==, lapic->version);
1999 
2000 	vlapic->msr_apicbase = src->vl_msr_apicbase;
2001 	vlapic->esr_pending = src->vl_esr_pending;
2002 
2003 	lapic->tpr = page->vlp_tpr;
2004 	lapic->apr = page->vlp_apr;
2005 	lapic->ldr = page->vlp_ldr;
2006 	lapic->dfr = page->vlp_dfr;
2007 	lapic->svr = page->vlp_svr;
2008 	lapic->esr = page->vlp_esr;
2009 	lapic->icr_lo = (uint32_t)page->vlp_icr;
2010 	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);
2011 
2012 	lapic->icr_timer = page->vlp_icr_timer;
2013 	lapic->dcr_timer = page->vlp_dcr_timer;
2014 	vlapic_update_divider(vlapic);
2015 
2016 	/* cleanse LDR/DFR */
2017 	vlapic_ldr_write_handler(vlapic);
2018 	vlapic_dfr_write_handler(vlapic);
2019 
2020 	lapic->lvt_cmci = page->vlp_lvt_cmci;
2021 	lapic->lvt_timer = page->vlp_lvt_timer;
2022 	lapic->lvt_thermal = page->vlp_lvt_thermal;
2023 	lapic->lvt_pcint = page->vlp_lvt_pcint;
2024 	lapic->lvt_lint0 = page->vlp_lvt_lint0;
2025 	lapic->lvt_lint1 = page->vlp_lvt_lint1;
2026 	lapic->lvt_error = page->vlp_lvt_error;
2027 	/* cleanse LVTs */
2028 	vlapic_refresh_lvts(vlapic);
2029 
2030 	uint32_t *isrptr = &lapic->isr0;
2031 	uint32_t *tmrptr = &lapic->tmr0;
2032 	uint32_t *irrptr = &lapic->irr0;
2033 	for (uint_t i = 0; i < 8; i++) {
2034 		isrptr[i * 4] = page->vlp_isr[i];
2035 		tmrptr[i * 4] = page->vlp_tmr[i];
2036 		irrptr[i * 4] = page->vlp_irr[i];
2037 	}
2038 
2039 	if (src->vl_timer_target != 0) {
2040 		vlapic->timer_fire_when =
2041 		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);
2042 
2043 		/*
2044 		 * Check to see if timer expiration would result in computed CCR
2045 		 * values in excess of what is configured in ICR/DCR.
2046 		 */
2047 		const hrtime_t now = gethrtime();
2048 		if (vlapic->timer_fire_when > now) {
2049 			const uint32_t ccr = hrt_freq_count(
2050 			    vlapic->timer_fire_when - now,
2051 			    vlapic->timer_cur_freq);
2052 
2053 			/*
2054 			 * Until we have a richer event/logging system
2055 			 * available, just note such an overage as a stat.
2056 			 */
2057 			if (ccr > lapic->icr_timer) {
2058 				vlapic->stats.vs_import_timer_overage++;
2059 			}
2060 		}
2061 
2062 		if (!vm_is_paused(vlapic->vm)) {
2063 			vlapic_callout_reset(vlapic);
2064 		}
2065 	} else {
2066 		vlapic->timer_fire_when = 0;
2067 	}
2068 
2069 	if (vlapic->ops.sync_state) {
2070 		(*vlapic->ops.sync_state)(vlapic);
2071 	}
2072 	VLAPIC_TIMER_UNLOCK(vlapic);
2073 
2074 	return (0);
2075 }
2076 
2077 static const vmm_data_version_entry_t lapic_v1 = {
2078 	.vdve_class = VDC_LAPIC,
2079 	.vdve_version = 1,
2080 	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
2081 	.vdve_vcpu_readf = vlapic_data_read,
2082 	.vdve_vcpu_writef = vlapic_data_write,
2083 };
2084 VMM_DATA_VERSION(lapic_v1);
2085