xref: /illumos-gate/usr/src/uts/intel/io/vmm/io/vlapic.c (revision 1f7cf86b3ece41a9cec19a3ed7e3b45e005d1461)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  * Copyright (c) 2019 Joyent, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * This file and its contents are supplied under the terms of the
31  * Common Development and Distribution License ("CDDL"), version 1.0.
32  * You may only use this file in accordance with the terms of version
33  * 1.0 of the CDDL.
34  *
35  * A full copy of the text of the CDDL should have accompanied this
36  * source.  A copy of the CDDL is also available via the Internet at
37  * http://www.illumos.org/license/CDDL.
38  *
39  * Copyright 2014 Pluribus Networks Inc.
40  * Copyright 2018 Joyent, Inc.
41  * Copyright 2024 Oxide Computer Company
42  */
43 
44 #include <sys/cdefs.h>
45 
46 #include <sys/param.h>
47 #include <sys/kernel.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/systm.h>
51 #include <sys/cpuset.h>
52 
53 #include <x86/specialreg.h>
54 #include <x86/apicreg.h>
55 
56 #include <machine/clock.h>
57 
58 #include <machine/vmm.h>
59 #include <sys/vmm_kernel.h>
60 
61 #include "vmm_lapic.h"
62 #include "vmm_stat.h"
63 
64 #include "vlapic.h"
65 #include "vlapic_priv.h"
66 #include "vioapic.h"
67 
68 
69 /*
70  * The 4 high bits of a given interrupt vector represent its priority.  The same
71  * is true for the contents of the TPR when it is used to calculate the ultimate
72  * PPR of an APIC - the 4 high bits hold the priority.
73  */
74 #define	PRIO(x)			((x) & 0xf0)
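
/*
 * Illustrative example, not part of the original source: vectors sharing a
 * high nibble fall in the same priority class, so PRIO(0x45) == 0x40 and
 * vector 0x45 outranks vector 0x3f, whose class is PRIO(0x3f) == 0x30.  A
 * hypothetical helper built on the macro might read:
 *
 *	static bool
 *	prio_outranks(uint8_t vector, uint8_t ppr)
 *	{
 *		return (PRIO(vector) > PRIO(ppr));
 *	}
 */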
75 
76 #define	VLAPIC_VERSION		(0x14)
77 
78 /*
79  * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
80  * vlapic_callout_handler() and vcpu accesses to:
81  * - timer_freq_bt, timer_period_bt, timer_fire_bt
82  * - timer LVT register
83  */
84 #define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
85 #define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
86 #define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))
87 
88 /*
89  * APIC timer frequency:
90  * - arbitrary but chosen to be in the ballpark of contemporary hardware.
91  * - power-of-two to avoid loss of precision when calculating times
92  */
93 #define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
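
/*
 * Illustrative arithmetic, not part of the original source: with the
 * divide-by-2 setting selected in the DCR, the effective timer frequency is
 * (128 * 1024 * 1024) / 2 == 67108864 ticks per second, so an ICR value of
 * 67108864 yields a one-second period.  Because the bus frequency is a
 * power of two, the division is exact for every legal divisor.
 */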
94 
95 #define	APICBASE_ADDR_MASK	0xfffffffffffff000UL
96 
97 #define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
98 		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
99 		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
100 		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)
101 
102 static void vlapic_set_error(struct vlapic *, uint32_t, bool);
103 static void vlapic_callout_handler(void *arg);
104 
105 static __inline bool
106 vlapic_x2mode(const struct vlapic *vlapic)
107 {
108 	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
109 }
110 
111 static __inline bool
112 vlapic_hw_disabled(const struct vlapic *vlapic)
113 {
114 	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
115 }
116 
117 static __inline bool
118 vlapic_sw_disabled(const struct vlapic *vlapic)
119 {
120 	const struct LAPIC *lapic = vlapic->apic_page;
121 
122 	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
123 }
124 
125 static __inline bool
126 vlapic_enabled(const struct vlapic *vlapic)
127 {
128 	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
129 }
130 
131 static __inline uint32_t
132 vlapic_get_id(const struct vlapic *vlapic)
133 {
134 
135 	if (vlapic_x2mode(vlapic))
136 		return (vlapic->vcpuid);
137 	else
138 		return (vlapic->vcpuid << 24);
139 }
140 
141 static uint32_t
142 x2apic_ldr(const struct vlapic *vlapic)
143 {
144 	int apicid;
145 	uint32_t ldr;
146 
147 	apicid = vlapic_get_id(vlapic);
148 	ldr = 1 << (apicid & 0xf);
149 	ldr |= (apicid & 0xffff0) << 12;
150 	return (ldr);
151 }
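
/*
 * Worked example, illustrative only: for x2APIC ID 0x26, the low nibble
 * selects logical bit 6 and the remaining bits form cluster 0x2, so the
 * derived LDR is (0x2 << 16) | (1 << 6) == 0x00020040.
 */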
152 
153 void
154 vlapic_dfr_write_handler(struct vlapic *vlapic)
155 {
156 	struct LAPIC *lapic;
157 
158 	lapic = vlapic->apic_page;
159 	if (vlapic_x2mode(vlapic)) {
160 		/* Ignore write to DFR in x2APIC mode */
161 		lapic->dfr = 0;
162 		return;
163 	}
164 
165 	lapic->dfr &= APIC_DFR_MODEL_MASK;
166 	lapic->dfr |= APIC_DFR_RESERVED;
167 }
168 
169 void
170 vlapic_ldr_write_handler(struct vlapic *vlapic)
171 {
172 	struct LAPIC *lapic;
173 
174 	lapic = vlapic->apic_page;
175 
176 	/* LDR is read-only in x2apic mode */
177 	if (vlapic_x2mode(vlapic)) {
178 		/* Ignore write to LDR in x2APIC mode */
179 		lapic->ldr = x2apic_ldr(vlapic);
180 	} else {
181 		lapic->ldr &= ~APIC_LDR_RESERVED;
182 	}
183 }
184 
185 void
186 vlapic_id_write_handler(struct vlapic *vlapic)
187 {
188 	struct LAPIC *lapic;
189 
190 	/*
191 	 * We don't allow the ID register to be modified so reset it back to
192 	 * its default value.
193 	 */
194 	lapic = vlapic->apic_page;
195 	lapic->id = vlapic_get_id(vlapic);
196 }
197 
198 static int
199 vlapic_timer_divisor(uint32_t dcr)
200 {
201 	switch (dcr & 0xB) {
202 	case APIC_TDCR_1:
203 		return (1);
204 	case APIC_TDCR_2:
205 		return (2);
206 	case APIC_TDCR_4:
207 		return (4);
208 	case APIC_TDCR_8:
209 		return (8);
210 	case APIC_TDCR_16:
211 		return (16);
212 	case APIC_TDCR_32:
213 		return (32);
214 	case APIC_TDCR_64:
215 		return (64);
216 	case APIC_TDCR_128:
217 		return (128);
218 	default:
219 		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
220 	}
221 }
222 
223 static uint32_t
224 vlapic_get_ccr(struct vlapic *vlapic)
225 {
226 	struct LAPIC *lapic;
227 	uint32_t ccr;
228 
229 	ccr = 0;
230 	lapic = vlapic->apic_page;
231 
232 	VLAPIC_TIMER_LOCK(vlapic);
233 	if (callout_active(&vlapic->callout)) {
234 		/*
235 		 * If the timer is scheduled to expire in the future then
236 		 * compute the value of 'ccr' based on the remaining time.
237 		 */
238 
239 		const hrtime_t now = gethrtime();
240 		if (vlapic->timer_fire_when > now) {
241 			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
242 			    vlapic->timer_cur_freq);
243 		}
244 	}
245 
246 	/*
247 	 * Clamp CCR value to that programmed in ICR - its theoretical maximum.
248 	 * Normal operation should never result in this being necessary.  Only
249 	 * strange circumstances due to state importation as part of instance
250 	 * save/restore or live-migration require such wariness.
251 	 */
252 	if (ccr > lapic->icr_timer) {
253 		ccr = lapic->icr_timer;
254 		vlapic->stats.vs_clamp_ccr++;
255 	}
256 	VLAPIC_TIMER_UNLOCK(vlapic);
257 	return (ccr);
258 }
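
/*
 * A sketch of the arithmetic above, illustrative only and assuming
 * hrt_freq_count() converts a remaining duration at a given frequency into
 * a tick count: with timer_cur_freq at the full 128 MiHz bus rate and one
 * millisecond left until timer_fire_when, the computed CCR is roughly
 * 134217728 / 1000 ~= 134218 ticks, subject to the ICR clamp applied above.
 */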
259 
260 static void
261 vlapic_update_divider(struct vlapic *vlapic)
262 {
263 	struct LAPIC *lapic = vlapic->apic_page;
264 
265 	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
266 
267 	vlapic->timer_cur_freq =
268 	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
269 	vlapic->timer_period =
270 	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
271 }
272 
273 void
274 vlapic_dcr_write_handler(struct vlapic *vlapic)
275 {
276 	/*
277 	 * Update the timer frequency and the timer period.
278 	 *
279 	 * XXX changes to the frequency divider will not take effect until
280 	 * the timer is reloaded.
281 	 */
282 	VLAPIC_TIMER_LOCK(vlapic);
283 	vlapic_update_divider(vlapic);
284 	VLAPIC_TIMER_UNLOCK(vlapic);
285 }
286 
287 void
288 vlapic_esr_write_handler(struct vlapic *vlapic)
289 {
290 	struct LAPIC *lapic;
291 
292 	lapic = vlapic->apic_page;
293 	lapic->esr = vlapic->esr_pending;
294 	vlapic->esr_pending = 0;
295 }
296 
297 vcpu_notify_t
298 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
299 {
300 	struct LAPIC *lapic;
301 	uint32_t *irrptr, *tmrptr, mask, tmr;
302 	int idx;
303 
304 	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
305 
306 	lapic = vlapic->apic_page;
307 	if (!(lapic->svr & APIC_SVR_ENABLE)) {
308 		/* ignore interrupt on software-disabled APIC */
309 		return (VCPU_NOTIFY_NONE);
310 	}
311 
312 	if (vector < 16) {
313 		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
314 		    false);
315 
316 		/*
317 		 * If the error LVT is configured to interrupt the vCPU, it will
318 		 * have delivered a notification through that mechanism.
319 		 */
320 		return (VCPU_NOTIFY_NONE);
321 	}
322 
323 	if (vlapic->ops.set_intr_ready) {
324 		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
325 	}
326 
327 	idx = (vector / 32) * 4;
328 	mask = 1 << (vector % 32);
329 	tmrptr = &lapic->tmr0;
330 	irrptr = &lapic->irr0;
331 
332 	/*
333 	 * Update TMR for requested vector, if necessary.
334 	 * This must be done prior to asserting the bit in IRR so that the
335 	 * proper TMR state is always visible before the to-be-queued interrupt
336 	 * can be injected.
337 	 */
338 	tmr = atomic_load_acq_32(&tmrptr[idx]);
339 	if ((tmr & mask) != (level ? mask : 0)) {
340 		if (level) {
341 			atomic_set_int(&tmrptr[idx], mask);
342 		} else {
343 			atomic_clear_int(&tmrptr[idx], mask);
344 		}
345 	}
346 
347 	/* Now set the bit in IRR */
348 	atomic_set_int(&irrptr[idx], mask);
349 
350 	return (VCPU_NOTIFY_EXIT);
351 }
352 
353 static __inline uint32_t *
354 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
355 {
356 	struct LAPIC	*lapic = vlapic->apic_page;
357 	int		i;
358 
359 	switch (offset) {
360 	case APIC_OFFSET_CMCI_LVT:
361 		return (&lapic->lvt_cmci);
362 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
363 		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
364 		return ((&lapic->lvt_timer) + i);
365 	default:
366 		panic("vlapic_get_lvt: invalid LVT\n");
367 	}
368 }
369 
370 static __inline int
371 lvt_off_to_idx(uint32_t offset)
372 {
373 	int index;
374 
375 	switch (offset) {
376 	case APIC_OFFSET_CMCI_LVT:
377 		index = APIC_LVT_CMCI;
378 		break;
379 	case APIC_OFFSET_TIMER_LVT:
380 		index = APIC_LVT_TIMER;
381 		break;
382 	case APIC_OFFSET_THERM_LVT:
383 		index = APIC_LVT_THERMAL;
384 		break;
385 	case APIC_OFFSET_PERF_LVT:
386 		index = APIC_LVT_PMC;
387 		break;
388 	case APIC_OFFSET_LINT0_LVT:
389 		index = APIC_LVT_LINT0;
390 		break;
391 	case APIC_OFFSET_LINT1_LVT:
392 		index = APIC_LVT_LINT1;
393 		break;
394 	case APIC_OFFSET_ERROR_LVT:
395 		index = APIC_LVT_ERROR;
396 		break;
397 	default:
398 		index = -1;
399 		break;
400 	}
401 	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
402 	    "invalid lvt index %d for offset %x", index, offset));
403 
404 	return (index);
405 }
406 
407 static __inline uint32_t
408 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
409 {
410 	int idx;
411 	uint32_t val;
412 
413 	idx = lvt_off_to_idx(offset);
414 	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
415 	return (val);
416 }
417 
418 void
419 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
420 {
421 	uint32_t *lvtptr, mask, val;
422 	struct LAPIC *lapic;
423 	int idx;
424 
425 	lapic = vlapic->apic_page;
426 	lvtptr = vlapic_get_lvtptr(vlapic, offset);
427 	val = *lvtptr;
428 	idx = lvt_off_to_idx(offset);
429 
430 	if (!(lapic->svr & APIC_SVR_ENABLE))
431 		val |= APIC_LVT_M;
432 	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
433 	switch (offset) {
434 	case APIC_OFFSET_TIMER_LVT:
435 		mask |= APIC_LVTT_TM;
436 		break;
437 	case APIC_OFFSET_ERROR_LVT:
438 		break;
439 	case APIC_OFFSET_LINT0_LVT:
440 	case APIC_OFFSET_LINT1_LVT:
441 		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
442 		/* FALLTHROUGH */
443 	default:
444 		mask |= APIC_LVT_DM;
445 		break;
446 	}
447 	val &= mask;
448 	*lvtptr = val;
449 	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
450 }
451 
452 static void
453 vlapic_refresh_lvts(struct vlapic *vlapic)
454 {
455 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
456 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
457 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
458 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
459 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
460 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
461 	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
462 }
463 
464 static void
465 vlapic_mask_lvts(struct vlapic *vlapic)
466 {
467 	struct LAPIC *lapic = vlapic->apic_page;
468 
469 	lapic->lvt_cmci |= APIC_LVT_M;
470 	lapic->lvt_timer |= APIC_LVT_M;
471 	lapic->lvt_thermal |= APIC_LVT_M;
472 	lapic->lvt_pcint |= APIC_LVT_M;
473 	lapic->lvt_lint0 |= APIC_LVT_M;
474 	lapic->lvt_lint1 |= APIC_LVT_M;
475 	lapic->lvt_error |= APIC_LVT_M;
476 	vlapic_refresh_lvts(vlapic);
477 }
478 
479 static int
480 vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
481 {
482 	uint32_t mode, reg, vec;
483 	vcpu_notify_t notify;
484 
485 	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);
486 
487 	if (reg & APIC_LVT_M)
488 		return (0);
489 	vec = reg & APIC_LVT_VECTOR;
490 	mode = reg & APIC_LVT_DM;
491 
492 	switch (mode) {
493 	case APIC_LVT_DM_FIXED:
494 		if (vec < 16) {
495 			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
496 			    lvt == APIC_LVT_ERROR);
497 			return (0);
498 		}
499 		notify = vlapic_set_intr_ready(vlapic, vec, false);
500 		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
501 		break;
502 	case APIC_LVT_DM_NMI:
503 		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
504 		break;
505 	case APIC_LVT_DM_EXTINT:
506 		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
507 		break;
508 	default:
509 		/* Other modes ignored */
510 		return (0);
511 	}
512 	return (1);
513 }
514 
515 static uint_t
516 vlapic_active_isr(struct vlapic *vlapic)
517 {
518 	int i;
519 	uint32_t *isrp;
520 
521 	isrp = &vlapic->apic_page->isr7;
522 
523 	for (i = 7; i >= 0; i--, isrp -= 4) {
524 		uint32_t reg = *isrp;
525 
526 		if (reg != 0) {
527 			uint_t vec = (i * 32) + bsrl(reg);
528 
529 			if (vec < 16) {
530 				/*
531 				 * Truncate the illegal low vectors to a value of
532 				 * 0, indicating that no active ISR was found.
533 				 */
534 				return (0);
535 			}
536 			return (vec);
537 		}
538 	}
539 
540 	return (0);
541 }
542 
543 /*
544  * After events which might arbitrarily change the value of PPR, such as a TPR
545  * write or an EOI, calculate that new PPR value and store it in the APIC page.
546  */
547 static void
548 vlapic_update_ppr(struct vlapic *vlapic)
549 {
550 	int isrvec, tpr, ppr;
551 
552 	isrvec = vlapic_active_isr(vlapic);
553 	tpr = vlapic->apic_page->tpr;
554 
555 	/*
556 	 * Algorithm adopted from section "Interrupt, Task and Processor
557 	 * Priority" in Intel Architecture Manual Vol 3a.
558 	 */
559 	if (PRIO(tpr) >= PRIO(isrvec)) {
560 		ppr = tpr;
561 	} else {
562 		ppr = PRIO(isrvec);
563 	}
564 
565 	vlapic->apic_page->ppr = ppr;
566 }
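
/*
 * Worked example, illustrative only: with TPR == 0x40 and vector 0x65
 * in-service, PRIO(0x40) is below PRIO(0x65) == 0x60, so the resulting
 * PPR is 0x60 and pending vectors of priority class 0x60 or lower remain
 * undeliverable until the in-service vector is EOIed.
 */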
567 
568 /*
569  * When a vector is asserted in ISR as in-service, the PPR must be raised to the
570  * priority of that vector, as the vCPU would have been at a lower priority in
571  * order for the vector to be accepted.
572  */
573 static void
574 vlapic_raise_ppr(struct vlapic *vlapic, int vec)
575 {
576 	struct LAPIC *lapic = vlapic->apic_page;
577 	int ppr;
578 
579 	ppr = PRIO(vec);
580 
581 	lapic->ppr = ppr;
582 }
583 
584 void
585 vlapic_sync_tpr(struct vlapic *vlapic)
586 {
587 	vlapic_update_ppr(vlapic);
588 }
589 
590 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");
591 
592 static void
593 vlapic_process_eoi(struct vlapic *vlapic)
594 {
595 	struct LAPIC	*lapic = vlapic->apic_page;
596 	uint32_t	*isrptr, *tmrptr;
597 	int		i;
598 	uint_t		idx, bitpos, vector;
599 
600 	isrptr = &lapic->isr0;
601 	tmrptr = &lapic->tmr0;
602 
603 	for (i = 7; i >= 0; i--) {
604 		idx = i * 4;
605 		if (isrptr[idx] != 0) {
606 			bitpos = bsrl(isrptr[idx]);
607 			vector = i * 32 + bitpos;
608 
609 			isrptr[idx] &= ~(1 << bitpos);
610 			vlapic_update_ppr(vlapic);
611 			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
612 				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
613 				    vector);
614 			}
615 			return;
616 		}
617 	}
618 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
619 }
620 
621 static __inline int
622 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
623 {
624 
625 	return (lvt & mask);
626 }
627 
628 static __inline int
629 vlapic_periodic_timer(struct vlapic *vlapic)
630 {
631 	uint32_t lvt;
632 
633 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
634 
635 	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
636 }
637 
638 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
639 
640 static void
641 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
642 {
643 
644 	vlapic->esr_pending |= mask;
645 
646 	/*
647 	 * Avoid infinite recursion if the error LVT itself is configured with
648 	 * an illegal vector.
649 	 */
650 	if (lvt_error)
651 		return;
652 
653 	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
654 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
655 	}
656 }
657 
658 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
659 
660 static void
vlapic_fire_timer(struct vlapic * vlapic)661 vlapic_fire_timer(struct vlapic *vlapic)
662 {
663 	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
664 
665 	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
666 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
667 	}
668 }
669 
670 static VMM_STAT(VLAPIC_INTR_CMC,
671 	"corrected machine check interrupts generated by vlapic");
672 
673 void
674 vlapic_fire_cmci(struct vlapic *vlapic)
675 {
676 
677 	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
678 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
679 	}
680 }
681 
682 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
683 	"lvts triggered");
684 
685 int
686 vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
687 {
688 	if (!vlapic_enabled(vlapic)) {
689 		/*
690 		 * When the local APIC is global/hardware disabled,
691 		 * LINT[1:0] pins are configured as INTR and NMI pins,
692 		 * respectively.
693 		 */
694 		switch (vector) {
695 			case APIC_LVT_LINT0:
696 				(void) vm_inject_extint(vlapic->vm,
697 				    vlapic->vcpuid);
698 				break;
699 			case APIC_LVT_LINT1:
700 				(void) vm_inject_nmi(vlapic->vm,
701 				    vlapic->vcpuid);
702 				break;
703 			default:
704 				break;
705 		}
706 		return (0);
707 	}
708 
709 	switch (vector) {
710 	case APIC_LVT_LINT0:
711 	case APIC_LVT_LINT1:
712 	case APIC_LVT_TIMER:
713 	case APIC_LVT_ERROR:
714 	case APIC_LVT_PMC:
715 	case APIC_LVT_THERMAL:
716 	case APIC_LVT_CMCI:
717 		if (vlapic_fire_lvt(vlapic, vector)) {
718 			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
719 			    LVTS_TRIGGERRED, vector, 1);
720 		}
721 		break;
722 	default:
723 		return (EINVAL);
724 	}
725 	return (0);
726 }
727 
728 static void
729 vlapic_callout_reset(struct vlapic *vlapic)
730 {
731 	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
732 	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
733 }
734 
735 static void
736 vlapic_callout_handler(void *arg)
737 {
738 	struct vlapic *vlapic = arg;
739 
740 	VLAPIC_TIMER_LOCK(vlapic);
741 	if (callout_pending(&vlapic->callout))	/* callout was reset */
742 		goto done;
743 
744 	if (!callout_active(&vlapic->callout))	/* callout was stopped */
745 		goto done;
746 
747 	callout_deactivate(&vlapic->callout);
748 
749 	vlapic_fire_timer(vlapic);
750 
751 	/*
752 	 * We should not end up here with timer_period == 0, but to prevent a
753 	 * runaway periodic timer, it is checked anyway.
754 	 */
755 	if (vlapic_periodic_timer(vlapic) && vlapic->timer_period != 0) {
756 		/*
757 		 * Compute the delta between when the timer was supposed to
758 		 * fire and the present time.  We can depend on the fact that
759 		 * cyclics (which underlie these callouts) will never be called
760 		 * early.
761 		 */
762 		const hrtime_t now = gethrtime();
763 		const hrtime_t delta = now - vlapic->timer_fire_when;
764 		if (delta >= vlapic->timer_period) {
765 			/*
766 			 * If we are so behind that we have missed an entire
767 			 * timer period, reset the time base rather than
768 			 * attempting to catch up.
769 			 */
770 			vlapic->timer_fire_when = now + vlapic->timer_period;
771 		} else {
772 			vlapic->timer_fire_when += vlapic->timer_period;
773 		}
774 		vlapic_callout_reset(vlapic);
775 	} else {
776 		/*
777 		 * Clear the target time so that logic can distinguish a
778 		 * timer which has fired (where the value is zero) from one
779 		 * which is held pending due to the instance being paused (where
780 		 * the value is non-zero, but the callout is not pending).
781 		 */
782 		vlapic->timer_fire_when = 0;
783 	}
784 done:
785 	VLAPIC_TIMER_UNLOCK(vlapic);
786 }
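
/*
 * Illustrative timeline for the catch-up logic above, not part of the
 * original source: with a 10 ms period and a callout that fires 3 ms late,
 * the next target becomes the prior deadline plus 10 ms, absorbing the
 * drift.  Had the callout been 12 ms late, an entire period was missed, so
 * the next target is rebased to now + 10 ms instead of firing twice in
 * quick succession.
 */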
787 
788 void
vlapic_icrtmr_write_handler(struct vlapic * vlapic)789 vlapic_icrtmr_write_handler(struct vlapic *vlapic)
790 {
791 	struct LAPIC *lapic = vlapic->apic_page;
792 
793 	VLAPIC_TIMER_LOCK(vlapic);
794 	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
795 	    lapic->icr_timer);
796 	if (vlapic->timer_period != 0) {
797 		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
798 		vlapic_callout_reset(vlapic);
799 	} else {
800 		vlapic->timer_fire_when = 0;
801 		callout_stop(&vlapic->callout);
802 	}
803 	VLAPIC_TIMER_UNLOCK(vlapic);
804 }
805 
806 /*
807  * This function populates 'dmask' with the set of vcpus that match the
808  * addressing specified by the (dest, phys, lowprio) tuple.
809  *
810  * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
811  * or xAPIC (8-bit) destination field.
812  */
813 void
814 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
815     bool lowprio, bool x2apic_dest)
816 {
817 	struct vlapic *vlapic;
818 	uint32_t dfr, ldr, ldest, cluster;
819 	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
820 	cpuset_t amask;
821 	int vcpuid;
822 
823 	if ((x2apic_dest && dest == 0xffffffff) ||
824 	    (!x2apic_dest && dest == 0xff)) {
825 		/*
826 		 * Broadcast in both logical and physical modes.
827 		 */
828 		*dmask = vm_active_cpus(vm);
829 		return;
830 	}
831 
832 	if (phys) {
833 		/*
834 		 * Physical mode: destination is APIC ID.
835 		 */
836 		CPU_ZERO(dmask);
837 		vcpuid = vm_apicid2vcpuid(vm, dest);
838 		amask = vm_active_cpus(vm);
839 		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
840 			CPU_SET(vcpuid, dmask);
841 	} else {
842 		/*
843 		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
844 		 * bitmask. This model is only available in the xAPIC mode.
845 		 */
846 		mda_flat_ldest = dest & 0xff;
847 
848 		/*
849 		 * In the "Cluster Model" the MDA is used to identify a
850 		 * specific cluster and a set of APICs in that cluster.
851 		 */
852 		if (x2apic_dest) {
853 			mda_cluster_id = dest >> 16;
854 			mda_cluster_ldest = dest & 0xffff;
855 		} else {
856 			mda_cluster_id = (dest >> 4) & 0xf;
857 			mda_cluster_ldest = dest & 0xf;
858 		}
859 
860 		/*
861 		 * Logical mode: match each APIC that has a bit set
862 		 * in its LDR that matches a bit in the ldest.
863 		 */
864 		CPU_ZERO(dmask);
865 		amask = vm_active_cpus(vm);
866 		while ((vcpuid = CPU_FFS(&amask)) != 0) {
867 			vcpuid--;
868 			CPU_CLR(vcpuid, &amask);
869 
870 			vlapic = vm_lapic(vm, vcpuid);
871 			dfr = vlapic->apic_page->dfr;
872 			ldr = vlapic->apic_page->ldr;
873 
874 			if ((dfr & APIC_DFR_MODEL_MASK) ==
875 			    APIC_DFR_MODEL_FLAT) {
876 				ldest = ldr >> 24;
877 				mda_ldest = mda_flat_ldest;
878 			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
879 			    APIC_DFR_MODEL_CLUSTER) {
880 				if (vlapic_x2mode(vlapic)) {
881 					cluster = ldr >> 16;
882 					ldest = ldr & 0xffff;
883 				} else {
884 					cluster = ldr >> 28;
885 					ldest = (ldr >> 24) & 0xf;
886 				}
887 				if (cluster != mda_cluster_id)
888 					continue;
889 				mda_ldest = mda_cluster_ldest;
890 			} else {
891 				/*
892 				 * Guest has configured a bad logical
893 				 * model for this vcpu - skip it.
894 				 */
895 				continue;
896 			}
897 
898 			if ((mda_ldest & ldest) != 0) {
899 				CPU_SET(vcpuid, dmask);
900 				if (lowprio)
901 					break;
902 			}
903 		}
904 	}
905 }
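
/*
 * A hypothetical caller, sketched only to show the parameter convention
 * and not part of the original source:
 *
 *	cpuset_t dmask;
 *
 *	// Flat-model logical MDA 0x03 matches any active vCPU whose LDR
 *	// has bit 24 or bit 25 set (logical IDs 0x01 and 0x02).
 *	vlapic_calcdest(vm, &dmask, 0x03, false, false, false);
 */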
906 
907 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
908 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");
909 
910 static void
911 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
912 {
913 	struct LAPIC *lapic = vlapic->apic_page;
914 
915 	if (lapic->tpr != val) {
916 		lapic->tpr = val;
917 		vlapic_update_ppr(vlapic);
918 	}
919 }
920 
921 void
922 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
923 {
924 	uint8_t tpr;
925 
926 	if (val & ~0xf) {
927 		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
928 		return;
929 	}
930 
931 	tpr = val << 4;
932 	vlapic_set_tpr(vlapic, tpr);
933 }
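
/*
 * Worked example, illustrative only: a guest write of 0x7 to CR8 becomes
 * TPR 0x70, and a subsequent read of CR8 returns lapic->tpr >> 4 == 0x7;
 * values above 0xf inject #GP rather than being silently truncated.
 */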
934 
935 uint64_t
936 vlapic_get_cr8(const struct vlapic *vlapic)
937 {
938 	const struct LAPIC *lapic = vlapic->apic_page;
939 
940 	return (lapic->tpr >> 4);
941 }
942 
943 static bool
944 vlapic_is_icr_valid(uint64_t icrval)
945 {
946 	uint32_t mode = icrval & APIC_DELMODE_MASK;
947 	uint32_t level = icrval & APIC_LEVEL_MASK;
948 	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
949 	uint32_t shorthand = icrval & APIC_DEST_MASK;
950 
951 	switch (mode) {
952 	case APIC_DELMODE_FIXED:
953 		if (trigger == APIC_TRIGMOD_EDGE)
954 			return (true);
955 		/*
956 		 * AMD allows a level assert IPI and Intel converts a level
957 		 * assert IPI into an edge IPI.
958 		 */
959 		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
960 			return (true);
961 		break;
962 	case APIC_DELMODE_LOWPRIO:
963 	case APIC_DELMODE_SMI:
964 	case APIC_DELMODE_NMI:
965 	case APIC_DELMODE_INIT:
966 		if (trigger == APIC_TRIGMOD_EDGE &&
967 		    (shorthand == APIC_DEST_DESTFLD ||
968 		    shorthand == APIC_DEST_ALLESELF)) {
969 			return (true);
970 		}
971 		/*
972 		 * AMD allows a level assert IPI and Intel converts a level
973 		 * assert IPI into an edge IPI.
974 		 */
975 		if (trigger == APIC_TRIGMOD_LEVEL &&
976 		    level == APIC_LEVEL_ASSERT &&
977 		    (shorthand == APIC_DEST_DESTFLD ||
978 		    shorthand == APIC_DEST_ALLESELF)) {
979 			return (true);
980 		}
981 		/*
982 		 * A level-triggered deassert INIT is defined in the Intel
983 		 * Multiprocessor Specification and the Intel Software Developer
984 		 * Manual. The MPS requires sending a level assert INIT to a cpu
985 		 * followed by a level deassert INIT. Some operating systems,
986 		 * e.g. FreeBSD or Linux, use that algorithm. According to the
987 		 * SDM a level deassert INIT is only supported by Pentium and
988 		 * P6 processors. It is always sent to all cpus regardless of
989 		 * the destination or shorthand field. It resets the arbitration
990 		 * id register. This register is not software accessible and is
991 		 * only required for APIC bus arbitration. So, the level
992 		 * deassert INIT doesn't need any emulation and we should ignore
993 		 * it. The SDM also states that newer processors don't support
994 		 * the level deassert INIT and that it is no longer valid. As it
995 		 * is defined for older systems, it can't be invalid per se.
996 		 * Otherwise, backward compatibility would be broken. However,
997 		 * when returning false here, it'll be ignored, which is the
998 		 * desired behaviour.
999 		 */
1000 		if (mode == APIC_DELMODE_INIT &&
1001 		    trigger == APIC_TRIGMOD_LEVEL &&
1002 		    level == APIC_LEVEL_DEASSERT) {
1003 			return (false);
1004 		}
1005 		break;
1006 	case APIC_DELMODE_STARTUP:
1007 		if (shorthand == APIC_DEST_DESTFLD ||
1008 		    shorthand == APIC_DEST_ALLESELF) {
1009 			return (true);
1010 		}
1011 		break;
1012 	case APIC_DELMODE_RR:
1013 		/* Only available on AMD! */
1014 		if (trigger == APIC_TRIGMOD_EDGE &&
1015 		    shorthand == APIC_DEST_DESTFLD) {
1016 			return (true);
1017 		}
1018 		break;
1019 	case APIC_DELMODE_RESV:
1020 		return (false);
1021 	default:
1022 		panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
1023 	}
1024 
1025 	return (false);
1026 }
1027 
1028 void
1029 vlapic_icrlo_write_handler(struct vlapic *vlapic)
1030 {
1031 	int i;
1032 	cpuset_t dmask;
1033 	uint64_t icrval;
1034 	uint32_t dest, vec, mode, dsh;
1035 	struct LAPIC *lapic;
1036 
1037 	lapic = vlapic->apic_page;
1038 	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
1039 	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
1040 
1041 	/*
1042 	 * Ignore invalid combinations of the icr.
1043 	 */
1044 	if (!vlapic_is_icr_valid(icrval))
1045 		return;
1046 
1047 	if (vlapic_x2mode(vlapic))
1048 		dest = icrval >> 32;
1049 	else
1050 		dest = icrval >> (32 + 24);
1051 	vec = icrval & APIC_VECTOR_MASK;
1052 	mode = icrval & APIC_DELMODE_MASK;
1053 	dsh = icrval & APIC_DEST_MASK;
1054 
1055 	if (mode == APIC_DELMODE_FIXED && vec < 16) {
1056 		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
1057 		return;
1058 	}
1059 
1060 	if (mode == APIC_DELMODE_INIT &&
1061 	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
1062 		/* No work required to deassert INIT */
1063 		return;
1064 	}
1065 
1066 	switch (dsh) {
1067 	case APIC_DEST_DESTFLD:
1068 		vlapic_calcdest(vlapic->vm, &dmask, dest,
1069 		    (icrval & APIC_DESTMODE_LOG) == 0, false,
1070 		    vlapic_x2mode(vlapic));
1071 		break;
1072 	case APIC_DEST_SELF:
1073 		CPU_SETOF(vlapic->vcpuid, &dmask);
1074 		break;
1075 	case APIC_DEST_ALLISELF:
1076 		dmask = vm_active_cpus(vlapic->vm);
1077 		break;
1078 	case APIC_DEST_ALLESELF:
1079 		dmask = vm_active_cpus(vlapic->vm);
1080 		CPU_CLR(vlapic->vcpuid, &dmask);
1081 		break;
1082 	default:
1083 		/*
1084 		 * All possible delivery shorthands are covered above.
1085 		 * We should never end up here.
1086 		 */
1087 		panic("unknown delivery shorthand: %x", dsh);
1088 	}
1089 
1090 	while ((i = CPU_FFS(&dmask)) != 0) {
1091 		i--;
1092 		CPU_CLR(i, &dmask);
1093 		switch (mode) {
1094 		case APIC_DELMODE_FIXED:
1095 			(void) lapic_intr_edge(vlapic->vm, i, vec);
1096 			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
1097 			    VLAPIC_IPI_SEND, 1);
1098 			vmm_stat_incr(vlapic->vm, i,
1099 			    VLAPIC_IPI_RECV, 1);
1100 			break;
1101 		case APIC_DELMODE_NMI:
1102 			(void) vm_inject_nmi(vlapic->vm, i);
1103 			break;
1104 		case APIC_DELMODE_INIT:
1105 			(void) vm_inject_init(vlapic->vm, i);
1106 			break;
1107 		case APIC_DELMODE_STARTUP:
1108 			(void) vm_inject_sipi(vlapic->vm, i, vec);
1109 			break;
1110 		case APIC_DELMODE_LOWPRIO:
1111 		case APIC_DELMODE_SMI:
1112 		default:
1113 			/* Unhandled IPI modes (for now) */
1114 			break;
1115 		}
1116 	}
1117 }
1118 
1119 void
1120 vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
1121 {
1122 	const int vec = val & 0xff;
1123 
1124 	/* self-IPI is only exposed via x2APIC */
1125 	ASSERT(vlapic_x2mode(vlapic));
1126 
1127 	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
1128 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
1129 	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
1130 }
1131 
1132 int
1133 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
1134 {
1135 	struct LAPIC	*lapic = vlapic->apic_page;
1136 	int		 idx, i, bitpos, vector;
1137 	uint32_t	*irrptr, val;
1138 
1139 	if (vlapic->ops.sync_state) {
1140 		(*vlapic->ops.sync_state)(vlapic);
1141 	}
1142 
1143 	irrptr = &lapic->irr0;
1144 
1145 	for (i = 7; i >= 0; i--) {
1146 		idx = i * 4;
1147 		val = atomic_load_acq_int(&irrptr[idx]);
1148 		bitpos = fls(val);
1149 		if (bitpos != 0) {
1150 			vector = i * 32 + (bitpos - 1);
1151 			if (PRIO(vector) > PRIO(lapic->ppr)) {
1152 				if (vecptr != NULL)
1153 					*vecptr = vector;
1154 				return (1);
1155 			} else
1156 				break;
1157 		}
1158 	}
1159 	return (0);
1160 }
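
/*
 * Worked example, illustrative only: with PPR == 0x50 and vector 0x62
 * pending in the IRR, PRIO(0x62) == 0x60 exceeds 0x50, so the vector is
 * reported as deliverable.  Were PPR raised to 0x60, the same vector would
 * be held back, since delivery requires a strictly higher priority class.
 */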
1161 
1162 void
1163 vlapic_intr_accepted(struct vlapic *vlapic, int vector)
1164 {
1165 	struct LAPIC	*lapic = vlapic->apic_page;
1166 	uint32_t	*irrptr, *isrptr;
1167 	int		idx;
1168 
1169 	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));
1170 
1171 	if (vlapic->ops.intr_accepted)
1172 		return ((*vlapic->ops.intr_accepted)(vlapic, vector));
1173 
1174 	/*
1175 	 * clear the ready bit for vector being accepted in irr
1176 	 * and set the vector as in service in isr.
1177 	 */
1178 	idx = (vector / 32) * 4;
1179 
1180 	irrptr = &lapic->irr0;
1181 	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
1182 
1183 	isrptr = &lapic->isr0;
1184 	isrptr[idx] |= 1 << (vector % 32);
1185 
1186 	/*
1187 	 * The only way a fresh vector could be accepted into ISR is if it was
1188 	 * of a higher priority than the current PPR.  With that vector now
1189 	 * in-service, the PPR must be raised.
1190 	 */
1191 	vlapic_raise_ppr(vlapic, vector);
1192 }
1193 
1194 void
1195 vlapic_svr_write_handler(struct vlapic *vlapic)
1196 {
1197 	struct LAPIC *lapic;
1198 	uint32_t old, new, changed;
1199 
1200 	lapic = vlapic->apic_page;
1201 
1202 	new = lapic->svr;
1203 	old = vlapic->svr_last;
1204 	vlapic->svr_last = new;
1205 
1206 	changed = old ^ new;
1207 	if ((changed & APIC_SVR_ENABLE) != 0) {
1208 		if ((new & APIC_SVR_ENABLE) == 0) {
1209 			/*
1210 			 * The apic is now disabled so stop the apic timer
1211 			 * and mask all the LVT entries.
1212 			 */
1213 			VLAPIC_TIMER_LOCK(vlapic);
1214 			callout_stop(&vlapic->callout);
1215 			VLAPIC_TIMER_UNLOCK(vlapic);
1216 			vlapic_mask_lvts(vlapic);
1217 		} else {
1218 			/*
1219 			 * The apic is now enabled so restart the apic timer
1220 			 * if it is configured in periodic mode.
1221 			 */
1222 			if (vlapic_periodic_timer(vlapic))
1223 				vlapic_icrtmr_write_handler(vlapic);
1224 		}
1225 	}
1226 }
1227 
1228 static bool
1229 vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
1230 {
1231 	struct LAPIC *lapic = vlapic->apic_page;
1232 	uint32_t *reg;
1233 	int i;
1234 
1235 	ASSERT3U(offset & 0x3, ==, 0);
1236 	ASSERT3U(offset, <, PAGESIZE);
1237 	ASSERT3P(outp, !=, NULL);
1238 
1239 	uint32_t data = 0;
1240 	switch (offset) {
1241 	case APIC_OFFSET_ID:
1242 		data = lapic->id;
1243 		break;
1244 	case APIC_OFFSET_VER:
1245 		data = lapic->version;
1246 		break;
1247 	case APIC_OFFSET_TPR:
1248 		data = lapic->tpr;
1249 		break;
1250 	case APIC_OFFSET_APR:
1251 		data = lapic->apr;
1252 		break;
1253 	case APIC_OFFSET_PPR:
1254 		data = lapic->ppr;
1255 		break;
1256 	case APIC_OFFSET_LDR:
1257 		data = lapic->ldr;
1258 		break;
1259 	case APIC_OFFSET_DFR:
1260 		data = lapic->dfr;
1261 		break;
1262 	case APIC_OFFSET_SVR:
1263 		data = lapic->svr;
1264 		break;
1265 	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1266 		i = (offset - APIC_OFFSET_ISR0) >> 2;
1267 		reg = &lapic->isr0;
1268 		data = *(reg + i);
1269 		break;
1270 	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1271 		i = (offset - APIC_OFFSET_TMR0) >> 2;
1272 		reg = &lapic->tmr0;
1273 		data = *(reg + i);
1274 		break;
1275 	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1276 		i = (offset - APIC_OFFSET_IRR0) >> 2;
1277 		reg = &lapic->irr0;
1278 		data = atomic_load_acq_int(reg + i);
1279 		break;
1280 	case APIC_OFFSET_ESR:
1281 		data = lapic->esr;
1282 		break;
1283 	case APIC_OFFSET_ICR_LOW:
1284 		data = lapic->icr_lo;
1285 		break;
1286 	case APIC_OFFSET_ICR_HI:
1287 		data = lapic->icr_hi;
1288 		break;
1289 	case APIC_OFFSET_CMCI_LVT:
1290 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1291 		data = vlapic_get_lvt(vlapic, offset);
1292 #ifdef INVARIANTS
1293 		reg = vlapic_get_lvtptr(vlapic, offset);
1294 		ASSERT3U(data, ==, *reg);
1295 #endif
1296 		break;
1297 	case APIC_OFFSET_TIMER_ICR:
1298 		data = lapic->icr_timer;
1299 		break;
1300 	case APIC_OFFSET_TIMER_CCR:
1301 		data = vlapic_get_ccr(vlapic);
1302 		break;
1303 	case APIC_OFFSET_TIMER_DCR:
1304 		data = lapic->dcr_timer;
1305 		break;
1306 	case APIC_OFFSET_RRR:
1307 		data = 0;
1308 		break;
1309 
1310 	case APIC_OFFSET_SELF_IPI:
1311 	case APIC_OFFSET_EOI:
1312 		/* Write-only register */
1313 		*outp = 0;
1314 		return (false);
1315 
1316 	default:
1317 		/* Invalid register */
1318 		*outp = 0;
1319 		return (false);
1320 	}
1321 
1322 	*outp = data;
1323 	return (true);
1324 }
1325 
1326 static bool
1327 vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
1328 {
1329 	struct LAPIC	*lapic = vlapic->apic_page;
1330 	uint32_t	*regptr;
1331 
1332 	ASSERT3U(offset & 0xf, ==, 0);
1333 	ASSERT3U(offset, <, PAGESIZE);
1334 
1335 	switch (offset) {
1336 	case APIC_OFFSET_ID:
1337 		lapic->id = data;
1338 		vlapic_id_write_handler(vlapic);
1339 		break;
1340 	case APIC_OFFSET_TPR:
1341 		vlapic_set_tpr(vlapic, data & 0xff);
1342 		break;
1343 	case APIC_OFFSET_EOI:
1344 		vlapic_process_eoi(vlapic);
1345 		break;
1346 	case APIC_OFFSET_LDR:
1347 		lapic->ldr = data;
1348 		vlapic_ldr_write_handler(vlapic);
1349 		break;
1350 	case APIC_OFFSET_DFR:
1351 		lapic->dfr = data;
1352 		vlapic_dfr_write_handler(vlapic);
1353 		break;
1354 	case APIC_OFFSET_SVR:
1355 		lapic->svr = data;
1356 		vlapic_svr_write_handler(vlapic);
1357 		break;
1358 	case APIC_OFFSET_ICR_LOW:
1359 		lapic->icr_lo = data;
1360 		vlapic_icrlo_write_handler(vlapic);
1361 		break;
1362 	case APIC_OFFSET_ICR_HI:
1363 		lapic->icr_hi = data;
1364 		break;
1365 	case APIC_OFFSET_CMCI_LVT:
1366 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1367 		regptr = vlapic_get_lvtptr(vlapic, offset);
1368 		*regptr = data;
1369 		vlapic_lvt_write_handler(vlapic, offset);
1370 		break;
1371 	case APIC_OFFSET_TIMER_ICR:
1372 		lapic->icr_timer = data;
1373 		vlapic_icrtmr_write_handler(vlapic);
1374 		break;
1375 
1376 	case APIC_OFFSET_TIMER_DCR:
1377 		lapic->dcr_timer = data;
1378 		vlapic_dcr_write_handler(vlapic);
1379 		break;
1380 
1381 	case APIC_OFFSET_ESR:
1382 		vlapic_esr_write_handler(vlapic);
1383 		break;
1384 
1385 	case APIC_OFFSET_SELF_IPI:
1386 		if (vlapic_x2mode(vlapic))
1387 			vlapic_self_ipi_handler(vlapic, data);
1388 		break;
1389 
1390 	case APIC_OFFSET_VER:
1391 	case APIC_OFFSET_APR:
1392 	case APIC_OFFSET_PPR:
1393 	case APIC_OFFSET_RRR:
1394 	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1395 	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1396 	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1397 	case APIC_OFFSET_TIMER_CCR:
1398 		/* Read-only register */
1399 		return (false);
1400 
1401 	default:
1402 		/* Invalid register */
1403 		return (false);
1404 	}
1405 
1406 	return (true);
1407 }
1408 
1409 void
1410 vlapic_reset(struct vlapic *vlapic)
1411 {
1412 	struct LAPIC *lapic = vlapic->apic_page;
1413 	uint32_t *isrptr, *tmrptr, *irrptr;
1414 
1415 	/* Reset any timer-related state first */
1416 	VLAPIC_TIMER_LOCK(vlapic);
1417 	callout_stop(&vlapic->callout);
1418 	vlapic->timer_fire_when = 0;
1419 	lapic->icr_timer = 0;
1420 	lapic->ccr_timer = 0;
1421 	lapic->dcr_timer = 0;
1422 	vlapic_update_divider(vlapic);
1423 	VLAPIC_TIMER_UNLOCK(vlapic);
1424 
1425 	/*
1426 	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
1427 	 * it is not leftover after the reset.  This is performed after the APIC
1428 	 * timer has been stopped, in case it happened to fire just prior to
1429 	 * being deactivated.
1430 	 */
1431 	if (vlapic->ops.sync_state) {
1432 		(*vlapic->ops.sync_state)(vlapic);
1433 	}
1434 
1435 	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
1436 	if (vlapic->vcpuid == 0)
1437 		vlapic->msr_apicbase |= APICBASE_BSP;
1438 
1439 	lapic->id = vlapic_get_id(vlapic);
1440 	lapic->version = VLAPIC_VERSION;
1441 	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
1442 
1443 	lapic->tpr = 0;
1444 	lapic->apr = 0;
1445 	lapic->ppr = 0;
1446 
1447 	lapic->eoi = 0;
1448 	lapic->ldr = 0;
1449 	lapic->dfr = 0xffffffff;
1450 	lapic->svr = APIC_SVR_VECTOR;
1451 	vlapic->svr_last = lapic->svr;
1452 
1453 	isrptr = &lapic->isr0;
1454 	tmrptr = &lapic->tmr0;
1455 	irrptr = &lapic->irr0;
1456 	for (uint_t i = 0; i < 8; i++) {
1457 		atomic_store_rel_int(&isrptr[i * 4], 0);
1458 		atomic_store_rel_int(&tmrptr[i * 4], 0);
1459 		atomic_store_rel_int(&irrptr[i * 4], 0);
1460 	}
1461 
1462 	lapic->esr = 0;
1463 	vlapic->esr_pending = 0;
1464 	lapic->icr_lo = 0;
1465 	lapic->icr_hi = 0;
1466 
1467 	lapic->lvt_cmci = 0;
1468 	lapic->lvt_timer = 0;
1469 	lapic->lvt_thermal = 0;
1470 	lapic->lvt_pcint = 0;
1471 	lapic->lvt_lint0 = 0;
1472 	lapic->lvt_lint1 = 0;
1473 	lapic->lvt_error = 0;
1474 	vlapic_mask_lvts(vlapic);
1475 }
1476 
1477 void
1478 vlapic_init(struct vlapic *vlapic)
1479 {
1480 	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
1481 	KASSERT(vlapic->vcpuid >= 0 &&
1482 	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
1483 	    ("vlapic_init: vcpuid is not initialized"));
1484 	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
1485 	    "initialized"));
1486 
1487 	/*
1488 	 * If the vlapic is configured in x2apic mode then it will be
1489 	 * accessed in the critical section via the MSR emulation code.
1490 	 *
1491 	 * Therefore the timer mutex must be a spinlock because blockable
1492 	 * mutexes cannot be acquired in a critical section.
1493 	 */
1494 	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
1495 	callout_init(&vlapic->callout, 1);
1496 
1497 	vlapic_reset(vlapic);
1498 }
1499 
1500 void
1501 vlapic_cleanup(struct vlapic *vlapic)
1502 {
1503 	callout_drain(&vlapic->callout);
1504 	mutex_destroy(&vlapic->timer_lock);
1505 }
1506 
1507 int
1508 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
1509     uint_t size)
1510 {
1511 	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
1512 	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);
1513 
1514 	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
1515 	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
1516 		*valp = UINT64_MAX;
1517 		return (0);
1518 	}
1519 
1520 	const uint16_t off = gpa - DEFAULT_APIC_BASE;
1521 	uint32_t raw = 0;
1522 	(void) vlapic_read(vlapic, off & ~0xf, &raw);
1523 
1524 	/* Shift and mask reads which are small and/or unaligned */
1525 	const uint8_t align = off & 0xf;
1526 	if (align < 4) {
1527 		*valp = (uint64_t)raw << (align * 8);
1528 	} else {
1529 		*valp = 0;
1530 	}
1531 
1532 	return (0);
1533 }
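
/*
 * Worked example of the sub-register handling above, illustrative only: a
 * read at GPA offset 0x81 resolves to the TPR register (0x81 & ~0xf ==
 * 0x80) with align == 1, so the raw 32-bit value is returned shifted left
 * by 8 bits, while any access landing at align >= 4 simply yields zero.
 */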
1534 
1535 int
1536 vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
1537     uint_t size)
1538 {
1539 	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
1540 	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);
1541 
1542 	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
1543 	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
1544 		return (0);
1545 	}
1546 
1547 	const uint16_t off = gpa - DEFAULT_APIC_BASE;
1548 	/* Ignore writes which are not 32-bits wide and 16-byte aligned */
1549 	if ((off & 0xf) != 0 || size != 4) {
1550 		return (0);
1551 	}
1552 
1553 	(void) vlapic_write(vlapic, off, (uint32_t)val);
1554 	return (0);
1555 }
1556 
1557 /* Should attempts to change the APIC base address be rejected with a #GP?  */
1558 int vlapic_gp_on_addr_change = 1;
1559 
1560 static vm_msr_result_t
1561 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
1562 {
1563 	const uint64_t diff = vlapic->msr_apicbase ^ val;
1564 
1565 	/*
1566 	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
1567 	 * modes is more polished, it will remain off-limits from being altered
1568 	 * by the guest.
1569 	 */
1570 	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
1571 	    APICBASE_BSP;
1572 	if ((diff & reserved_bits) != 0) {
1573 		return (VMR_GP);
1574 	}
1575 
1576 	/* We do not presently allow the LAPIC access address to be modified. */
1577 	if ((diff & APICBASE_ADDR_MASK) != 0) {
1578 		/*
1579 		 * Explicitly rebuffing such requests with a #GP is the most
1580 		 * straightforward way to handle the situation, but certain
1581 		 * consumers (such as the KVM unit tests) may balk at the
1582 		 * otherwise unexpected exception.
1583 		 */
1584 		if (vlapic_gp_on_addr_change) {
1585 			return (VMR_GP);
1586 		}
1587 
1588 		/* If silence is required, just ignore the address change. */
1589 		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
1590 	}
1591 
1592 	vlapic->msr_apicbase = val;
1593 	return (VMR_OK);
1594 }
1595 
1596 static __inline uint16_t
1597 vlapic_msr_to_regoff(uint32_t msr)
1598 {
1599 	ASSERT3U(msr, >=, MSR_APIC_000);
1600 	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));
1601 
1602 	return ((msr - MSR_APIC_000) << 4);
1603 }
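
/*
 * Worked example, illustrative only: the x2APIC ICR MSR 0x830 maps to
 * ((0x830 - 0x800) << 4) == 0x300, which corresponds to
 * APIC_OFFSET_ICR_LOW in the memory-mapped register layout.
 */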
1604 
1605 bool
1606 vlapic_owned_msr(uint32_t msr)
1607 {
1608 	if (msr == MSR_APICBASE) {
1609 		return (true);
1610 	}
1611 	if (msr >= MSR_APIC_000 &&
1612 	    msr < (MSR_APIC_000 + 0x100)) {
1613 		return (true);
1614 	}
1615 	return (false);
1616 }
1617 
1618 vm_msr_result_t
1619 vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
1620 {
1621 	ASSERT(vlapic_owned_msr(msr));
1622 	ASSERT3P(valp, !=, NULL);
1623 
1624 	if (msr == MSR_APICBASE) {
1625 		*valp = vlapic->msr_apicbase;
1626 		return (VMR_OK);
1627 	}
1628 
1629 	/* #GP for x2APIC MSR accesses in xAPIC mode */
1630 	if (!vlapic_x2mode(vlapic)) {
1631 		return (VMR_GP);
1632 	}
1633 
1634 	uint64_t out = 0;
1635 	const uint16_t reg = vlapic_msr_to_regoff(msr);
1636 	switch (reg) {
1637 	case APIC_OFFSET_ICR_LOW: {
1638 		/* Read from ICR register gets entire (64-bit) value */
1639 		uint32_t low = 0, high = 0;
1640 		bool valid;
1641 
1642 		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
1643 		VERIFY(valid);
1644 		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
1645 		VERIFY(valid);
1646 
1647 		*valp = ((uint64_t)high << 32) | low;
1648 		return (VMR_OK);
1649 		}
1650 	case APIC_OFFSET_ICR_HI:
1651 		/* Already covered by ICR_LOW */
1652 		return (VMR_GP);
1653 	default:
1654 		break;
1655 	}
1656 	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
1657 		return (VMR_GP);
1658 	}
1659 	*valp = out;
1660 	return (VMR_OK);
1661 }
1662 
1663 vm_msr_result_t
1664 vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
1665 {
1666 	ASSERT(vlapic_owned_msr(msr));
1667 
1668 	if (msr == MSR_APICBASE) {
1669 		return (vlapic_set_apicbase(vlapic, val));
1670 	}
1671 
1672 	/* #GP for x2APIC MSR accesses in xAPIC mode */
1673 	if (!vlapic_x2mode(vlapic)) {
1674 		return (VMR_GP);
1675 	}
1676 
1677 	const uint16_t reg = vlapic_msr_to_regoff(msr);
1678 	switch (reg) {
1679 	case APIC_OFFSET_ICR_LOW: {
1680 		/* Write to ICR register sets entire (64-bit) value */
1681 		bool valid;
1682 
1683 		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
1684 		VERIFY(valid);
1685 		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
1686 		VERIFY(valid);
1687 		return (VMR_OK);
1688 		}
1689 	case APIC_OFFSET_ICR_HI:
1690 		/* Already covered by ICR_LOW */
1691 		return (VMR_GP);
1692 	case APIC_OFFSET_ESR:
1693 		/* Only 0 may be written from x2APIC mode */
1694 		if (val != 0) {
1695 			return (VMR_GP);
1696 		}
1697 		break;
1698 	default:
1699 		break;
1700 	}
1701 	if (!vlapic_write(vlapic, reg, val)) {
1702 		return (VMR_GP);
1703 	}
1704 	return (VMR_OK);
1705 }
1706 
1707 void
1708 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1709 {
1710 	struct vlapic *vlapic;
1711 	struct LAPIC *lapic;
1712 
1713 	vlapic = vm_lapic(vm, vcpuid);
1714 
1715 	if (state == X2APIC_DISABLED)
1716 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
1717 	else
1718 		vlapic->msr_apicbase |= APICBASE_X2APIC;
1719 
1720 	/*
1721 	 * Reset the local APIC registers whose values are mode-dependent.
1722 	 *
1723 	 * XXX this works because the APIC mode can be changed only at vcpu
1724 	 * initialization time.
1725 	 */
1726 	lapic = vlapic->apic_page;
1727 	lapic->id = vlapic_get_id(vlapic);
1728 	if (vlapic_x2mode(vlapic)) {
1729 		lapic->ldr = x2apic_ldr(vlapic);
1730 		lapic->dfr = 0;
1731 	} else {
1732 		lapic->ldr = 0;
1733 		lapic->dfr = 0xffffffff;
1734 	}
1735 
1736 	if (state == X2APIC_ENABLED) {
1737 		if (vlapic->ops.enable_x2apic_mode)
1738 			(*vlapic->ops.enable_x2apic_mode)(vlapic);
1739 	}
1740 }
1741 
1742 void
1743 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
1744     int delmode, int vec)
1745 {
1746 	bool lowprio;
1747 	int vcpuid;
1748 	cpuset_t dmask;
1749 
1750 	if (delmode != IOART_DELFIXED &&
1751 	    delmode != IOART_DELLOPRI &&
1752 	    delmode != IOART_DELEXINT) {
1753 		/* Invalid delivery mode */
1754 		return;
1755 	}
1756 	lowprio = (delmode == IOART_DELLOPRI);
1757 
1758 	/*
1759 	 * We don't provide any virtual interrupt redirection hardware so
1760 	 * all interrupts originating from the ioapic or MSI specify the
1761 	 * 'dest' in the legacy xAPIC format.
1762 	 */
1763 	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
1764 
1765 	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
1766 		vcpuid--;
1767 		CPU_CLR(vcpuid, &dmask);
1768 		if (delmode == IOART_DELEXINT) {
1769 			(void) vm_inject_extint(vm, vcpuid);
1770 		} else {
1771 			(void) lapic_set_intr(vm, vcpuid, vec, level);
1772 		}
1773 	}
1774 }
1775 
1776 void
1777 vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
1778 {
1779 	/*
1780 	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
1781 	 *
1782 	 * This is done by leveraging features like Posted Interrupts (Intel)
1783 	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
1784 	 *
1785 	 * If neither of these features is available then fall back to
1786 	 * sending an IPI to 'hostcpu'.
1787 	 */
1788 	if (vlapic->ops.post_intr)
1789 		(*vlapic->ops.post_intr)(vlapic, hostcpu);
1790 	else
1791 		poke_cpu(hostcpu);
1792 }
1793 
1794 void
1795 vlapic_localize_resources(struct vlapic *vlapic)
1796 {
1797 	vmm_glue_callout_localize(&vlapic->callout);
1798 }
1799 
1800 void
1801 vlapic_pause(struct vlapic *vlapic)
1802 {
1803 	VLAPIC_TIMER_LOCK(vlapic);
1804 	callout_stop(&vlapic->callout);
1805 	VLAPIC_TIMER_UNLOCK(vlapic);
1806 
1807 }
1808 
1809 void
1810 vlapic_resume(struct vlapic *vlapic)
1811 {
1812 	VLAPIC_TIMER_LOCK(vlapic);
1813 	if (vlapic->timer_fire_when != 0) {
1814 		vlapic_callout_reset(vlapic);
1815 	}
1816 	VLAPIC_TIMER_UNLOCK(vlapic);
1817 }
1818 
1819 static int
1820 vlapic_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
1821 {
1822 	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
1823 	VERIFY3U(req->vdr_version, ==, 1);
1824 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));
1825 
1826 	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
1827 	struct vdi_lapic_v1 *out = req->vdr_data;
1828 
1829 	VLAPIC_TIMER_LOCK(vlapic);
1830 
1831 	if (vlapic->ops.sync_state) {
1832 		(*vlapic->ops.sync_state)(vlapic);
1833 	}
1834 
1835 	out->vl_msr_apicbase = vlapic->msr_apicbase;
1836 	out->vl_esr_pending = vlapic->esr_pending;
1837 	if (vlapic->timer_fire_when != 0) {
1838 		out->vl_timer_target =
1839 		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
1840 	} else {
1841 		out->vl_timer_target = 0;
1842 	}
1843 
1844 	const struct LAPIC *lapic = vlapic->apic_page;
1845 	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;
1846 
1847 	/*
1848 	 * While this might appear, at first glance, to be missing some fields,
1849 	 * they are intentionally omitted:
1850 	 * - PPR: its contents are always generated at runtime
1851 	 * - EOI: write-only, and contents are ignored after handling
1852 	 * - RRD: (aka RRR) read-only and always 0
1853 	 * - CCR: calculated from underlying timer data
1854 	 */
1855 	out_page->vlp_id = lapic->id;
1856 	out_page->vlp_version = lapic->version;
1857 	out_page->vlp_tpr = lapic->tpr;
1858 	out_page->vlp_apr = lapic->apr;
1859 	out_page->vlp_ldr = lapic->ldr;
1860 	out_page->vlp_dfr = lapic->dfr;
1861 	out_page->vlp_svr = lapic->svr;
1862 	out_page->vlp_esr = lapic->esr;
1863 	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
1864 	out_page->vlp_icr_timer = lapic->icr_timer;
1865 	out_page->vlp_dcr_timer = lapic->dcr_timer;
1866 
1867 	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
1868 	out_page->vlp_lvt_timer = lapic->lvt_timer;
1869 	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
1870 	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
1871 	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
1872 	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
1873 	out_page->vlp_lvt_error = lapic->lvt_error;
1874 
1875 	const uint32_t *isrptr = &lapic->isr0;
1876 	const uint32_t *tmrptr = &lapic->tmr0;
1877 	const uint32_t *irrptr = &lapic->irr0;
1878 	for (uint_t i = 0; i < 8; i++) {
1879 		out_page->vlp_isr[i] = isrptr[i * 4];
1880 		out_page->vlp_tmr[i] = tmrptr[i * 4];
1881 		out_page->vlp_irr[i] = irrptr[i * 4];
1882 	}
1883 	VLAPIC_TIMER_UNLOCK(vlapic);
1884 
1885 	return (0);
1886 }
1887 
1888 static uint8_t
1889 popc8(uint8_t val)
1890 {
1891 	uint8_t cnt;
1892 
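	/*
	 * Kernighan's method: 'val &= (val - 1)' clears the lowest set bit
	 * on each iteration, so the loop body runs once per set bit.
	 */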
1893 	for (cnt = 0; val != 0; val &= (val - 1)) {
1894 		cnt++;
1895 	}
1896 	return (cnt);
1897 }
1898 
1899 /*
1900  * Descriptions for the various failures which can occur when validating
1901  * to-be-written vlapic state.
1902  */
1903 enum vlapic_validation_error {
1904 	VVE_OK,
1905 	VVE_BAD_ID,
1906 	VVE_BAD_VERSION,
1907 	VVE_BAD_MSR_BASE,
1908 	VVE_BAD_ESR,
1909 	VVE_BAD_TPR,
1910 	VVE_LOW_VECTOR,
1911 	VVE_ISR_PRIORITY,
1912 	VVE_TIMER_MISMATCH,
1913 };
1914 
1915 static enum vlapic_validation_error
1916 vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
1917 {
1918 	ASSERT(req->vdr_version == 1 &&
1919 	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
1920 	const struct vdi_lapic_v1 *src = req->vdr_data;
1921 
1922 	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
1923 	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
1924 		return (VVE_BAD_ESR);
1925 	}
1926 
1927 	/* Use the same restrictions as the wrmsr accessor for now */
1928 	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
1929 	    APICBASE_BSP;
1930 	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
1931 	if ((diff & apicbase_reserved) != 0) {
1932 		return (VVE_BAD_MSR_BASE);
1933 	}
1934 
1935 	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
1936 	/*
1937 	 * Demand that ID match for now.  This can be further updated when some
1938 	 * of the x2apic handling is improved.
1939 	 */
1940 	if (page->vlp_id != vlapic_get_id(vlapic)) {
1941 		return (VVE_BAD_ID);
1942 	}
1943 
1944 	if (page->vlp_version != vlapic->apic_page->version) {
1945 		return (VVE_BAD_VERSION);
1946 	}
1947 
1948 	if (page->vlp_tpr > 0xff) {
1949 		return (VVE_BAD_TPR);
1950 	}
1951 
1952 	/* Vectors 0-15 are not expected to be handled by the lapic */
1953 	if ((page->vlp_isr[0] & 0xffff) != 0 ||
1954 	    (page->vlp_irr[0] & 0xffff) != 0 ||
1955 	    (page->vlp_tmr[0] & 0xffff) != 0) {
1956 		return (VVE_LOW_VECTOR);
1957 	}
1958 
1959 	/* Only one interrupt should be in-service for each priority level */
1960 	for (uint_t i = 0; i < 8; i++) {
1961 		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
1962 		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
1963 		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
1964 		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
1965 			return (VVE_ISR_PRIORITY);
1966 		}
1967 	}
1968 
1969 	/* If icr_timer is zero, then a scheduled timer does not make sense */
1970 	if (page->vlp_icr_timer == 0 && src->vl_timer_target != 0) {
1971 		return (VVE_TIMER_MISMATCH);
1972 	}
1973 
1974 	return (VVE_OK);
1975 }
1976 
1977 static int
1978 vlapic_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
1979 {
1980 	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
1981 	VERIFY3U(req->vdr_version, ==, 1);
1982 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));
1983 
1984 	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
1985 	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
1986 		return (EINVAL);
1987 	}
1988 	const struct vdi_lapic_v1 *src = req->vdr_data;
1989 	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
1990 	struct LAPIC *lapic = vlapic->apic_page;
1991 
1992 	VLAPIC_TIMER_LOCK(vlapic);
1993 
1994 	/* Already ensured by vlapic_data_validate() */
1995 	VERIFY3U(page->vlp_version, ==, lapic->version);
1996 
1997 	vlapic->msr_apicbase = src->vl_msr_apicbase;
1998 	vlapic->esr_pending = src->vl_esr_pending;
1999 
2000 	lapic->tpr = page->vlp_tpr;
2001 	lapic->apr = page->vlp_apr;
2002 	lapic->ldr = page->vlp_ldr;
2003 	lapic->dfr = page->vlp_dfr;
2004 	lapic->svr = page->vlp_svr;
2005 	lapic->esr = page->vlp_esr;
2006 	lapic->icr_lo = (uint32_t)page->vlp_icr;
2007 	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);
2008 
2009 	lapic->icr_timer = page->vlp_icr_timer;
2010 	lapic->dcr_timer = page->vlp_dcr_timer;
2011 	vlapic_update_divider(vlapic);
2012 
2013 	/* cleanse LDR/DFR */
2014 	vlapic_ldr_write_handler(vlapic);
2015 	vlapic_dfr_write_handler(vlapic);
2016 
2017 	lapic->lvt_cmci = page->vlp_lvt_cmci;
2018 	lapic->lvt_timer = page->vlp_lvt_timer;
2019 	lapic->lvt_thermal = page->vlp_lvt_thermal;
2020 	lapic->lvt_pcint = page->vlp_lvt_pcint;
2021 	lapic->lvt_lint0 = page->vlp_lvt_lint0;
2022 	lapic->lvt_lint1 = page->vlp_lvt_lint1;
2023 	lapic->lvt_error = page->vlp_lvt_error;
2024 	/* cleanse LVTs */
2025 	vlapic_refresh_lvts(vlapic);
2026 
2027 	uint32_t *isrptr = &lapic->isr0;
2028 	uint32_t *tmrptr = &lapic->tmr0;
2029 	uint32_t *irrptr = &lapic->irr0;
2030 	for (uint_t i = 0; i < 8; i++) {
2031 		isrptr[i * 4] = page->vlp_isr[i];
2032 		tmrptr[i * 4] = page->vlp_tmr[i];
2033 		irrptr[i * 4] = page->vlp_irr[i];
2034 	}
2035 
2036 	if (src->vl_timer_target != 0) {
2037 		vlapic->timer_fire_when =
2038 		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);
2039 
2040 		/*
2041 		 * Check to see if timer expiration would result in computed CCR
2042 		 * values in excess of what is configured in ICR/DCR.
2043 		 */
2044 		const hrtime_t now = gethrtime();
2045 		if (vlapic->timer_fire_when > now) {
2046 			const uint32_t ccr = hrt_freq_count(
2047 			    vlapic->timer_fire_when - now,
2048 			    vlapic->timer_cur_freq);
2049 
2050 			/*
2051 			 * Until we have a richer event/logging system
2052 			 * available, just note such an overage as a stat.
2053 			 */
2054 			if (ccr > lapic->icr_timer) {
2055 				vlapic->stats.vs_import_timer_overage++;
2056 			}
2057 		}
2058 
2059 		if (!vm_is_paused(vlapic->vm)) {
2060 			vlapic_callout_reset(vlapic);
2061 		}
2062 	} else {
2063 		vlapic->timer_fire_when = 0;
2064 	}
2065 
2066 	if (vlapic->ops.sync_state) {
2067 		(*vlapic->ops.sync_state)(vlapic);
2068 	}
2069 	VLAPIC_TIMER_UNLOCK(vlapic);
2070 
2071 	return (0);
2072 }
2073 
2074 static const vmm_data_version_entry_t lapic_v1 = {
2075 	.vdve_class = VDC_LAPIC,
2076 	.vdve_version = 1,
2077 	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
2078 	.vdve_vcpu_readf = vlapic_data_read,
2079 	.vdve_vcpu_writef = vlapic_data_write,
2080 };
2081 VMM_DATA_VERSION(lapic_v1);
2082