/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2024 Oxide Computer Company
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"


/*
 * The 4 high bits of a given interrupt vector represent its priority. The same
 * is true for the contents of the TPR when it is used to calculate the ultimate
 * PPR of an APIC - the 4 high bits hold the priority.
 */
#define	PRIO(x)			((x) & 0xf0)

#define	VLAPIC_VERSION		(0x14)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mutex_enter(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mutex_exit(&((vlapic)->timer_lock))
#define	VLAPIC_TIMER_LOCKED(vlapic)	MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
    APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
    APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
    APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

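/*
 * Derive the x2APIC logical destination register (LDR) from the APIC ID.
 * In x2APIC mode the LDR format is fixed by the architecture: bits 31:16
 * hold the cluster ID (APIC ID >> 4) and bits 15:0 hold a one-hot logical
 * ID within the cluster (1 << (APIC ID & 0xf)).
 */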
static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2APIC mode */
	if (vlapic_x2mode(vlapic)) {
		/* Discard the write and reset LDR to its fixed x2APIC value */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

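/*
 * The timer divide configuration register (DCR) encodes its divisor in bits
 * 0, 1, and 3 (bit 2 is reserved), which is why the switch below masks the
 * register with 0xB.  The encodings map to divide-by-2 through divide-by-128,
 * with divide-by-1 as the final bit pattern (0xB).
 */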
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

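/*
 * The current-count register (CCR) is not stored; it is synthesized on each
 * read from the time remaining until the timer callout fires, converted back
 * into bus-clock ticks at the current timer frequency.
 */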
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */

		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}

	/*
	 * Clamp CCR value to that programmed in ICR - its theoretical maximum.
	 * Normal operation should never result in this being necessary. Only
	 * strange circumstances due to state importation as part of instance
	 * save/restore or live-migration require such wariness.
	 */
	if (ccr > lapic->icr_timer) {
		ccr = lapic->icr_timer;
		vlapic->stats.vs_clamp_ccr++;
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

static void
vlapic_update_divider(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	vlapic->timer_cur_freq =
	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
	vlapic->timer_period =
	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	VLAPIC_TIMER_LOCK(vlapic);
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it
		 * will have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

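	/*
	 * The IRR, ISR, and TMR registers are each eight 32-bit registers
	 * spaced 16 bytes apart in the APIC page, so consecutive registers
	 * sit 4 uint32_t entries apart - hence the stride of 4 below.
	 */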
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	lapic->lvt_timer |= APIC_LVT_M;
	lapic->lvt_thermal |= APIC_LVT_M;
	lapic->lvt_pcint |= APIC_LVT_M;
	lapic->lvt_lint0 |= APIC_LVT_M;
	lapic->lvt_lint1 |= APIC_LVT_M;
	lapic->lvt_error |= APIC_LVT_M;
	vlapic_refresh_lvts(vlapic);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to a value
				 * of 0, indicating that no active ISR was
				 * found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
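 *
 * For example, TPR = 0x40 with highest in-service vector 0x65 yields
 * PPR = 0x60, since PRIO(0x65) = 0x60 exceeds PRIO(0x40) = 0x40.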
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to the
 * priority of that vector, as the vCPU would have been at a lower priority in
 * order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

	lapic->ppr = ppr;
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm,
			    vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm,
			    vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	/*
	 * We should not end up here with timer_period == 0, but it is checked
	 * anyway to prevent a runaway periodic timer.
	 */
	if (vlapic_periodic_timer(vlapic) && vlapic->timer_period != 0) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underlie these callouts) will never be called
		 * early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	} else {
		/*
		 * Clear the target time so that logic can distinguish a timer
		 * which has fired (where the value is zero) from one which is
		 * held pending due to the instance being paused (where the
		 * value is non-zero, but the callout is not pending).
		 */
		vlapic->timer_fire_when = 0;
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
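 *
 * For example, in xAPIC flat logical mode an MDA of 0x03 selects the vCPUs
 * whose LDR has bit 24 or bit 25 set.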
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

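/*
 * CR8 architecturally mirrors the high nibble of the TPR: a CR8 value of N
 * corresponds to a TPR of N << 4, so only the low 4 bits of CR8 are valid.
 */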
void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)) {
			return (true);
		}
		/*
		 * A level-triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software
		 * Developer Manual. Per the MPS, a CPU must be sent a level
		 * assert INIT followed by a level deassert INIT. Some
		 * operating systems, e.g. FreeBSD or Linux, use that
		 * algorithm. According to the SDM, a level deassert INIT is
		 * only supported by Pentium and P6 processors. It is always
		 * sent to all CPUs regardless of the destination or shorthand
		 * field, and it resets the arbitration ID register. That
		 * register is not software accessible and is only required
		 * for APIC bus arbitration, so the level deassert INIT does
		 * not need any emulation and we should ignore it. The SDM
		 * also states that newer processors do not support the level
		 * deassert INIT and that it is no longer valid. As it is
		 * defined for older systems, it can't be invalid per se;
		 * otherwise, backward compatibility would be broken. However,
		 * returning false here causes it to be ignored, which is the
		 * desired behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT) {
			return (false);
		}
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF) {
			return (true);
		}
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD) {
			return (true);
		}
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
	}

	return (false);
}

void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval))
		return;

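	/*
	 * In xAPIC mode the destination lives in ICR_HI[31:24], hence the
	 * shift by an additional 24 bits; x2APIC mode uses the full upper
	 * 32 bits of the ICR as the destination.
	 */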
	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}

	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible destination shorthands are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

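/*
 * Scan the IRR from the highest vector group downward; report a pending
 * interrupt only if its priority class exceeds the current PPR, since vectors
 * of lower or equal priority cannot be delivered while the PPR is raised.
 */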
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR. With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	vlapic->timer_fire_when = 0;
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	lapic->dcr_timer = 0;
	vlapic_update_divider(vlapic);
	VLAPIC_TIMER_UNLOCK(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset. This is performed after the APIC
	 * timer has been stopped, in case it happened to fire just prior to
	 * being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
	lapic->icr_hi = 0;

	lapic->lvt_cmci = 0;
	lapic->lvt_timer = 0;
	lapic->lvt_thermal = 0;
	lapic->lvt_pcint = 0;
	lapic->lvt_lint0 = 0;
	lapic->lvt_lint1 = 0;
	lapic->lvt_error = 0;
	vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
	callout_init(&vlapic->callout, 1);

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
	callout_drain(&vlapic->callout);
	mutex_destroy(&vlapic->timer_lock);
}

int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		*valp = UINT64_MAX;
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	uint32_t raw = 0;
	(void) vlapic_read(vlapic, off & ~0xf, &raw);

	/* Shift and mask reads which are small and/or unaligned */
	const uint8_t align = off & 0xf;
	if (align < 4) {
		*valp = (uint64_t)raw << (align * 8);
	} else {
		*valp = 0;
	}

	return (0);
}

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not 32 bits wide or not 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified. */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change. */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

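/*
 * x2APIC MSRs 0x800-0x8ff map linearly onto the xAPIC MMIO register space:
 * each MSR covers one 16-byte register slot, so the register offset is the
 * MSR index shifted left by 4 (e.g. MSR 0x802 -> offset 0x20, the ID
 * register).
 */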
static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}

bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging hardware features that avoid a VM exit,
	 * such as Posted Interrupts (Intel) or the Doorbell MSR (AMD AVIC).
	 *
	 * If neither of these features is available, then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

void
vlapic_pause(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_resume(struct vlapic *vlapic)
{
	VLAPIC_TIMER_LOCK(vlapic);
	if (vlapic->timer_fire_when != 0) {
		vlapic_callout_reset(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

static int
vlapic_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
	struct vdi_lapic_v1 *out = req->vdr_data;

	VLAPIC_TIMER_LOCK(vlapic);

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	out->vl_msr_apicbase = vlapic->msr_apicbase;
	out->vl_esr_pending = vlapic->esr_pending;
	if (vlapic->timer_fire_when != 0) {
		out->vl_timer_target =
		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
	} else {
		out->vl_timer_target = 0;
	}

	const struct LAPIC *lapic = vlapic->apic_page;
	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

	/*
	 * While this might appear, at first glance, to be missing some fields,
	 * they are intentionally omitted:
	 * - PPR: its contents are always generated at runtime
	 * - EOI: write-only, and contents are ignored after handling
	 * - RRD: (aka RRR) read-only and always 0
	 * - CCR: calculated from underlying timer data
	 */
	out_page->vlp_id = lapic->id;
	out_page->vlp_version = lapic->version;
	out_page->vlp_tpr = lapic->tpr;
	out_page->vlp_apr = lapic->apr;
	out_page->vlp_ldr = lapic->ldr;
	out_page->vlp_dfr = lapic->dfr;
	out_page->vlp_svr = lapic->svr;
	out_page->vlp_esr = lapic->esr;
	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
	out_page->vlp_icr_timer = lapic->icr_timer;
	out_page->vlp_dcr_timer = lapic->dcr_timer;

	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
	out_page->vlp_lvt_timer = lapic->lvt_timer;
	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
	out_page->vlp_lvt_error = lapic->lvt_error;

	const uint32_t *isrptr = &lapic->isr0;
	const uint32_t *tmrptr = &lapic->tmr0;
	const uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		out_page->vlp_isr[i] = isrptr[i * 4];
		out_page->vlp_tmr[i] = tmrptr[i * 4];
		out_page->vlp_irr[i] = irrptr[i * 4];
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

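/*
 * Count the set bits in a byte using Kernighan's trick: each iteration of
 * 'val &= (val - 1)' clears the lowest set bit.
 */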
static uint8_t
popc8(uint8_t val)
{
	uint8_t cnt;

	for (cnt = 0; val != 0; val &= (val - 1)) {
		cnt++;
	}
	return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
	VVE_OK,
	VVE_BAD_ID,
	VVE_BAD_VERSION,
	VVE_BAD_MSR_BASE,
	VVE_BAD_ESR,
	VVE_BAD_TPR,
	VVE_LOW_VECTOR,
	VVE_ISR_PRIORITY,
	VVE_TIMER_MISMATCH,
};

static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
	ASSERT(req->vdr_version == 1 &&
	    req->vdr_len >= sizeof (struct vdi_lapic_v1));
	const struct vdi_lapic_v1 *src = req->vdr_data;

	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
		return (VVE_BAD_ESR);
	}

	/* Use the same restrictions as the wrmsr accessor for now */
	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
	if ((diff & apicbase_reserved) != 0) {
		return (VVE_BAD_MSR_BASE);
	}

	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	/*
	 * Demand that ID match for now. This can be further updated when some
	 * of the x2apic handling is improved.
	 */
	if (page->vlp_id != vlapic_get_id(vlapic)) {
		return (VVE_BAD_ID);
	}

	if (page->vlp_version != vlapic->apic_page->version) {
		return (VVE_BAD_VERSION);
	}

	if (page->vlp_tpr > 0xff) {
		return (VVE_BAD_TPR);
	}

	/* Vectors 0-15 are not expected to be handled by the lapic */
	if ((page->vlp_isr[0] & 0xffff) != 0 ||
	    (page->vlp_irr[0] & 0xffff) != 0 ||
	    (page->vlp_tmr[0] & 0xffff) != 0) {
		return (VVE_LOW_VECTOR);
	}

	/* Only one interrupt should be in-service for each priority level */
	for (uint_t i = 0; i < 8; i++) {
		if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
			return (VVE_ISR_PRIORITY);
		}
	}

	/* If icr_timer is zero, then a scheduled timer does not make sense */
	if (page->vlp_icr_timer == 0 && src->vl_timer_target != 0) {
		return (VVE_TIMER_MISMATCH);
	}

	return (VVE_OK);
}

static int
vlapic_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
	VERIFY3U(req->vdr_version, ==, 1);
	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
		return (EINVAL);
	}
	const struct vdi_lapic_v1 *src = req->vdr_data;
	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Already ensured by vlapic_data_validate() */
	VERIFY3U(page->vlp_version, ==, lapic->version);

	vlapic->msr_apicbase = src->vl_msr_apicbase;
	vlapic->esr_pending = src->vl_esr_pending;

	lapic->tpr = page->vlp_tpr;
	lapic->apr = page->vlp_apr;
	lapic->ldr = page->vlp_ldr;
	lapic->dfr = page->vlp_dfr;
	lapic->svr = page->vlp_svr;
	lapic->esr = page->vlp_esr;
	lapic->icr_lo = (uint32_t)page->vlp_icr;
	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

	lapic->icr_timer = page->vlp_icr_timer;
	lapic->dcr_timer = page->vlp_dcr_timer;
	vlapic_update_divider(vlapic);

	/* cleanse LDR/DFR */
	vlapic_ldr_write_handler(vlapic);
	vlapic_dfr_write_handler(vlapic);

	lapic->lvt_cmci = page->vlp_lvt_cmci;
	lapic->lvt_timer = page->vlp_lvt_timer;
	lapic->lvt_thermal = page->vlp_lvt_thermal;
	lapic->lvt_pcint = page->vlp_lvt_pcint;
	lapic->lvt_lint0 = page->vlp_lvt_lint0;
	lapic->lvt_lint1 = page->vlp_lvt_lint1;
	lapic->lvt_error = page->vlp_lvt_error;
	/* cleanse LVTs */
	vlapic_refresh_lvts(vlapic);

	uint32_t *isrptr = &lapic->isr0;
	uint32_t *tmrptr = &lapic->tmr0;
	uint32_t *irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		isrptr[i * 4] = page->vlp_isr[i];
		tmrptr[i * 4] = page->vlp_tmr[i];
		irrptr[i * 4] = page->vlp_irr[i];
	}

	if (src->vl_timer_target != 0) {
		vlapic->timer_fire_when =
		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);

		/*
		 * Check to see if timer expiration would result in computed
		 * CCR values in excess of what is configured in ICR/DCR.
		 */
		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			const uint32_t ccr = hrt_freq_count(
			    vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);

			/*
			 * Until we have a richer event/logging system
			 * available, just note such an overage as a stat.
			 */
			if (ccr > lapic->icr_timer) {
				vlapic->stats.vs_import_timer_overage++;
			}
		}

		if (!vm_is_paused(vlapic->vm)) {
			vlapic_callout_reset(vlapic);
		}
	} else {
		vlapic->timer_fire_when = 0;
	}

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);

	return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
	.vdve_class = VDC_LAPIC,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
	.vdve_vcpu_readf = vlapic_data_read,
	.vdve_vcpu_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);