/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2024 Oxide Computer Company
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cpuset.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>

#include <machine/vmm.h>
#include <sys/vmm_kernel.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"


/*
 * The 4 high bits of a given interrupt vector represent its priority. The same
 * is true for the contents of the TPR when it is used to calculate the ultimate
 * PPR of an APIC - the 4 high bits hold the priority.
 */
#define PRIO(x)  ((x) & 0xf0)
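/*
 * For example, PRIO(0x4e) == 0x40: vector 0x4e falls in priority class 4,
 * the same class as every other vector in the 0x40-0x4f range.
 */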

#define VLAPIC_VERSION  (0x14)

/*
 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define VLAPIC_TIMER_LOCK(vlapic)    mutex_enter(&((vlapic)->timer_lock))
#define VLAPIC_TIMER_UNLOCK(vlapic)  mutex_exit(&((vlapic)->timer_lock))
#define VLAPIC_TIMER_LOCKED(vlapic)  MUTEX_HELD(&((vlapic)->timer_lock))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when calculating times
 */
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)

#define APICBASE_ADDR_MASK 0xfffffffffffff000UL

#define APIC_VALID_MASK_ESR (APIC_ESR_SEND_CS_ERROR | \
    APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
    APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
    APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
    return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
    return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
    const struct LAPIC *lapic = vlapic->apic_page;

    return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
    return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

static __inline uint32_t
vlapic_get_id(const struct vlapic *vlapic)
{

    if (vlapic_x2mode(vlapic))
        return (vlapic->vcpuid);
    else
        return (vlapic->vcpuid << 24);
}

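/*
 * Compute the x2APIC logical destination register for this vCPU: the low 16
 * bits hold a one-hot logical ID (1 << (apicid & 0xf)) and the high 16 bits
 * hold the cluster ID (apicid >> 4). For example, APIC ID 0x21 yields an LDR
 * of 0x00020002 (cluster 2, member bit 1).
 */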
static uint32_t
x2apic_ldr(const struct vlapic *vlapic)
{
    int apicid;
    uint32_t ldr;

    apicid = vlapic_get_id(vlapic);
    ldr = 1 << (apicid & 0xf);
    ldr |= (apicid & 0xffff0) << 12;
    return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic;

    lapic = vlapic->apic_page;
    if (vlapic_x2mode(vlapic)) {
        /* Ignore write to DFR in x2APIC mode */
        lapic->dfr = 0;
        return;
    }

    lapic->dfr &= APIC_DFR_MODEL_MASK;
    lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic;

    lapic = vlapic->apic_page;

    /* LDR is read-only in x2apic mode */
    if (vlapic_x2mode(vlapic)) {
        /* Ignore write to LDR in x2APIC mode */
        lapic->ldr = x2apic_ldr(vlapic);
    } else {
        lapic->ldr &= ~APIC_LDR_RESERVED;
    }
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic;

    /*
     * We don't allow the ID register to be modified so reset it back to
     * its default value.
     */
    lapic = vlapic->apic_page;
    lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
    switch (dcr & 0xB) {
    case APIC_TDCR_1:
        return (1);
    case APIC_TDCR_2:
        return (2);
    case APIC_TDCR_4:
        return (4);
    case APIC_TDCR_8:
        return (8);
    case APIC_TDCR_16:
        return (16);
    case APIC_TDCR_32:
        return (32);
    case APIC_TDCR_64:
        return (64);
    case APIC_TDCR_128:
        return (128);
    default:
        panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
    }
}

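/*
 * Derive the current-count register (CCR) from the time remaining until the
 * timer callout fires. As an illustration: with the divider at 1, the timer
 * runs at VLAPIC_BUS_FREQ (134217728 Hz), so 10ms of remaining time reads
 * back as a CCR of roughly 1342177.
 */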
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
    struct LAPIC *lapic;
    uint32_t ccr;

    ccr = 0;
    lapic = vlapic->apic_page;

    VLAPIC_TIMER_LOCK(vlapic);
    if (callout_active(&vlapic->callout)) {
        /*
         * If the timer is scheduled to expire in the future then
         * compute the value of 'ccr' based on the remaining time.
         */

        const hrtime_t now = gethrtime();
        if (vlapic->timer_fire_when > now) {
            ccr += hrt_freq_count(vlapic->timer_fire_when - now,
                vlapic->timer_cur_freq);
        }
    }

    /*
     * Clamp CCR value to that programmed in ICR - its theoretical maximum.
     * Normal operation should never result in this being necessary. Only
     * strange circumstances due to state importation as part of instance
     * save/restore or live-migration require such wariness.
     */
    if (ccr > lapic->icr_timer) {
        ccr = lapic->icr_timer;
        vlapic->stats.vs_clamp_ccr++;
    }
    VLAPIC_TIMER_UNLOCK(vlapic);
    return (ccr);
}

static void
vlapic_update_divider(struct vlapic *vlapic)
{
    struct LAPIC *lapic = vlapic->apic_page;

    ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

    vlapic->timer_cur_freq =
        VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
    vlapic->timer_period =
        hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
    /*
     * Update the timer frequency and the timer period.
     *
     * XXX changes to the frequency divider will not take effect until
     * the timer is reloaded.
     */
    VLAPIC_TIMER_LOCK(vlapic);
    vlapic_update_divider(vlapic);
    VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic;

    lapic = vlapic->apic_page;
    lapic->esr = vlapic->esr_pending;
    vlapic->esr_pending = 0;
}

vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
    struct LAPIC *lapic;
    uint32_t *irrptr, *tmrptr, mask, tmr;
    int idx;

    KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

    lapic = vlapic->apic_page;
    if (!(lapic->svr & APIC_SVR_ENABLE)) {
        /* ignore interrupt on software-disabled APIC */
        return (VCPU_NOTIFY_NONE);
    }

    if (vector < 16) {
        vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
            false);

        /*
         * If the error LVT is configured to interrupt the vCPU, it will
         * have delivered a notification through that mechanism.
         */
        return (VCPU_NOTIFY_NONE);
    }

    if (vlapic->ops.set_intr_ready) {
        return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
    }

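    /*
     * IRR, ISR, and TMR are each banks of eight 32-bit registers spaced at
     * 16-byte intervals in the APIC page, so indexing them as a uint32_t
     * array uses a stride of 4: the word covering 'vector' sits at
     * irr0[(vector / 32) * 4].
     */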
    idx = (vector / 32) * 4;
    mask = 1 << (vector % 32);
    tmrptr = &lapic->tmr0;
    irrptr = &lapic->irr0;

    /*
     * Update TMR for requested vector, if necessary.
     * This must be done prior to asserting the bit in IRR so that the
     * proper TMR state is always visible before the to-be-queued interrupt
     * can be injected.
     */
    tmr = atomic_load_acq_32(&tmrptr[idx]);
    if ((tmr & mask) != (level ? mask : 0)) {
        if (level) {
            atomic_set_int(&tmrptr[idx], mask);
        } else {
            atomic_clear_int(&tmrptr[idx], mask);
        }
    }

    /* Now set the bit in IRR */
    atomic_set_int(&irrptr[idx], mask);

    return (VCPU_NOTIFY_EXIT);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
    struct LAPIC *lapic = vlapic->apic_page;
    int i;

    switch (offset) {
    case APIC_OFFSET_CMCI_LVT:
        return (&lapic->lvt_cmci);
    case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
        i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
        return ((&lapic->lvt_timer) + i);
    default:
        panic("vlapic_get_lvt: invalid LVT\n");
    }
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
    int index;

    switch (offset) {
    case APIC_OFFSET_CMCI_LVT:
        index = APIC_LVT_CMCI;
        break;
    case APIC_OFFSET_TIMER_LVT:
        index = APIC_LVT_TIMER;
        break;
    case APIC_OFFSET_THERM_LVT:
        index = APIC_LVT_THERMAL;
        break;
    case APIC_OFFSET_PERF_LVT:
        index = APIC_LVT_PMC;
        break;
    case APIC_OFFSET_LINT0_LVT:
        index = APIC_LVT_LINT0;
        break;
    case APIC_OFFSET_LINT1_LVT:
        index = APIC_LVT_LINT1;
        break;
    case APIC_OFFSET_ERROR_LVT:
        index = APIC_LVT_ERROR;
        break;
    default:
        index = -1;
        break;
    }
    KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
        "invalid lvt index %d for offset %x", index, offset));

    return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
    int idx;
    uint32_t val;

    idx = lvt_off_to_idx(offset);
    val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
    return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
    uint32_t *lvtptr, mask, val;
    struct LAPIC *lapic;
    int idx;

    lapic = vlapic->apic_page;
    lvtptr = vlapic_get_lvtptr(vlapic, offset);
    val = *lvtptr;
    idx = lvt_off_to_idx(offset);

    if (!(lapic->svr & APIC_SVR_ENABLE))
        val |= APIC_LVT_M;
    mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
    switch (offset) {
    case APIC_OFFSET_TIMER_LVT:
        mask |= APIC_LVTT_TM;
        break;
    case APIC_OFFSET_ERROR_LVT:
        break;
    case APIC_OFFSET_LINT0_LVT:
    case APIC_OFFSET_LINT1_LVT:
        mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
        /* FALLTHROUGH */
    default:
        mask |= APIC_LVT_DM;
        break;
    }
    val &= mask;
    *lvtptr = val;
    atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_refresh_lvts(struct vlapic *vlapic)
{
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
    vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
    struct LAPIC *lapic = vlapic->apic_page;

    lapic->lvt_cmci |= APIC_LVT_M;
    lapic->lvt_timer |= APIC_LVT_M;
    lapic->lvt_thermal |= APIC_LVT_M;
    lapic->lvt_pcint |= APIC_LVT_M;
    lapic->lvt_lint0 |= APIC_LVT_M;
    lapic->lvt_lint1 |= APIC_LVT_M;
    lapic->lvt_error |= APIC_LVT_M;
    vlapic_refresh_lvts(vlapic);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
    uint32_t mode, reg, vec;
    vcpu_notify_t notify;

    reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

    if (reg & APIC_LVT_M)
        return (0);
    vec = reg & APIC_LVT_VECTOR;
    mode = reg & APIC_LVT_DM;

    switch (mode) {
    case APIC_LVT_DM_FIXED:
        if (vec < 16) {
            vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
                lvt == APIC_LVT_ERROR);
            return (0);
        }
        notify = vlapic_set_intr_ready(vlapic, vec, false);
        vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
        break;
    case APIC_LVT_DM_NMI:
        (void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
        break;
    case APIC_LVT_DM_EXTINT:
        (void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
        break;
    default:
        /* Other modes ignored */
        return (0);
    }
    return (1);
}

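/*
 * Find the highest-priority vector currently marked in-service, scanning ISR
 * from the top (isr7) down. Returns 0 if no valid in-service vector is found.
 */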
static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
    int i;
    uint32_t *isrp;

    isrp = &vlapic->apic_page->isr7;

    for (i = 7; i >= 0; i--, isrp -= 4) {
        uint32_t reg = *isrp;

        if (reg != 0) {
            uint_t vec = (i * 32) + bsrl(reg);

            if (vec < 16) {
                /*
                 * Truncate the illegal low vectors to a value
                 * of 0, indicating that no active ISR was
                 * found.
                 */
                return (0);
            }
            return (vec);
        }
    }

    return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
    int isrvec, tpr, ppr;

    isrvec = vlapic_active_isr(vlapic);
    tpr = vlapic->apic_page->tpr;

    /*
     * Algorithm adopted from section "Interrupt, Task and Processor
     * Priority" in Intel Architecture Manual Vol 3a.
     */
    if (PRIO(tpr) >= PRIO(isrvec)) {
        ppr = tpr;
    } else {
        ppr = PRIO(isrvec);
    }

    vlapic->apic_page->ppr = ppr;
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to
 * the priority of that vector, as the vCPU would have been at a lower priority
 * in order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
    struct LAPIC *lapic = vlapic->apic_page;
    int ppr;

    ppr = PRIO(vec);

    lapic->ppr = ppr;
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
    vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

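/*
 * Handle a write to the EOI register: clear the highest-priority bit set in
 * ISR, recompute the PPR, and, if the completed vector was level-triggered
 * (its TMR bit is set), forward the EOI to the vIOAPIC.
 */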
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
    struct LAPIC *lapic = vlapic->apic_page;
    uint32_t *isrptr, *tmrptr;
    int i;
    uint_t idx, bitpos, vector;

    isrptr = &lapic->isr0;
    tmrptr = &lapic->tmr0;

    for (i = 7; i >= 0; i--) {
        idx = i * 4;
        if (isrptr[idx] != 0) {
            bitpos = bsrl(isrptr[idx]);
            vector = i * 32 + bitpos;

            isrptr[idx] &= ~(1 << bitpos);
            vlapic_update_ppr(vlapic);
            if ((tmrptr[idx] & (1 << bitpos)) != 0) {
                vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
                    vector);
            }
            return;
        }
    }
    vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

    return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
    uint32_t lvt;

    lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

    return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

    vlapic->esr_pending |= mask;

    /*
     * Avoid infinite recursion if the error LVT itself is configured with
     * an illegal vector.
     */
    if (lvt_error)
        return;

    if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
        vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
    }
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
    ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

    if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
        vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
    }
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

    if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
        vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
    }
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
    if (!vlapic_enabled(vlapic)) {
        /*
         * When the local APIC is global/hardware disabled,
         * LINT[1:0] pins are configured as INTR and NMI pins,
         * respectively.
         */
        switch (vector) {
        case APIC_LVT_LINT0:
            (void) vm_inject_extint(vlapic->vm,
                vlapic->vcpuid);
            break;
        case APIC_LVT_LINT1:
            (void) vm_inject_nmi(vlapic->vm,
                vlapic->vcpuid);
            break;
        default:
            break;
        }
        return (0);
    }

    switch (vector) {
    case APIC_LVT_LINT0:
    case APIC_LVT_LINT1:
    case APIC_LVT_TIMER:
    case APIC_LVT_ERROR:
    case APIC_LVT_PMC:
    case APIC_LVT_THERMAL:
    case APIC_LVT_CMCI:
        if (vlapic_fire_lvt(vlapic, vector)) {
            vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
                LVTS_TRIGGERRED, vector, 1);
        }
        break;
    default:
        return (EINVAL);
    }
    return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic)
{
    callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
        vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

static void
vlapic_callout_handler(void *arg)
{
    struct vlapic *vlapic = arg;

    VLAPIC_TIMER_LOCK(vlapic);
    if (callout_pending(&vlapic->callout))  /* callout was reset */
        goto done;

    if (!callout_active(&vlapic->callout))  /* callout was stopped */
        goto done;

    callout_deactivate(&vlapic->callout);

    vlapic_fire_timer(vlapic);

    /*
     * We should not end up here with timer_period == 0, but to prevent a
     * runaway periodic timer, it is checked anyway.
     */
    if (vlapic_periodic_timer(vlapic) && vlapic->timer_period != 0) {
        /*
         * Compute the delta between when the timer was supposed to
         * fire and the present time. We can depend on the fact that
         * cyclics (which underlie these callouts) will never be called
         * early.
         */
        const hrtime_t now = gethrtime();
        const hrtime_t delta = now - vlapic->timer_fire_when;
        if (delta >= vlapic->timer_period) {
            /*
             * If we are so behind that we have missed an entire
             * timer period, reset the time base rather than
             * attempting to catch up.
             */
            vlapic->timer_fire_when = now + vlapic->timer_period;
        } else {
            vlapic->timer_fire_when += vlapic->timer_period;
        }
        vlapic_callout_reset(vlapic);
    } else {
        /*
         * Clear the target time so that logic can distinguish a timer
         * which has fired (where the value is zero) from one which is
         * held pending due to the instance being paused (where the
         * value is non-zero, but the callout is not pending).
         */
        vlapic->timer_fire_when = 0;
    }
done:
    VLAPIC_TIMER_UNLOCK(vlapic);
}

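/*
 * Writing the timer initial-count register (ICR) arms the timer: the countdown
 * length is icr_timer ticks at timer_cur_freq. As an illustration, an ICR of
 * 134217728 with the divider at 1 schedules the callout one second out, while
 * an ICR of 0 stops the timer entirely.
 */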
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic = vlapic->apic_page;

    VLAPIC_TIMER_LOCK(vlapic);
    vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
        lapic->icr_timer);
    if (vlapic->timer_period != 0) {
        vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
        vlapic_callout_reset(vlapic);
    } else {
        vlapic->timer_fire_when = 0;
        callout_stop(&vlapic->callout);
    }
    VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
    struct vlapic *vlapic;
    uint32_t dfr, ldr, ldest, cluster;
    uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
    cpuset_t amask;
    int vcpuid;

    if ((x2apic_dest && dest == 0xffffffff) ||
        (!x2apic_dest && dest == 0xff)) {
        /*
         * Broadcast in both logical and physical modes.
         */
        *dmask = vm_active_cpus(vm);
        return;
    }

    if (phys) {
        /*
         * Physical mode: destination is APIC ID.
         */
        CPU_ZERO(dmask);
        vcpuid = vm_apicid2vcpuid(vm, dest);
        amask = vm_active_cpus(vm);
        if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
            CPU_SET(vcpuid, dmask);
    } else {
        /*
         * In the "Flat Model" the MDA is interpreted as an 8-bit wide
         * bitmask. This model is only available in the xAPIC mode.
         */
        mda_flat_ldest = dest & 0xff;

        /*
         * In the "Cluster Model" the MDA is used to identify a
         * specific cluster and a set of APICs in that cluster.
         */
        if (x2apic_dest) {
            mda_cluster_id = dest >> 16;
            mda_cluster_ldest = dest & 0xffff;
        } else {
            mda_cluster_id = (dest >> 4) & 0xf;
            mda_cluster_ldest = dest & 0xf;
        }
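        /*
         * For instance, an xAPIC cluster-model MDA of 0x5a names
         * cluster 5 with member bitmask 0xa, i.e. members 1 and 3 of
         * that cluster.
         */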

        /*
         * Logical mode: match each APIC that has a bit set
         * in its LDR that matches a bit in the ldest.
         */
        CPU_ZERO(dmask);
        amask = vm_active_cpus(vm);
        while ((vcpuid = CPU_FFS(&amask)) != 0) {
            vcpuid--;
            CPU_CLR(vcpuid, &amask);

            vlapic = vm_lapic(vm, vcpuid);
            dfr = vlapic->apic_page->dfr;
            ldr = vlapic->apic_page->ldr;

            if ((dfr & APIC_DFR_MODEL_MASK) ==
                APIC_DFR_MODEL_FLAT) {
                ldest = ldr >> 24;
                mda_ldest = mda_flat_ldest;
            } else if ((dfr & APIC_DFR_MODEL_MASK) ==
                APIC_DFR_MODEL_CLUSTER) {
                if (vlapic_x2mode(vlapic)) {
                    cluster = ldr >> 16;
                    ldest = ldr & 0xffff;
                } else {
                    cluster = ldr >> 28;
                    ldest = (ldr >> 24) & 0xf;
                }
                if (cluster != mda_cluster_id)
                    continue;
                mda_ldest = mda_cluster_ldest;
            } else {
                /*
                 * Guest has configured a bad logical
                 * model for this vcpu - skip it.
                 */
                continue;
            }

            if ((mda_ldest & ldest) != 0) {
                CPU_SET(vcpuid, dmask);
                if (lowprio)
                    break;
            }
        }
    }
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
    struct LAPIC *lapic = vlapic->apic_page;

    if (lapic->tpr != val) {
        lapic->tpr = val;
        vlapic_update_ppr(vlapic);
    }
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
    uint8_t tpr;

    if (val & ~0xf) {
        vm_inject_gp(vlapic->vm, vlapic->vcpuid);
        return;
    }

    tpr = val << 4;
    vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(const struct vlapic *vlapic)
{
    const struct LAPIC *lapic = vlapic->apic_page;

    return (lapic->tpr >> 4);
}

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
    uint32_t mode = icrval & APIC_DELMODE_MASK;
    uint32_t level = icrval & APIC_LEVEL_MASK;
    uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
    uint32_t shorthand = icrval & APIC_DEST_MASK;

    switch (mode) {
    case APIC_DELMODE_FIXED:
        if (trigger == APIC_TRIGMOD_EDGE)
            return (true);
        /*
         * AMD allows a level assert IPI and Intel converts a level
         * assert IPI into an edge IPI.
         */
        if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
            return (true);
        break;
    case APIC_DELMODE_LOWPRIO:
    case APIC_DELMODE_SMI:
    case APIC_DELMODE_NMI:
    case APIC_DELMODE_INIT:
        if (trigger == APIC_TRIGMOD_EDGE &&
            (shorthand == APIC_DEST_DESTFLD ||
            shorthand == APIC_DEST_ALLESELF)) {
            return (true);
        }
        /*
         * AMD allows a level assert IPI and Intel converts a level
         * assert IPI into an edge IPI.
         */
        if (trigger == APIC_TRIGMOD_LEVEL &&
            level == APIC_LEVEL_ASSERT &&
            (shorthand == APIC_DEST_DESTFLD ||
            shorthand == APIC_DEST_ALLESELF)) {
            return (true);
        }
        /*
         * A level-triggered deassert INIT is defined in the Intel
         * Multiprocessor Specification and the Intel Software Developer
         * Manual. Due to the MPS it's required to send a level assert
         * INIT to a cpu and then a level deassert INIT. Some operating
         * systems, e.g. FreeBSD or Linux, use that algorithm. According
         * to the SDM a level deassert INIT is only supported by Pentium
         * and P6 processors. It's always sent to all cpus regardless of
         * the destination or shorthand field. It resets the arbitration
         * id register. This register is not software accessible and
         * only required for the APIC bus arbitration. So, the level
         * deassert INIT doesn't need any emulation and we should ignore
         * it. The SDM also defines that newer processors don't support
         * the level deassert INIT and it's not valid any more. As it's
         * defined for older systems, it can't be invalid per se.
         * Otherwise, backward compatibility would be broken. However,
         * when returning false here, it'll be ignored which is the
         * desired behaviour.
         */
        if (mode == APIC_DELMODE_INIT &&
            trigger == APIC_TRIGMOD_LEVEL &&
            level == APIC_LEVEL_DEASSERT) {
            return (false);
        }
        break;
    case APIC_DELMODE_STARTUP:
        if (shorthand == APIC_DEST_DESTFLD ||
            shorthand == APIC_DEST_ALLESELF) {
            return (true);
        }
        break;
    case APIC_DELMODE_RR:
        /* Only available on AMD! */
        if (trigger == APIC_TRIGMOD_EDGE &&
            shorthand == APIC_DEST_DESTFLD) {
            return (true);
        }
        break;
    case APIC_DELMODE_RESV:
        return (false);
    default:
        panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode);
    }

    return (false);
}

void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
    int i;
    cpuset_t dmask;
    uint64_t icrval;
    uint32_t dest, vec, mode, dsh;
    struct LAPIC *lapic;

    lapic = vlapic->apic_page;
    lapic->icr_lo &= ~APIC_DELSTAT_PEND;
    icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

    /*
     * Ignore invalid combinations of the icr.
     */
    if (!vlapic_is_icr_valid(icrval))
        return;

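    /*
     * In xAPIC mode the 8-bit destination occupies ICR bits 63:56 (the top
     * byte of icr_hi), hence the extra 24-bit shift; x2APIC widens the
     * destination to the full upper 32 bits of the ICR.
     */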
    if (vlapic_x2mode(vlapic))
        dest = icrval >> 32;
    else
        dest = icrval >> (32 + 24);
    vec = icrval & APIC_VECTOR_MASK;
    mode = icrval & APIC_DELMODE_MASK;
    dsh = icrval & APIC_DEST_MASK;

    if (mode == APIC_DELMODE_FIXED && vec < 16) {
        vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
        return;
    }

    if (mode == APIC_DELMODE_INIT &&
        (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
        /* No work required to deassert INIT */
        return;
    }

    switch (dsh) {
    case APIC_DEST_DESTFLD:
        vlapic_calcdest(vlapic->vm, &dmask, dest,
            (icrval & APIC_DESTMODE_LOG) == 0, false,
            vlapic_x2mode(vlapic));
        break;
    case APIC_DEST_SELF:
        CPU_SETOF(vlapic->vcpuid, &dmask);
        break;
    case APIC_DEST_ALLISELF:
        dmask = vm_active_cpus(vlapic->vm);
        break;
    case APIC_DEST_ALLESELF:
        dmask = vm_active_cpus(vlapic->vm);
        CPU_CLR(vlapic->vcpuid, &dmask);
        break;
    default:
        /*
         * All possible delivery shorthands are covered above.
         * We should never end up here.
         */
        panic("unknown delivery shorthand: %x", dsh);
    }

    while ((i = CPU_FFS(&dmask)) != 0) {
        i--;
        CPU_CLR(i, &dmask);
        switch (mode) {
        case APIC_DELMODE_FIXED:
            (void) lapic_intr_edge(vlapic->vm, i, vec);
            vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
                VLAPIC_IPI_SEND, 1);
            vmm_stat_incr(vlapic->vm, i,
                VLAPIC_IPI_RECV, 1);
            break;
        case APIC_DELMODE_NMI:
            (void) vm_inject_nmi(vlapic->vm, i);
            break;
        case APIC_DELMODE_INIT:
            (void) vm_inject_init(vlapic->vm, i);
            break;
        case APIC_DELMODE_STARTUP:
            (void) vm_inject_sipi(vlapic->vm, i, vec);
            break;
        case APIC_DELMODE_LOWPRIO:
        case APIC_DELMODE_SMI:
        default:
            /* Unhandled IPI modes (for now) */
            break;
        }
    }
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
    const int vec = val & 0xff;

    /* self-IPI is only exposed via x2APIC */
    ASSERT(vlapic_x2mode(vlapic));

    (void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
    vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
    vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

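/*
 * Report whether an interrupt is pending delivery: scan IRR from the highest
 * vectors down and, if the best candidate outranks the current PPR priority
 * class, return 1 with its vector in *vecptr (when non-NULL); otherwise
 * return 0.
 */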
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
    struct LAPIC *lapic = vlapic->apic_page;
    int idx, i, bitpos, vector;
    uint32_t *irrptr, val;

    if (vlapic->ops.sync_state) {
        (*vlapic->ops.sync_state)(vlapic);
    }

    irrptr = &lapic->irr0;

    for (i = 7; i >= 0; i--) {
        idx = i * 4;
        val = atomic_load_acq_int(&irrptr[idx]);
        bitpos = fls(val);
        if (bitpos != 0) {
            vector = i * 32 + (bitpos - 1);
            if (PRIO(vector) > PRIO(lapic->ppr)) {
                if (vecptr != NULL)
                    *vecptr = vector;
                return (1);
            } else
                break;
        }
    }
    return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
    struct LAPIC *lapic = vlapic->apic_page;
    uint32_t *irrptr, *isrptr;
    int idx;

    KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

    if (vlapic->ops.intr_accepted)
        return ((*vlapic->ops.intr_accepted)(vlapic, vector));

    /*
     * Clear the ready bit for the vector being accepted in IRR and set
     * the vector as in-service in ISR.
     */
    idx = (vector / 32) * 4;

    irrptr = &lapic->irr0;
    atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

    isrptr = &lapic->isr0;
    isrptr[idx] |= 1 << (vector % 32);

    /*
     * The only way a fresh vector could be accepted into ISR is if it was
     * of a higher priority than the current PPR. With that vector now
     * in-service, the PPR must be raised.
     */
    vlapic_raise_ppr(vlapic, vector);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
    struct LAPIC *lapic;
    uint32_t old, new, changed;

    lapic = vlapic->apic_page;

    new = lapic->svr;
    old = vlapic->svr_last;
    vlapic->svr_last = new;

    changed = old ^ new;
    if ((changed & APIC_SVR_ENABLE) != 0) {
        if ((new & APIC_SVR_ENABLE) == 0) {
            /*
             * The apic is now disabled so stop the apic timer
             * and mask all the LVT entries.
             */
            VLAPIC_TIMER_LOCK(vlapic);
            callout_stop(&vlapic->callout);
            VLAPIC_TIMER_UNLOCK(vlapic);
            vlapic_mask_lvts(vlapic);
        } else {
            /*
             * The apic is now enabled so restart the apic timer
             * if it is configured in periodic mode.
             */
            if (vlapic_periodic_timer(vlapic))
                vlapic_icrtmr_write_handler(vlapic);
        }
    }
}

static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
    struct LAPIC *lapic = vlapic->apic_page;
    uint32_t *reg;
    int i;

    ASSERT3U(offset & 0x3, ==, 0);
    ASSERT3U(offset, <, PAGESIZE);
    ASSERT3P(outp, !=, NULL);

    uint32_t data = 0;
    switch (offset) {
    case APIC_OFFSET_ID:
        data = lapic->id;
        break;
    case APIC_OFFSET_VER:
        data = lapic->version;
        break;
    case APIC_OFFSET_TPR:
        data = lapic->tpr;
        break;
    case APIC_OFFSET_APR:
        data = lapic->apr;
        break;
    case APIC_OFFSET_PPR:
        data = lapic->ppr;
        break;
    case APIC_OFFSET_LDR:
        data = lapic->ldr;
        break;
    case APIC_OFFSET_DFR:
        data = lapic->dfr;
        break;
    case APIC_OFFSET_SVR:
        data = lapic->svr;
        break;
    case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
        i = (offset - APIC_OFFSET_ISR0) >> 2;
        reg = &lapic->isr0;
        data = *(reg + i);
        break;
    case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
        i = (offset - APIC_OFFSET_TMR0) >> 2;
        reg = &lapic->tmr0;
        data = *(reg + i);
        break;
    case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
        i = (offset - APIC_OFFSET_IRR0) >> 2;
        reg = &lapic->irr0;
        data = atomic_load_acq_int(reg + i);
        break;
    case APIC_OFFSET_ESR:
        data = lapic->esr;
        break;
    case APIC_OFFSET_ICR_LOW:
        data = lapic->icr_lo;
        break;
    case APIC_OFFSET_ICR_HI:
        data = lapic->icr_hi;
        break;
    case APIC_OFFSET_CMCI_LVT:
    case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
        data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
        reg = vlapic_get_lvtptr(vlapic, offset);
        ASSERT3U(data, ==, *reg);
#endif
        break;
    case APIC_OFFSET_TIMER_ICR:
        data = lapic->icr_timer;
        break;
    case APIC_OFFSET_TIMER_CCR:
        data = vlapic_get_ccr(vlapic);
        break;
    case APIC_OFFSET_TIMER_DCR:
        data = lapic->dcr_timer;
        break;
    case APIC_OFFSET_RRR:
        data = 0;
        break;

    case APIC_OFFSET_SELF_IPI:
    case APIC_OFFSET_EOI:
        /* Write-only register */
        *outp = 0;
        return (false);

    default:
        /* Invalid register */
        *outp = 0;
        return (false);
    }

    *outp = data;
    return (true);
}

static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
    struct LAPIC *lapic = vlapic->apic_page;
    uint32_t *regptr;

    ASSERT3U(offset & 0xf, ==, 0);
    ASSERT3U(offset, <, PAGESIZE);

    switch (offset) {
    case APIC_OFFSET_ID:
        lapic->id = data;
        vlapic_id_write_handler(vlapic);
        break;
    case APIC_OFFSET_TPR:
        vlapic_set_tpr(vlapic, data & 0xff);
        break;
    case APIC_OFFSET_EOI:
        vlapic_process_eoi(vlapic);
        break;
    case APIC_OFFSET_LDR:
        lapic->ldr = data;
        vlapic_ldr_write_handler(vlapic);
        break;
    case APIC_OFFSET_DFR:
        lapic->dfr = data;
        vlapic_dfr_write_handler(vlapic);
        break;
    case APIC_OFFSET_SVR:
        lapic->svr = data;
        vlapic_svr_write_handler(vlapic);
        break;
    case APIC_OFFSET_ICR_LOW:
        lapic->icr_lo = data;
        vlapic_icrlo_write_handler(vlapic);
        break;
    case APIC_OFFSET_ICR_HI:
        lapic->icr_hi = data;
        break;
    case APIC_OFFSET_CMCI_LVT:
    case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
        regptr = vlapic_get_lvtptr(vlapic, offset);
        *regptr = data;
        vlapic_lvt_write_handler(vlapic, offset);
        break;
    case APIC_OFFSET_TIMER_ICR:
        lapic->icr_timer = data;
        vlapic_icrtmr_write_handler(vlapic);
        break;

    case APIC_OFFSET_TIMER_DCR:
        lapic->dcr_timer = data;
        vlapic_dcr_write_handler(vlapic);
        break;

    case APIC_OFFSET_ESR:
        vlapic_esr_write_handler(vlapic);
        break;

    case APIC_OFFSET_SELF_IPI:
        if (vlapic_x2mode(vlapic))
            vlapic_self_ipi_handler(vlapic, data);
        break;

    case APIC_OFFSET_VER:
    case APIC_OFFSET_APR:
    case APIC_OFFSET_PPR:
    case APIC_OFFSET_RRR:
    case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
    case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
    case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
    case APIC_OFFSET_TIMER_CCR:
        /* Read-only register */
        return (false);

    default:
        /* Invalid register */
        return (false);
    }

    return (true);
}

void
vlapic_reset(struct vlapic *vlapic)
{
    struct LAPIC *lapic = vlapic->apic_page;
    uint32_t *isrptr, *tmrptr, *irrptr;

    /* Reset any timer-related state first */
    VLAPIC_TIMER_LOCK(vlapic);
    callout_stop(&vlapic->callout);
    vlapic->timer_fire_when = 0;
    lapic->icr_timer = 0;
    lapic->ccr_timer = 0;
    lapic->dcr_timer = 0;
    vlapic_update_divider(vlapic);
    VLAPIC_TIMER_UNLOCK(vlapic);

    /*
     * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
     * it is not leftover after the reset. This is performed after the APIC
     * timer has been stopped, in case it happened to fire just prior to
     * being deactivated.
     */
    if (vlapic->ops.sync_state) {
        (*vlapic->ops.sync_state)(vlapic);
    }

    vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
    if (vlapic->vcpuid == 0)
        vlapic->msr_apicbase |= APICBASE_BSP;

    lapic->id = vlapic_get_id(vlapic);
    lapic->version = VLAPIC_VERSION;
    lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

    lapic->tpr = 0;
    lapic->apr = 0;
    lapic->ppr = 0;

    lapic->eoi = 0;
    lapic->ldr = 0;
    lapic->dfr = 0xffffffff;
    lapic->svr = APIC_SVR_VECTOR;
    vlapic->svr_last = lapic->svr;

    isrptr = &lapic->isr0;
    tmrptr = &lapic->tmr0;
    irrptr = &lapic->irr0;
    for (uint_t i = 0; i < 8; i++) {
        atomic_store_rel_int(&isrptr[i * 4], 0);
        atomic_store_rel_int(&tmrptr[i * 4], 0);
        atomic_store_rel_int(&irrptr[i * 4], 0);
    }

    lapic->esr = 0;
    vlapic->esr_pending = 0;
    lapic->icr_lo = 0;
    lapic->icr_hi = 0;

    lapic->lvt_cmci = 0;
    lapic->lvt_timer = 0;
    lapic->lvt_thermal = 0;
    lapic->lvt_pcint = 0;
    lapic->lvt_lint0 = 0;
    lapic->lvt_lint1 = 0;
    lapic->lvt_error = 0;
    vlapic_mask_lvts(vlapic);
}

void
vlapic_init(struct vlapic *vlapic)
{
    KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
    KASSERT(vlapic->vcpuid >= 0 &&
        vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
        ("vlapic_init: vcpuid is not initialized"));
    KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
        "initialized"));

    /*
     * If the vlapic is configured in x2apic mode then it will be
     * accessed in the critical section via the MSR emulation code.
     *
     * Therefore the timer mutex must be a spinlock because blockable
     * mutexes cannot be acquired in a critical section.
     */
    mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL);
    callout_init(&vlapic->callout, 1);

    vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{
    callout_drain(&vlapic->callout);
    mutex_destroy(&vlapic->timer_lock);
}

int
vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp,
    uint_t size)
{
    ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
    ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

    /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
    if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
        *valp = UINT64_MAX;
        return (0);
    }

    const uint16_t off = gpa - DEFAULT_APIC_BASE;
    uint32_t raw = 0;
    (void) vlapic_read(vlapic, off & ~0xf, &raw);

    /* Shift and mask reads which are small and/or unaligned */
    const uint8_t align = off & 0xf;
    if (align < 4) {
        *valp = (uint64_t)raw << (align * 8);
    } else {
        *valp = 0;
    }

    return (0);
}

int
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
    ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
    ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

    /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
    if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
        return (0);
    }

    const uint16_t off = gpa - DEFAULT_APIC_BASE;
    /* Ignore writes which are not 32-bits wide and 16-byte aligned */
    if ((off & 0xf) != 0 || size != 4) {
        return (0);
    }

    (void) vlapic_write(vlapic, off, (uint32_t)val);
    return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
    const uint64_t diff = vlapic->msr_apicbase ^ val;

    /*
     * Until the LAPIC emulation for switching between xAPIC and x2APIC
     * modes is more polished, it will remain off-limits from being altered
     * by the guest.
     */
    const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
        APICBASE_BSP;
    if ((diff & reserved_bits) != 0) {
        return (VMR_GP);
    }

    /* We do not presently allow the LAPIC access address to be modified. */
    if ((diff & APICBASE_ADDR_MASK) != 0) {
        /*
         * Explicitly rebuffing such requests with a #GP is the most
         * straightforward way to handle the situation, but certain
         * consumers (such as the KVM unit tests) may balk at the
         * otherwise unexpected exception.
         */
        if (vlapic_gp_on_addr_change) {
            return (VMR_GP);
        }

        /* If silence is required, just ignore the address change. */
        val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
    }

    vlapic->msr_apicbase = val;
    return (VMR_OK);
}

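/*
 * x2APIC MSRs sit at MSR_APIC_000 (0x800) + (offset >> 4), so the inverse
 * mapping below recovers the xAPIC page offset; e.g. the TPR MSR 0x808 maps
 * back to offset 0x80.
 */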
static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
    ASSERT3U(msr, >=, MSR_APIC_000);
    ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

    return ((msr - MSR_APIC_000) << 4);
}

bool
vlapic_owned_msr(uint32_t msr)
{
    if (msr == MSR_APICBASE) {
        return (true);
    }
    if (msr >= MSR_APIC_000 &&
        msr < (MSR_APIC_000 + 0x100)) {
        return (true);
    }
    return (false);
}

vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
    ASSERT(vlapic_owned_msr(msr));
    ASSERT3P(valp, !=, NULL);

    if (msr == MSR_APICBASE) {
        *valp = vlapic->msr_apicbase;
        return (VMR_OK);
    }

    /* #GP for x2APIC MSR accesses in xAPIC mode */
    if (!vlapic_x2mode(vlapic)) {
        return (VMR_GP);
    }

    uint64_t out = 0;
    const uint16_t reg = vlapic_msr_to_regoff(msr);
    switch (reg) {
    case APIC_OFFSET_ICR_LOW: {
        /* Read from ICR register gets entire (64-bit) value */
        uint32_t low = 0, high = 0;
        bool valid;

        valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
        VERIFY(valid);
        valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
        VERIFY(valid);

        *valp = ((uint64_t)high << 32) | low;
        return (VMR_OK);
    }
    case APIC_OFFSET_ICR_HI:
        /* Already covered by ICR_LOW */
        return (VMR_GP);
    default:
        break;
    }
    if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
        return (VMR_GP);
    }
    *valp = out;
    return (VMR_OK);
}

vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
    ASSERT(vlapic_owned_msr(msr));

    if (msr == MSR_APICBASE) {
        return (vlapic_set_apicbase(vlapic, val));
    }

    /* #GP for x2APIC MSR accesses in xAPIC mode */
    if (!vlapic_x2mode(vlapic)) {
        return (VMR_GP);
    }

    const uint16_t reg = vlapic_msr_to_regoff(msr);
    switch (reg) {
    case APIC_OFFSET_ICR_LOW: {
        /* Write to ICR register sets entire (64-bit) value */
        bool valid;

        valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
        VERIFY(valid);
        valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
        VERIFY(valid);
        return (VMR_OK);
    }
    case APIC_OFFSET_ICR_HI:
        /* Already covered by ICR_LOW */
        return (VMR_GP);
    case APIC_OFFSET_ESR:
        /* Only 0 may be written from x2APIC mode */
        if (val != 0) {
            return (VMR_GP);
        }
        break;
    default:
        break;
    }
    if (!vlapic_write(vlapic, reg, val)) {
        return (VMR_GP);
    }
    return (VMR_OK);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
    struct vlapic *vlapic;
    struct LAPIC *lapic;

    vlapic = vm_lapic(vm, vcpuid);

    if (state == X2APIC_DISABLED)
        vlapic->msr_apicbase &= ~APICBASE_X2APIC;
    else
        vlapic->msr_apicbase |= APICBASE_X2APIC;

    /*
     * Reset the local APIC registers whose values are mode-dependent.
     *
     * XXX this works because the APIC mode can be changed only at vcpu
     * initialization time.
     */
    lapic = vlapic->apic_page;
    lapic->id = vlapic_get_id(vlapic);
    if (vlapic_x2mode(vlapic)) {
        lapic->ldr = x2apic_ldr(vlapic);
        lapic->dfr = 0;
    } else {
        lapic->ldr = 0;
        lapic->dfr = 0xffffffff;
    }

    if (state == X2APIC_ENABLED) {
        if (vlapic->ops.enable_x2apic_mode)
            (*vlapic->ops.enable_x2apic_mode)(vlapic);
    }
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
    bool lowprio;
    int vcpuid;
    cpuset_t dmask;

    if (delmode != IOART_DELFIXED &&
        delmode != IOART_DELLOPRI &&
        delmode != IOART_DELEXINT) {
        /* Invalid delivery mode */
        return;
    }
    lowprio = (delmode == IOART_DELLOPRI);

    /*
     * We don't provide any virtual interrupt redirection hardware so
     * all interrupts originating from the ioapic or MSI specify the
     * 'dest' in the legacy xAPIC format.
     */
    vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

    while ((vcpuid = CPU_FFS(&dmask)) != 0) {
        vcpuid--;
        CPU_CLR(vcpuid, &dmask);
        if (delmode == IOART_DELEXINT) {
            (void) vm_inject_extint(vm, vcpuid);
        } else {
            (void) lapic_set_intr(vm, vcpuid, vec, level);
        }
    }
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
    /*
     * Post an interrupt to the vcpu currently running on 'hostcpu'.
     *
     * This is done by leveraging features like Posted Interrupts (Intel)
     * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
     *
     * If neither of these features is available then fall back to
     * sending an IPI to 'hostcpu'.
     */
    if (vlapic->ops.post_intr)
        (*vlapic->ops.post_intr)(vlapic, hostcpu);
    else
        poke_cpu(hostcpu);
}

void
vlapic_localize_resources(struct vlapic *vlapic)
{
    vmm_glue_callout_localize(&vlapic->callout);
}

void
vlapic_pause(struct vlapic *vlapic)
{
    VLAPIC_TIMER_LOCK(vlapic);
    callout_stop(&vlapic->callout);
    VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_resume(struct vlapic *vlapic)
{
    VLAPIC_TIMER_LOCK(vlapic);
    if (vlapic->timer_fire_when != 0) {
        vlapic_callout_reset(vlapic);
    }
    VLAPIC_TIMER_UNLOCK(vlapic);
}

static int
vlapic_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
    VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
    VERIFY3U(req->vdr_version, ==, 1);
    VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

    struct vlapic *vlapic = vm_lapic(vm, vcpuid);
    struct vdi_lapic_v1 *out = req->vdr_data;

    VLAPIC_TIMER_LOCK(vlapic);

    if (vlapic->ops.sync_state) {
        (*vlapic->ops.sync_state)(vlapic);
    }

    out->vl_msr_apicbase = vlapic->msr_apicbase;
    out->vl_esr_pending = vlapic->esr_pending;
    if (vlapic->timer_fire_when != 0) {
        out->vl_timer_target =
            vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
    } else {
        out->vl_timer_target = 0;
    }

    const struct LAPIC *lapic = vlapic->apic_page;
    struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;

    /*
     * While this might appear, at first glance, to be missing some fields,
     * they are intentionally omitted:
     * - PPR: its contents are always generated at runtime
     * - EOI: write-only, and contents are ignored after handling
     * - RRD: (aka RRR) read-only and always 0
     * - CCR: calculated from underlying timer data
     */
    out_page->vlp_id = lapic->id;
    out_page->vlp_version = lapic->version;
    out_page->vlp_tpr = lapic->tpr;
    out_page->vlp_apr = lapic->apr;
    out_page->vlp_ldr = lapic->ldr;
    out_page->vlp_dfr = lapic->dfr;
    out_page->vlp_svr = lapic->svr;
    out_page->vlp_esr = lapic->esr;
    out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
    out_page->vlp_icr_timer = lapic->icr_timer;
    out_page->vlp_dcr_timer = lapic->dcr_timer;

    out_page->vlp_lvt_cmci = lapic->lvt_cmci;
    out_page->vlp_lvt_timer = lapic->lvt_timer;
    out_page->vlp_lvt_thermal = lapic->lvt_thermal;
    out_page->vlp_lvt_pcint = lapic->lvt_pcint;
    out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
    out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
    out_page->vlp_lvt_error = lapic->lvt_error;

    const uint32_t *isrptr = &lapic->isr0;
    const uint32_t *tmrptr = &lapic->tmr0;
    const uint32_t *irrptr = &lapic->irr0;
    for (uint_t i = 0; i < 8; i++) {
        out_page->vlp_isr[i] = isrptr[i * 4];
        out_page->vlp_tmr[i] = tmrptr[i * 4];
        out_page->vlp_irr[i] = irrptr[i * 4];
    }
    VLAPIC_TIMER_UNLOCK(vlapic);

    return (0);
}

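/*
 * Count the bits set in an 8-bit value, clearing the lowest set bit on each
 * iteration (Kernighan's method).
 */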
static uint8_t
popc8(uint8_t val)
{
    uint8_t cnt;

    for (cnt = 0; val != 0; val &= (val - 1)) {
        cnt++;
    }
    return (cnt);
}

/*
 * Descriptions for the various failures which can occur when validating
 * to-be-written vlapic state.
 */
enum vlapic_validation_error {
    VVE_OK,
    VVE_BAD_ID,
    VVE_BAD_VERSION,
    VVE_BAD_MSR_BASE,
    VVE_BAD_ESR,
    VVE_BAD_TPR,
    VVE_LOW_VECTOR,
    VVE_ISR_PRIORITY,
    VVE_TIMER_MISMATCH,
};

static enum vlapic_validation_error
vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
{
    ASSERT(req->vdr_version == 1 &&
        req->vdr_len >= sizeof (struct vdi_lapic_v1));
    const struct vdi_lapic_v1 *src = req->vdr_data;

    if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
        (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
        return (VVE_BAD_ESR);
    }

    /* Use the same restrictions as the wrmsr accessor for now */
    const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
        APICBASE_BSP;
    const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
    if ((diff & apicbase_reserved) != 0) {
        return (VVE_BAD_MSR_BASE);
    }

    const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
    /*
     * Demand that ID match for now. This can be further updated when some
     * of the x2apic handling is improved.
     */
    if (page->vlp_id != vlapic_get_id(vlapic)) {
        return (VVE_BAD_ID);
    }

    if (page->vlp_version != vlapic->apic_page->version) {
        return (VVE_BAD_VERSION);
    }

    if (page->vlp_tpr > 0xff) {
        return (VVE_BAD_TPR);
    }

    /* Vectors 0-15 are not expected to be handled by the lapic */
    if ((page->vlp_isr[0] & 0xffff) != 0 ||
        (page->vlp_irr[0] & 0xffff) != 0 ||
        (page->vlp_tmr[0] & 0xffff) != 0) {
        return (VVE_LOW_VECTOR);
    }

    /* Only one interrupt should be in-service for each priority level */
    for (uint_t i = 0; i < 8; i++) {
        if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
            popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
            popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
            popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
            return (VVE_ISR_PRIORITY);
        }
    }

    /* If icr_timer is zero, then a scheduled timer does not make sense */
    if (page->vlp_icr_timer == 0 && src->vl_timer_target != 0) {
        return (VVE_TIMER_MISMATCH);
    }

    return (VVE_OK);
}

static int
vlapic_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
    VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
    VERIFY3U(req->vdr_version, ==, 1);
    VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1));

    struct vlapic *vlapic = vm_lapic(vm, vcpuid);
    if (vlapic_data_validate(vlapic, req) != VVE_OK) {
        return (EINVAL);
    }
    const struct vdi_lapic_v1 *src = req->vdr_data;
    const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
    struct LAPIC *lapic = vlapic->apic_page;

    VLAPIC_TIMER_LOCK(vlapic);

    /* Already ensured by vlapic_data_validate() */
    VERIFY3U(page->vlp_version, ==, lapic->version);

    vlapic->msr_apicbase = src->vl_msr_apicbase;
    vlapic->esr_pending = src->vl_esr_pending;

    lapic->tpr = page->vlp_tpr;
    lapic->apr = page->vlp_apr;
    lapic->ldr = page->vlp_ldr;
    lapic->dfr = page->vlp_dfr;
    lapic->svr = page->vlp_svr;
    lapic->esr = page->vlp_esr;
    lapic->icr_lo = (uint32_t)page->vlp_icr;
    lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);

    lapic->icr_timer = page->vlp_icr_timer;
    lapic->dcr_timer = page->vlp_dcr_timer;
    vlapic_update_divider(vlapic);

    /* cleanse LDR/DFR */
    vlapic_ldr_write_handler(vlapic);
    vlapic_dfr_write_handler(vlapic);

    lapic->lvt_cmci = page->vlp_lvt_cmci;
    lapic->lvt_timer = page->vlp_lvt_timer;
    lapic->lvt_thermal = page->vlp_lvt_thermal;
    lapic->lvt_pcint = page->vlp_lvt_pcint;
    lapic->lvt_lint0 = page->vlp_lvt_lint0;
    lapic->lvt_lint1 = page->vlp_lvt_lint1;
    lapic->lvt_error = page->vlp_lvt_error;
    /* cleanse LVTs */
    vlapic_refresh_lvts(vlapic);

    uint32_t *isrptr = &lapic->isr0;
    uint32_t *tmrptr = &lapic->tmr0;
    uint32_t *irrptr = &lapic->irr0;
    for (uint_t i = 0; i < 8; i++) {
        isrptr[i * 4] = page->vlp_isr[i];
        tmrptr[i * 4] = page->vlp_tmr[i];
        irrptr[i * 4] = page->vlp_irr[i];
    }

    if (src->vl_timer_target != 0) {
        vlapic->timer_fire_when =
            vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);

        /*
         * Check to see if timer expiration would result in computed
         * CCR values in excess of what is configured in ICR/DCR.
         */
        const hrtime_t now = gethrtime();
        if (vlapic->timer_fire_when > now) {
            const uint32_t ccr = hrt_freq_count(
                vlapic->timer_fire_when - now,
                vlapic->timer_cur_freq);

            /*
             * Until we have a richer event/logging system
             * available, just note such an overage as a stat.
             */
            if (ccr > lapic->icr_timer) {
                vlapic->stats.vs_import_timer_overage++;
            }
        }

        if (!vm_is_paused(vlapic->vm)) {
            vlapic_callout_reset(vlapic);
        }
    } else {
        vlapic->timer_fire_when = 0;
    }

    if (vlapic->ops.sync_state) {
        (*vlapic->ops.sync_state)(vlapic);
    }
    VLAPIC_TIMER_UNLOCK(vlapic);

    return (0);
}

static const vmm_data_version_entry_t lapic_v1 = {
    .vdve_class = VDC_LAPIC,
    .vdve_version = 1,
    .vdve_len_expect = sizeof (struct vdi_lapic_v1),
    .vdve_vcpu_readf = vlapic_data_read,
    .vdve_vcpu_writef = vlapic_data_write,
};
VMM_DATA_VERSION(lapic_v1);