/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(0x14)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define	VLAPIC_BUS_FREQ	(128 * 1024 * 1024)
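/*
 * Illustrative numbers, assuming the power-on divide-by-2 DCR value: the
 * timer then ticks at 64 MHz, and FREQ2BT() can represent the per-tick
 * period exactly because a power-of-two frequency divides 2^64 evenly.
 */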

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
static void vlapic_reset(struct vlapic *vlapic);

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}
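/*
 * Derive the x2APIC logical ID from the APIC ID: bits 31:16 hold the
 * cluster (ID >> 4) and the low half holds 1 << (ID & 0xf). A worked
 * example: APIC ID 0x25 yields LDR 0x00020020.
 */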
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}
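/*
 * The divide configuration is encoded in DCR bits 3, 1 and 0 (bit 2 is
 * reserved), hence the '& 0xB' mask below; e.g. an encoding of 0xB
 * selects divide-by-1 and 0x0 selects divide-by-2.
 */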
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
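/*
 * The CCR is not stored anywhere; it is recomputed on demand from the
 * absolute expiry time. A sketch of the arithmetic below: with a 64 MHz
 * tick and 500us remaining until 'timer_fire_bt', dividing the remaining
 * bintime by the per-tick period yields a CCR of 32000.
 */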
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic __diagused;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
	uint32_t mode, reg, vec;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vcpu, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vcpu);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vcpu);
		break;
	default:
		// Other modes ignored
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
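/*
 * An illustrative case of the computation below: with TPR = 0x40 and the
 * highest in-service vector 0x35, PRIO(tpr) = 4 >= PRIO(isrvec) = 3, so
 * PPR becomes 0x40 and pending vectors of priority class 4 or lower
 * remain blocked.
 */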
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VLAPIC_CTR1(vlapic, "EOI vector %d", vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vector);
			}
			return;
		}
	}
	VLAPIC_CTR0(vlapic, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vcpu);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vcpu);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERED,
			    vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t)
{
	callout_reset_sbt_curcpu(&vlapic->callout, t, 0,
	    vlapic_callout_handler, vlapic, 0);
}
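/*
 * Callout handler for the APIC timer. A sketch of the periodic case: if
 * the handler runs 100us after the scheduled expiry of a 1000us period,
 * the next callout is shortened to 900us so that expiries stay aligned
 * to 'timer_fire_bt'; if the lag exceeds a whole period the time base is
 * reset rather than replaying the missed expiries.
 */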
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, rem_sbt);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, sbt);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
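/*
 * For instance (flat xAPIC logical mode, illustrative values): an MDA of
 * 0x03 matches every vcpu whose LDR has bit 0 or bit 1 set in its top
 * byte, so LDRs of 0x01000000 and 0x02000000 are both selected.
 */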
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		CPU_FOREACH_ISSET(vcpuid, &amask) {
			vlapic = vm_lapic(vm_vcpu(vm, vcpuid));
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT_ARRAY(IPIS_SENT, VMM_STAT_NELEMS_VCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x",
		    lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}
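/*
 * CR8 mirrors the high nibble of the TPR: writing CR8 = 0x9 sets
 * TPR = 0x90, and reading CR8 returns TPR >> 4; values above 0xf raise
 * #GP.
 */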
void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vcpu);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * A level-triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software
		 * Developer Manual. The MPS requires sending a level assert
		 * INIT to a cpu followed by a level deassert INIT, and some
		 * operating systems, e.g. FreeBSD or Linux, use that
		 * algorithm. According to the SDM a level deassert INIT is
		 * only supported by Pentium and P6 processors. It is always
		 * sent to all cpus regardless of the destination or shorthand
		 * field. It resets the arbitration id register. This register
		 * is not software accessible and only required for the APIC
		 * bus arbitration. So, the level deassert INIT doesn't need
		 * any emulation and we should ignore it. The SDM also defines
		 * that newer processors don't support the level deassert INIT
		 * and it's not valid any more. As it's defined for older
		 * systems, it can't be invalid per se. Otherwise, backward
		 * compatibility would be broken. However, returning false here
		 * causes it to be ignored, which is the desired behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT)
			return (false);
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)
			return (true);
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD)
			return (true);
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		__assert_unreachable();
	}

	return (false);
}
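/*
 * An illustrative xAPIC ICR decode for the handler below: icr_hi
 * 0x02000000 with icr_lo 0x000040fd requests a fixed-mode, edge,
 * physical IPI with vector 0xfd for the APIC with ID 2 (no shorthand,
 * so 'dest' comes from icrval >> 56).
 */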
int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask, ipimask;
	uint64_t icrval;
	uint32_t dest, vec, mode, shorthand;
	struct vcpu *vcpu;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	phys = (icrval & APIC_DESTMODE_LOG) == 0;
	shorthand = icrval & APIC_DEST_MASK;

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	switch (shorthand) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
		    x2apic(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		__assert_unreachable();
	}

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval)) {
		VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval);
		return (0);
	}

	/*
	 * ipimask is a set of vCPUs needing userland handling of the current
	 * IPI.
	 */
	CPU_ZERO(&ipimask);

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    false);
			VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
			return (0);
		}

		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			lapic_intr_edge(vcpu, vec);
			vmm_stat_array_incr(vlapic->vcpu, IPIS_SENT, i, 1);
			VLAPIC_CTR2(vlapic,
			    "vlapic sending ipi %d to vcpuid %d", vec, i);
		}

		break;
	case APIC_DELMODE_NMI:
		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			vm_inject_nmi(vcpu);
			VLAPIC_CTR1(vlapic,
			    "vlapic sending ipi nmi to vcpuid %d", i);
		}

		break;
	case APIC_DELMODE_INIT:
		if (!vlapic->ipi_exit) {
			if (!phys)
				break;

			i = vm_apicid2vcpuid(vlapic->vm, dest);
			if (i >= vm_get_maxcpus(vlapic->vm) ||
			    i == vlapic->vcpuid)
				break;

			CPU_SETOF(i, &ipimask);

			break;
		}

		CPU_COPY(&dmask, &ipimask);
		break;
	case APIC_DELMODE_STARTUP:
		if (!vlapic->ipi_exit) {
			if (!phys)
				break;

			i = vm_apicid2vcpuid(vlapic->vm, dest);
			if (i >= vm_get_maxcpus(vlapic->vm) ||
			    i == vlapic->vcpuid)
				break;

			CPU_SETOF(i, &ipimask);

			break;
		}

		CPU_COPY(&dmask, &ipimask);
		break;
	default:
		return (1);
	}

	if (!CPU_EMPTY(&ipimask)) {
		vmexit = vm_exitinfo(vlapic->vcpu);
		vmexit->exitcode = VM_EXITCODE_IPI;
		vmexit->u.ipi.mode = mode;
		vmexit->u.ipi.vector = vec;
		vmexit->u.ipi.dmask = dmask;

		*retu = true;
	}

	return (0);
}

static void
vlapic_handle_init(struct vcpu *vcpu, void *arg)
{
	struct vlapic *vlapic = vm_lapic(vcpu);

	vlapic_reset(vlapic);
}
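/*
 * Completes INIT/SIPI IPIs handed up by vlapic_icrlo_write_handler():
 * INIT resets the target vlapics and leaves the targets waiting for a
 * SIPI, and a subsequent SIPI with vector 'v' starts a target at
 * physical address v << PAGE_SHIFT.
 */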
int
vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vlapic *vlapic = vm_lapic(vcpu);
	cpuset_t *dmask = &vme->u.ipi.dmask;
	uint8_t vec = vme->u.ipi.vector;

	*retu = true;
	switch (vme->u.ipi.mode) {
	case APIC_DELMODE_INIT:
		vm_smp_rendezvous(vcpu, *dmask, vlapic_handle_init, NULL);
		vm_await_start(vcpu_vm(vcpu), dmask);

		if (!vlapic->ipi_exit) {
			*retu = false;
		}

		break;
	case APIC_DELMODE_STARTUP:
		/*
		 * Ignore SIPIs in any state other than wait-for-SIPI
		 */
		*dmask = vm_start_cpus(vcpu_vm(vcpu), dmask);

		if (CPU_EMPTY(dmask)) {
			*retu = false;
			break;
		}

		/*
		 * Old bhyve versions don't support the IPI
		 * exit. Translate it into the old style.
		 */
		if (!vlapic->ipi_exit) {
			vme->exitcode = VM_EXITCODE_SPINUP_AP;
			/* CPU_FFS() is 1-based, vcpu ids are 0-based. */
			vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1;
			vme->u.spinup_ap.rip = vec << PAGE_SHIFT;
		}

		break;
	default:
		return (1);
	}

	return (0);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vcpu, vec);
	vmm_stat_array_incr(vlapic->vcpu, IPIS_SENT, vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	vlapic_update_ppr(vlapic);

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}
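/*
 * Note on the '* 4' indexing used throughout: the eight 32-bit
 * IRR/ISR/TMR words sit on 16-byte boundaries in the APIC page, so the
 * word holding 'vector' is reg[(vector / 32) * 4] and the bit within it
 * is (vector % 32).
 */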
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}
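/*
 * Register reads below serve both MMIO (xAPIC) and MSR (x2APIC)
 * accesses; e.g. an x2APIC read of ICR returns the full 64-bit value
 * with icr_hi in the upper half, while xAPIC sees the two 32-bit halves
 * at separate offsets.
 */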
int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}
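/*
 * Writes are funneled through the *_write_handler() helpers above so
 * that side effects (timer reprogramming, LVT masking, IPI dispatch)
 * are handled in one place whether the guest used MMIO or MSRs.
 */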
int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		// Read only.
		break;
	}

	return (retval);
}
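/*
 * Reset the vlapic to its power-on state. The APIC comes back
 * software-disabled: the SVR enable bit is clear (svr holds only the
 * spurious vector), every LVT is masked and the DFR selects the flat
 * model.
 */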
static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic->ipi_exit = false;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
	mtx_destroy(&vlapic->timer_mtx);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vcpu);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	struct vcpu *vcpu;
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	CPU_FOREACH_ISSET(vcpuid, &dmask) {
		vcpu = vm_vcpu(vm, vcpuid);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vcpu);
		} else {
			lapic_set_intr(vcpu, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}
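/*
 * The TMR tracks which vectors are level-triggered; vlapic_process_eoi()
 * consults it to decide whether an EOI must be forwarded to the vioapic.
 */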
static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}

#ifdef BHYVE_SNAPSHOT
static void
vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
{
	/*
	 * The implementation is similar to the one in the
	 * vlapic_icrtmr_write_handler() function.
	 */
	sbintime_t sbt;
	struct bintime bt;

	VLAPIC_TIMER_LOCK(vlapic);

	bt = vlapic->timer_freq_bt;
	bintime_mul(&bt, ccr);

	if (ccr != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &bt);

		sbt = bttosbt(bt);
		vlapic_callout_reset(vlapic, sbt);
	} else {
		/* even if the CCR was 0, periodic timers should be reset */
		if (vlapic_periodic_timer(vlapic)) {
			binuptime(&vlapic->timer_fire_bt);
			bintime_add(&vlapic->timer_fire_bt,
			    &vlapic->timer_period_bt);
			sbt = bttosbt(vlapic->timer_period_bt);

			callout_stop(&vlapic->callout);
			vlapic_callout_reset(vlapic, sbt);
		}
	}

	VLAPIC_TIMER_UNLOCK(vlapic);
}
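/*
 * The CCR is not part of the APIC page snapshot (it is derived from the
 * callout state), so it is saved as a separate field and used on restore
 * to rebuild 'timer_fire_bt' and re-arm the callout.
 */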
int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret;
	struct vcpu *vcpu;
	struct vlapic *vlapic;
	struct LAPIC *lapic;
	uint32_t ccr;
	uint16_t i, maxcpus;

	KASSERT(vm != NULL, ("%s: arg was NULL", __func__));

	ret = 0;

	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vlapic = vm_lapic(vcpu);

		/* snapshot the page first; timer period depends on icr_timer */
		lapic = vlapic->apic_page;
		SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
		    meta, ret, done);

		/*
		 * Timer period is equal to 'icr_timer' ticks at a frequency of
		 * 'timer_freq_bt'.
		 */
		if (meta->op == VM_SNAPSHOT_RESTORE) {
			vlapic->timer_period_bt = vlapic->timer_freq_bt;
			bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
		}

		SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
		    sizeof(vlapic->isrvec_stk),
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);

		SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
		    sizeof(vlapic->lvt_last),
		    meta, ret, done);

		if (meta->op == VM_SNAPSHOT_SAVE)
			ccr = vlapic_get_ccr(vlapic);

		SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);

		if (meta->op == VM_SNAPSHOT_RESTORE &&
		    vlapic_enabled(vlapic) && lapic->icr_timer != 0) {
			/*
			 * Reset the value of 'timer_fire_bt' and the vlapic
			 * callout based on the value of the current count
			 * register saved when the VM snapshot was created.
			 * If the initial count register is 0, the timer is
			 * not in use. See "10.5.4 APIC Timer" in the Intel
			 * Software Developer's Manual.
			 */
			vlapic_reset_callout(vlapic, ccr);
		}
	}

done:
	return (ret);
}
#endif