1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bhyve_snapshot.h" 36 37 #include <sys/param.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/systm.h> 43 #include <sys/smp.h> 44 45 #include <x86/specialreg.h> 46 #include <x86/apicreg.h> 47 48 #include <machine/clock.h> 49 #include <machine/smp.h> 50 51 #include <machine/vmm.h> 52 #include <machine/vmm_snapshot.h> 53 54 #include "vmm_lapic.h" 55 #include "vmm_ktr.h" 56 #include "vmm_stat.h" 57 58 #include "vlapic.h" 59 #include "vlapic_priv.h" 60 #include "vioapic.h" 61 62 #define PRIO(x) ((x) >> 4) 63 64 #define VLAPIC_VERSION (0x14) 65 66 #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) 67 68 /* 69 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the 70 * vlapic_callout_handler() and vcpu accesses to: 71 * - timer_freq_bt, timer_period_bt, timer_fire_bt 72 * - timer LVT register 73 */ 74 #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) 75 #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) 76 #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) 77 78 /* 79 * APIC timer frequency: 80 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 81 * - power-of-two to avoid loss of precision when converted to a bintime. 82 */ 83 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 84 85 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 86 static void vlapic_callout_handler(void *arg); 87 static void vlapic_reset(struct vlapic *vlapic); 88 89 static __inline uint32_t 90 vlapic_get_id(struct vlapic *vlapic) 91 { 92 93 if (x2apic(vlapic)) 94 return (vlapic->vcpuid); 95 else 96 return (vlapic->vcpuid << 24); 97 } 98 99 static uint32_t 100 x2apic_ldr(struct vlapic *vlapic) 101 { 102 int apicid; 103 uint32_t ldr; 104 105 apicid = vlapic_get_id(vlapic); 106 ldr = 1 << (apicid & 0xf); 107 ldr |= (apicid & 0xffff0) << 12; 108 return (ldr); 109 } 110 111 void 112 vlapic_dfr_write_handler(struct vlapic *vlapic) 113 { 114 struct LAPIC *lapic; 115 116 lapic = vlapic->apic_page; 117 if (x2apic(vlapic)) { 118 VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", 119 lapic->dfr); 120 lapic->dfr = 0; 121 return; 122 } 123 124 lapic->dfr &= APIC_DFR_MODEL_MASK; 125 lapic->dfr |= APIC_DFR_RESERVED; 126 127 if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) 128 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); 129 else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) 130 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); 131 else 132 VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); 133 } 134 135 void 136 vlapic_ldr_write_handler(struct vlapic *vlapic) 137 { 138 struct LAPIC *lapic; 139 140 lapic = vlapic->apic_page; 141 142 /* LDR is read-only in x2apic mode */ 143 if (x2apic(vlapic)) { 144 VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", 145 lapic->ldr); 146 lapic->ldr = x2apic_ldr(vlapic); 147 } else { 148 lapic->ldr &= ~APIC_LDR_RESERVED; 149 VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); 150 } 151 } 152 153 void 154 vlapic_id_write_handler(struct vlapic *vlapic) 155 { 156 struct LAPIC *lapic; 157 158 /* 159 * We don't allow the ID register to be modified so reset it back to 160 * its default value. 161 */ 162 lapic = vlapic->apic_page; 163 lapic->id = vlapic_get_id(vlapic); 164 } 165 166 static int 167 vlapic_timer_divisor(uint32_t dcr) 168 { 169 switch (dcr & 0xB) { 170 case APIC_TDCR_1: 171 return (1); 172 case APIC_TDCR_2: 173 return (2); 174 case APIC_TDCR_4: 175 return (4); 176 case APIC_TDCR_8: 177 return (8); 178 case APIC_TDCR_16: 179 return (16); 180 case APIC_TDCR_32: 181 return (32); 182 case APIC_TDCR_64: 183 return (64); 184 case APIC_TDCR_128: 185 return (128); 186 default: 187 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 188 } 189 } 190 191 #if 0 192 static inline void 193 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 194 { 195 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 196 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 197 *lvt & APIC_LVTT_M); 198 } 199 #endif 200 201 static uint32_t 202 vlapic_get_ccr(struct vlapic *vlapic) 203 { 204 struct bintime bt_now, bt_rem; 205 struct LAPIC *lapic __diagused; 206 uint32_t ccr; 207 208 ccr = 0; 209 lapic = vlapic->apic_page; 210 211 VLAPIC_TIMER_LOCK(vlapic); 212 if (callout_active(&vlapic->callout)) { 213 /* 214 * If the timer is scheduled to expire in the future then 215 * compute the value of 'ccr' based on the remaining time. 216 */ 217 binuptime(&bt_now); 218 if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { 219 bt_rem = vlapic->timer_fire_bt; 220 bintime_sub(&bt_rem, &bt_now); 221 ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); 222 ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; 223 } 224 } 225 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " 226 "icr_timer is %#x", ccr, lapic->icr_timer)); 227 VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", 228 ccr, lapic->icr_timer); 229 VLAPIC_TIMER_UNLOCK(vlapic); 230 return (ccr); 231 } 232 233 void 234 vlapic_dcr_write_handler(struct vlapic *vlapic) 235 { 236 struct LAPIC *lapic; 237 int divisor; 238 239 lapic = vlapic->apic_page; 240 VLAPIC_TIMER_LOCK(vlapic); 241 242 divisor = vlapic_timer_divisor(lapic->dcr_timer); 243 VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", 244 lapic->dcr_timer, divisor); 245 246 /* 247 * Update the timer frequency and the timer period. 248 * 249 * XXX changes to the frequency divider will not take effect until 250 * the timer is reloaded. 251 */ 252 FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); 253 vlapic->timer_period_bt = vlapic->timer_freq_bt; 254 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 255 256 VLAPIC_TIMER_UNLOCK(vlapic); 257 } 258 259 void 260 vlapic_esr_write_handler(struct vlapic *vlapic) 261 { 262 struct LAPIC *lapic; 263 264 lapic = vlapic->apic_page; 265 lapic->esr = vlapic->esr_pending; 266 vlapic->esr_pending = 0; 267 } 268 269 int 270 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 271 { 272 struct LAPIC *lapic; 273 uint32_t *irrptr, *tmrptr, mask; 274 int idx; 275 276 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 277 278 lapic = vlapic->apic_page; 279 if (!(lapic->svr & APIC_SVR_ENABLE)) { 280 VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " 281 "interrupt %d", vector); 282 return (0); 283 } 284 285 if (vector < 16) { 286 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 287 false); 288 VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", 289 vector); 290 return (1); 291 } 292 293 if (vlapic->ops.set_intr_ready) 294 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 295 296 idx = (vector / 32) * 4; 297 mask = 1 << (vector % 32); 298 299 irrptr = &lapic->irr0; 300 atomic_set_int(&irrptr[idx], mask); 301 302 /* 303 * Verify that the trigger-mode of the interrupt matches with 304 * the vlapic TMR registers. 305 */ 306 tmrptr = &lapic->tmr0; 307 if ((tmrptr[idx] & mask) != (level ? mask : 0)) { 308 VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but " 309 "interrupt is %s-triggered", idx / 4, tmrptr[idx], 310 level ? "level" : "edge"); 311 } 312 313 VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); 314 return (1); 315 } 316 317 static __inline uint32_t * 318 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 319 { 320 struct LAPIC *lapic = vlapic->apic_page; 321 int i; 322 323 switch (offset) { 324 case APIC_OFFSET_CMCI_LVT: 325 return (&lapic->lvt_cmci); 326 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 327 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 328 return ((&lapic->lvt_timer) + i); 329 default: 330 panic("vlapic_get_lvt: invalid LVT\n"); 331 } 332 } 333 334 static __inline int 335 lvt_off_to_idx(uint32_t offset) 336 { 337 int index; 338 339 switch (offset) { 340 case APIC_OFFSET_CMCI_LVT: 341 index = APIC_LVT_CMCI; 342 break; 343 case APIC_OFFSET_TIMER_LVT: 344 index = APIC_LVT_TIMER; 345 break; 346 case APIC_OFFSET_THERM_LVT: 347 index = APIC_LVT_THERMAL; 348 break; 349 case APIC_OFFSET_PERF_LVT: 350 index = APIC_LVT_PMC; 351 break; 352 case APIC_OFFSET_LINT0_LVT: 353 index = APIC_LVT_LINT0; 354 break; 355 case APIC_OFFSET_LINT1_LVT: 356 index = APIC_LVT_LINT1; 357 break; 358 case APIC_OFFSET_ERROR_LVT: 359 index = APIC_LVT_ERROR; 360 break; 361 default: 362 index = -1; 363 break; 364 } 365 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 366 "invalid lvt index %d for offset %#x", index, offset)); 367 368 return (index); 369 } 370 371 static __inline uint32_t 372 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 373 { 374 int idx; 375 uint32_t val; 376 377 idx = lvt_off_to_idx(offset); 378 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 379 return (val); 380 } 381 382 void 383 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 384 { 385 uint32_t *lvtptr, mask, val; 386 struct LAPIC *lapic; 387 int idx; 388 389 lapic = vlapic->apic_page; 390 lvtptr = vlapic_get_lvtptr(vlapic, offset); 391 val = *lvtptr; 392 idx = lvt_off_to_idx(offset); 393 394 if (!(lapic->svr & APIC_SVR_ENABLE)) 395 val |= APIC_LVT_M; 396 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 397 switch (offset) { 398 case APIC_OFFSET_TIMER_LVT: 399 mask |= APIC_LVTT_TM; 400 break; 401 case APIC_OFFSET_ERROR_LVT: 402 break; 403 case APIC_OFFSET_LINT0_LVT: 404 case APIC_OFFSET_LINT1_LVT: 405 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 406 /* FALLTHROUGH */ 407 default: 408 mask |= APIC_LVT_DM; 409 break; 410 } 411 val &= mask; 412 *lvtptr = val; 413 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 414 } 415 416 static void 417 vlapic_mask_lvts(struct vlapic *vlapic) 418 { 419 struct LAPIC *lapic = vlapic->apic_page; 420 421 lapic->lvt_cmci |= APIC_LVT_M; 422 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 423 424 lapic->lvt_timer |= APIC_LVT_M; 425 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 426 427 lapic->lvt_thermal |= APIC_LVT_M; 428 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 429 430 lapic->lvt_pcint |= APIC_LVT_M; 431 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 432 433 lapic->lvt_lint0 |= APIC_LVT_M; 434 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 435 436 lapic->lvt_lint1 |= APIC_LVT_M; 437 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 438 439 lapic->lvt_error |= APIC_LVT_M; 440 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 441 } 442 443 static int 444 vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) 445 { 446 uint32_t mode, reg, vec; 447 448 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 449 450 if (reg & APIC_LVT_M) 451 return (0); 452 vec = reg & APIC_LVT_VECTOR; 453 mode = reg & APIC_LVT_DM; 454 455 switch (mode) { 456 case APIC_LVT_DM_FIXED: 457 if (vec < 16) { 458 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 459 lvt == APIC_LVT_ERROR); 460 return (0); 461 } 462 if (vlapic_set_intr_ready(vlapic, vec, false)) 463 vcpu_notify_event(vlapic->vcpu, true); 464 break; 465 case APIC_LVT_DM_NMI: 466 vm_inject_nmi(vlapic->vcpu); 467 break; 468 case APIC_LVT_DM_EXTINT: 469 vm_inject_extint(vlapic->vcpu); 470 break; 471 default: 472 // Other modes ignored 473 return (0); 474 } 475 return (1); 476 } 477 478 #if 1 479 static void 480 dump_isrvec_stk(struct vlapic *vlapic) 481 { 482 int i; 483 uint32_t *isrptr; 484 485 isrptr = &vlapic->apic_page->isr0; 486 for (i = 0; i < 8; i++) 487 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 488 489 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 490 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 491 } 492 #endif 493 494 /* 495 * Algorithm adopted from section "Interrupt, Task and Processor Priority" 496 * in Intel Architecture Manual Vol 3a. 497 */ 498 static void 499 vlapic_update_ppr(struct vlapic *vlapic) 500 { 501 int isrvec, tpr, ppr; 502 503 /* 504 * Note that the value on the stack at index 0 is always 0. 505 * 506 * This is a placeholder for the value of ISRV when none of the 507 * bits is set in the ISRx registers. 508 */ 509 isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; 510 tpr = vlapic->apic_page->tpr; 511 512 #if 1 513 { 514 int i, lastprio, curprio, vector, idx; 515 uint32_t *isrptr; 516 517 if (vlapic->isrvec_stk_top == 0 && isrvec != 0) 518 panic("isrvec_stk is corrupted: %d", isrvec); 519 520 /* 521 * Make sure that the priority of the nested interrupts is 522 * always increasing. 523 */ 524 lastprio = -1; 525 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 526 curprio = PRIO(vlapic->isrvec_stk[i]); 527 if (curprio <= lastprio) { 528 dump_isrvec_stk(vlapic); 529 panic("isrvec_stk does not satisfy invariant"); 530 } 531 lastprio = curprio; 532 } 533 534 /* 535 * Make sure that each bit set in the ISRx registers has a 536 * corresponding entry on the isrvec stack. 537 */ 538 i = 1; 539 isrptr = &vlapic->apic_page->isr0; 540 for (vector = 0; vector < 256; vector++) { 541 idx = (vector / 32) * 4; 542 if (isrptr[idx] & (1 << (vector % 32))) { 543 if (i > vlapic->isrvec_stk_top || 544 vlapic->isrvec_stk[i] != vector) { 545 dump_isrvec_stk(vlapic); 546 panic("ISR and isrvec_stk out of sync"); 547 } 548 i++; 549 } 550 } 551 } 552 #endif 553 554 if (PRIO(tpr) >= PRIO(isrvec)) 555 ppr = tpr; 556 else 557 ppr = isrvec & 0xf0; 558 559 vlapic->apic_page->ppr = ppr; 560 VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); 561 } 562 563 void 564 vlapic_sync_tpr(struct vlapic *vlapic) 565 { 566 vlapic_update_ppr(vlapic); 567 } 568 569 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 570 571 static void 572 vlapic_process_eoi(struct vlapic *vlapic) 573 { 574 struct LAPIC *lapic = vlapic->apic_page; 575 uint32_t *isrptr, *tmrptr; 576 int i, idx, bitpos, vector; 577 578 isrptr = &lapic->isr0; 579 tmrptr = &lapic->tmr0; 580 581 for (i = 7; i >= 0; i--) { 582 idx = i * 4; 583 bitpos = fls(isrptr[idx]); 584 if (bitpos-- != 0) { 585 if (vlapic->isrvec_stk_top <= 0) { 586 panic("invalid vlapic isrvec_stk_top %d", 587 vlapic->isrvec_stk_top); 588 } 589 isrptr[idx] &= ~(1 << bitpos); 590 vector = i * 32 + bitpos; 591 VLAPIC_CTR1(vlapic, "EOI vector %d", vector); 592 VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); 593 vlapic->isrvec_stk_top--; 594 vlapic_update_ppr(vlapic); 595 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 596 vioapic_process_eoi(vlapic->vm, vector); 597 } 598 return; 599 } 600 } 601 VLAPIC_CTR0(vlapic, "Gratuitous EOI"); 602 vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1); 603 } 604 605 static __inline int 606 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 607 { 608 609 return (lvt & mask); 610 } 611 612 static __inline int 613 vlapic_periodic_timer(struct vlapic *vlapic) 614 { 615 uint32_t lvt; 616 617 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 618 619 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 620 } 621 622 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 623 624 static void 625 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 626 { 627 628 vlapic->esr_pending |= mask; 629 630 /* 631 * Avoid infinite recursion if the error LVT itself is configured with 632 * an illegal vector. 633 */ 634 if (lvt_error) 635 return; 636 637 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 638 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1); 639 } 640 } 641 642 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 643 644 static void 645 vlapic_fire_timer(struct vlapic *vlapic) 646 { 647 648 KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); 649 650 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 651 VLAPIC_CTR0(vlapic, "vlapic timer fired"); 652 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1); 653 } 654 } 655 656 static VMM_STAT(VLAPIC_INTR_CMC, 657 "corrected machine check interrupts generated by vlapic"); 658 659 void 660 vlapic_fire_cmci(struct vlapic *vlapic) 661 { 662 663 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 664 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1); 665 } 666 } 667 668 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 669 "lvts triggered"); 670 671 int 672 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 673 { 674 675 if (vlapic_enabled(vlapic) == false) { 676 /* 677 * When the local APIC is global/hardware disabled, 678 * LINT[1:0] pins are configured as INTR and NMI pins, 679 * respectively. 680 */ 681 switch (vector) { 682 case APIC_LVT_LINT0: 683 vm_inject_extint(vlapic->vcpu); 684 break; 685 case APIC_LVT_LINT1: 686 vm_inject_nmi(vlapic->vcpu); 687 break; 688 default: 689 break; 690 } 691 return (0); 692 } 693 694 switch (vector) { 695 case APIC_LVT_LINT0: 696 case APIC_LVT_LINT1: 697 case APIC_LVT_TIMER: 698 case APIC_LVT_ERROR: 699 case APIC_LVT_PMC: 700 case APIC_LVT_THERMAL: 701 case APIC_LVT_CMCI: 702 if (vlapic_fire_lvt(vlapic, vector)) { 703 vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERRED, 704 vector, 1); 705 } 706 break; 707 default: 708 return (EINVAL); 709 } 710 return (0); 711 } 712 713 static void 714 vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) 715 { 716 callout_reset_sbt_curcpu(&vlapic->callout, t, 0, 717 vlapic_callout_handler, vlapic, 0); 718 } 719 720 static void 721 vlapic_callout_handler(void *arg) 722 { 723 struct vlapic *vlapic; 724 struct bintime bt, btnow; 725 sbintime_t rem_sbt; 726 727 vlapic = arg; 728 729 VLAPIC_TIMER_LOCK(vlapic); 730 if (callout_pending(&vlapic->callout)) /* callout was reset */ 731 goto done; 732 733 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 734 goto done; 735 736 callout_deactivate(&vlapic->callout); 737 738 vlapic_fire_timer(vlapic); 739 740 if (vlapic_periodic_timer(vlapic)) { 741 binuptime(&btnow); 742 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 743 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 744 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 745 vlapic->timer_fire_bt.frac)); 746 747 /* 748 * Compute the delta between when the timer was supposed to 749 * fire and the present time. 750 */ 751 bt = btnow; 752 bintime_sub(&bt, &vlapic->timer_fire_bt); 753 754 rem_sbt = bttosbt(vlapic->timer_period_bt); 755 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 756 /* 757 * Adjust the time until the next countdown downward 758 * to account for the lost time. 759 */ 760 rem_sbt -= bttosbt(bt); 761 } else { 762 /* 763 * If the delta is greater than the timer period then 764 * just reset our time base instead of trying to catch 765 * up. 766 */ 767 vlapic->timer_fire_bt = btnow; 768 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 769 "usecs, period is %lu usecs - resetting time base", 770 bttosbt(bt) / SBT_1US, 771 bttosbt(vlapic->timer_period_bt) / SBT_1US); 772 } 773 774 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 775 vlapic_callout_reset(vlapic, rem_sbt); 776 } 777 done: 778 VLAPIC_TIMER_UNLOCK(vlapic); 779 } 780 781 void 782 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 783 { 784 struct LAPIC *lapic; 785 sbintime_t sbt; 786 uint32_t icr_timer; 787 788 VLAPIC_TIMER_LOCK(vlapic); 789 790 lapic = vlapic->apic_page; 791 icr_timer = lapic->icr_timer; 792 793 vlapic->timer_period_bt = vlapic->timer_freq_bt; 794 bintime_mul(&vlapic->timer_period_bt, icr_timer); 795 796 if (icr_timer != 0) { 797 binuptime(&vlapic->timer_fire_bt); 798 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 799 800 sbt = bttosbt(vlapic->timer_period_bt); 801 vlapic_callout_reset(vlapic, sbt); 802 } else 803 callout_stop(&vlapic->callout); 804 805 VLAPIC_TIMER_UNLOCK(vlapic); 806 } 807 808 /* 809 * This function populates 'dmask' with the set of vcpus that match the 810 * addressing specified by the (dest, phys, lowprio) tuple. 811 * 812 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 813 * or xAPIC (8-bit) destination field. 814 */ 815 static void 816 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 817 bool lowprio, bool x2apic_dest) 818 { 819 struct vlapic *vlapic; 820 uint32_t dfr, ldr, ldest, cluster; 821 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 822 cpuset_t amask; 823 int vcpuid; 824 825 if ((x2apic_dest && dest == 0xffffffff) || 826 (!x2apic_dest && dest == 0xff)) { 827 /* 828 * Broadcast in both logical and physical modes. 829 */ 830 *dmask = vm_active_cpus(vm); 831 return; 832 } 833 834 if (phys) { 835 /* 836 * Physical mode: destination is APIC ID. 837 */ 838 CPU_ZERO(dmask); 839 vcpuid = vm_apicid2vcpuid(vm, dest); 840 amask = vm_active_cpus(vm); 841 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 842 CPU_SET(vcpuid, dmask); 843 } else { 844 /* 845 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 846 * bitmask. This model is only available in the xAPIC mode. 847 */ 848 mda_flat_ldest = dest & 0xff; 849 850 /* 851 * In the "Cluster Model" the MDA is used to identify a 852 * specific cluster and a set of APICs in that cluster. 853 */ 854 if (x2apic_dest) { 855 mda_cluster_id = dest >> 16; 856 mda_cluster_ldest = dest & 0xffff; 857 } else { 858 mda_cluster_id = (dest >> 4) & 0xf; 859 mda_cluster_ldest = dest & 0xf; 860 } 861 862 /* 863 * Logical mode: match each APIC that has a bit set 864 * in its LDR that matches a bit in the ldest. 865 */ 866 CPU_ZERO(dmask); 867 amask = vm_active_cpus(vm); 868 CPU_FOREACH_ISSET(vcpuid, &amask) { 869 vlapic = vm_lapic(vm_vcpu(vm, vcpuid)); 870 dfr = vlapic->apic_page->dfr; 871 ldr = vlapic->apic_page->ldr; 872 873 if ((dfr & APIC_DFR_MODEL_MASK) == 874 APIC_DFR_MODEL_FLAT) { 875 ldest = ldr >> 24; 876 mda_ldest = mda_flat_ldest; 877 } else if ((dfr & APIC_DFR_MODEL_MASK) == 878 APIC_DFR_MODEL_CLUSTER) { 879 if (x2apic(vlapic)) { 880 cluster = ldr >> 16; 881 ldest = ldr & 0xffff; 882 } else { 883 cluster = ldr >> 28; 884 ldest = (ldr >> 24) & 0xf; 885 } 886 if (cluster != mda_cluster_id) 887 continue; 888 mda_ldest = mda_cluster_ldest; 889 } else { 890 /* 891 * Guest has configured a bad logical 892 * model for this vcpu - skip it. 893 */ 894 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 895 "model %x - cannot deliver interrupt", dfr); 896 continue; 897 } 898 899 if ((mda_ldest & ldest) != 0) { 900 CPU_SET(vcpuid, dmask); 901 if (lowprio) 902 break; 903 } 904 } 905 } 906 } 907 908 static VMM_STAT_ARRAY(IPIS_SENT, VMM_STAT_NELEMS_VCPU, "ipis sent to vcpu"); 909 910 static void 911 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 912 { 913 struct LAPIC *lapic = vlapic->apic_page; 914 915 if (lapic->tpr != val) { 916 VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x", 917 lapic->tpr, val); 918 lapic->tpr = val; 919 vlapic_update_ppr(vlapic); 920 } 921 } 922 923 static uint8_t 924 vlapic_get_tpr(struct vlapic *vlapic) 925 { 926 struct LAPIC *lapic = vlapic->apic_page; 927 928 return (lapic->tpr); 929 } 930 931 void 932 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 933 { 934 uint8_t tpr; 935 936 if (val & ~0xf) { 937 vm_inject_gp(vlapic->vcpu); 938 return; 939 } 940 941 tpr = val << 4; 942 vlapic_set_tpr(vlapic, tpr); 943 } 944 945 uint64_t 946 vlapic_get_cr8(struct vlapic *vlapic) 947 { 948 uint8_t tpr; 949 950 tpr = vlapic_get_tpr(vlapic); 951 return (tpr >> 4); 952 } 953 954 static bool 955 vlapic_is_icr_valid(uint64_t icrval) 956 { 957 uint32_t mode = icrval & APIC_DELMODE_MASK; 958 uint32_t level = icrval & APIC_LEVEL_MASK; 959 uint32_t trigger = icrval & APIC_TRIGMOD_MASK; 960 uint32_t shorthand = icrval & APIC_DEST_MASK; 961 962 switch (mode) { 963 case APIC_DELMODE_FIXED: 964 if (trigger == APIC_TRIGMOD_EDGE) 965 return (true); 966 /* 967 * AMD allows a level assert IPI and Intel converts a level 968 * assert IPI into an edge IPI. 969 */ 970 if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) 971 return (true); 972 break; 973 case APIC_DELMODE_LOWPRIO: 974 case APIC_DELMODE_SMI: 975 case APIC_DELMODE_NMI: 976 case APIC_DELMODE_INIT: 977 if (trigger == APIC_TRIGMOD_EDGE && 978 (shorthand == APIC_DEST_DESTFLD || 979 shorthand == APIC_DEST_ALLESELF)) 980 return (true); 981 /* 982 * AMD allows a level assert IPI and Intel converts a level 983 * assert IPI into an edge IPI. 984 */ 985 if (trigger == APIC_TRIGMOD_LEVEL && 986 level == APIC_LEVEL_ASSERT && 987 (shorthand == APIC_DEST_DESTFLD || 988 shorthand == APIC_DEST_ALLESELF)) 989 return (true); 990 /* 991 * An level triggered deassert INIT is defined in the Intel 992 * Multiprocessor Specification and the Intel Software Developer 993 * Manual. Due to the MPS it's required to send a level assert 994 * INIT to a cpu and then a level deassert INIT. Some operating 995 * systems e.g. FreeBSD or Linux use that algorithm. According 996 * to the SDM a level deassert INIT is only supported by Pentium 997 * and P6 processors. It's always send to all cpus regardless of 998 * the destination or shorthand field. It resets the arbitration 999 * id register. This register is not software accessible and 1000 * only required for the APIC bus arbitration. So, the level 1001 * deassert INIT doesn't need any emulation and we should ignore 1002 * it. The SDM also defines that newer processors don't support 1003 * the level deassert INIT and it's not valid any more. As it's 1004 * defined for older systems, it can't be invalid per se. 1005 * Otherwise, backward compatibility would be broken. However, 1006 * when returning false here, it'll be ignored which is the 1007 * desired behaviour. 1008 */ 1009 if (mode == APIC_DELMODE_INIT && 1010 trigger == APIC_TRIGMOD_LEVEL && 1011 level == APIC_LEVEL_DEASSERT) 1012 return (false); 1013 break; 1014 case APIC_DELMODE_STARTUP: 1015 if (shorthand == APIC_DEST_DESTFLD || 1016 shorthand == APIC_DEST_ALLESELF) 1017 return (true); 1018 break; 1019 case APIC_DELMODE_RR: 1020 /* Only available on AMD! */ 1021 if (trigger == APIC_TRIGMOD_EDGE && 1022 shorthand == APIC_DEST_DESTFLD) 1023 return (true); 1024 break; 1025 case APIC_DELMODE_RESV: 1026 return (false); 1027 default: 1028 __assert_unreachable(); 1029 } 1030 1031 return (false); 1032 } 1033 1034 int 1035 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 1036 { 1037 int i; 1038 bool phys; 1039 cpuset_t dmask, ipimask; 1040 uint64_t icrval; 1041 uint32_t dest, vec, mode, shorthand; 1042 struct vcpu *vcpu; 1043 struct vm_exit *vmexit; 1044 struct LAPIC *lapic; 1045 1046 lapic = vlapic->apic_page; 1047 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 1048 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1049 1050 if (x2apic(vlapic)) 1051 dest = icrval >> 32; 1052 else 1053 dest = icrval >> (32 + 24); 1054 vec = icrval & APIC_VECTOR_MASK; 1055 mode = icrval & APIC_DELMODE_MASK; 1056 phys = (icrval & APIC_DESTMODE_LOG) == 0; 1057 shorthand = icrval & APIC_DEST_MASK; 1058 1059 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 1060 1061 switch (shorthand) { 1062 case APIC_DEST_DESTFLD: 1063 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); 1064 break; 1065 case APIC_DEST_SELF: 1066 CPU_SETOF(vlapic->vcpuid, &dmask); 1067 break; 1068 case APIC_DEST_ALLISELF: 1069 dmask = vm_active_cpus(vlapic->vm); 1070 break; 1071 case APIC_DEST_ALLESELF: 1072 dmask = vm_active_cpus(vlapic->vm); 1073 CPU_CLR(vlapic->vcpuid, &dmask); 1074 break; 1075 default: 1076 __assert_unreachable(); 1077 } 1078 1079 /* 1080 * Ignore invalid combinations of the icr. 1081 */ 1082 if (!vlapic_is_icr_valid(icrval)) { 1083 VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval); 1084 return (0); 1085 } 1086 1087 /* 1088 * ipimask is a set of vCPUs needing userland handling of the current 1089 * IPI. 1090 */ 1091 CPU_ZERO(&ipimask); 1092 1093 switch (mode) { 1094 case APIC_DELMODE_FIXED: 1095 if (vec < 16) { 1096 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 1097 false); 1098 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 1099 return (0); 1100 } 1101 1102 CPU_FOREACH_ISSET(i, &dmask) { 1103 vcpu = vm_vcpu(vlapic->vm, i); 1104 lapic_intr_edge(vcpu, vec); 1105 vmm_stat_array_incr(vlapic->vcpu, IPIS_SENT, i, 1); 1106 VLAPIC_CTR2(vlapic, 1107 "vlapic sending ipi %d to vcpuid %d", vec, i); 1108 } 1109 1110 break; 1111 case APIC_DELMODE_NMI: 1112 CPU_FOREACH_ISSET(i, &dmask) { 1113 vcpu = vm_vcpu(vlapic->vm, i); 1114 vm_inject_nmi(vcpu); 1115 VLAPIC_CTR1(vlapic, 1116 "vlapic sending ipi nmi to vcpuid %d", i); 1117 } 1118 1119 break; 1120 case APIC_DELMODE_INIT: 1121 case APIC_DELMODE_STARTUP: 1122 if (!vlapic->ipi_exit) { 1123 if (!phys) 1124 break; 1125 1126 i = vm_apicid2vcpuid(vlapic->vm, dest); 1127 if (i >= vm_get_maxcpus(vlapic->vm) || 1128 i == vlapic->vcpuid) 1129 break; 1130 1131 CPU_SETOF(i, &ipimask); 1132 1133 break; 1134 } 1135 1136 CPU_COPY(&dmask, &ipimask); 1137 break; 1138 default: 1139 return (1); 1140 } 1141 1142 if (!CPU_EMPTY(&ipimask)) { 1143 vmexit = vm_exitinfo(vlapic->vcpu); 1144 vmexit->exitcode = VM_EXITCODE_IPI; 1145 vmexit->u.ipi.mode = mode; 1146 vmexit->u.ipi.vector = vec; 1147 vmexit->u.ipi.dmask = ipimask; 1148 1149 *retu = true; 1150 } 1151 1152 return (0); 1153 } 1154 1155 static void 1156 vlapic_handle_init(struct vcpu *vcpu, void *arg) 1157 { 1158 struct vlapic *vlapic = vm_lapic(vcpu); 1159 1160 vlapic_reset(vlapic); 1161 } 1162 1163 int 1164 vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1165 { 1166 struct vlapic *vlapic = vm_lapic(vcpu); 1167 cpuset_t *dmask = &vme->u.ipi.dmask; 1168 uint8_t vec = vme->u.ipi.vector; 1169 1170 *retu = true; 1171 switch (vme->u.ipi.mode) { 1172 case APIC_DELMODE_INIT: { 1173 cpuset_t active, reinit; 1174 1175 active = vm_active_cpus(vcpu_vm(vcpu)); 1176 CPU_AND(&reinit, &active, dmask); 1177 if (!CPU_EMPTY(&reinit)) { 1178 vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init, 1179 NULL); 1180 } 1181 vm_await_start(vcpu_vm(vcpu), dmask); 1182 1183 if (!vlapic->ipi_exit) 1184 *retu = false; 1185 1186 break; 1187 } 1188 case APIC_DELMODE_STARTUP: 1189 /* 1190 * Ignore SIPIs in any state other than wait-for-SIPI 1191 */ 1192 *dmask = vm_start_cpus(vcpu_vm(vcpu), dmask); 1193 1194 if (CPU_EMPTY(dmask)) { 1195 *retu = false; 1196 break; 1197 } 1198 1199 /* 1200 * Old bhyve versions don't support the IPI 1201 * exit. Translate it into the old style. 1202 */ 1203 if (!vlapic->ipi_exit) { 1204 vme->exitcode = VM_EXITCODE_SPINUP_AP; 1205 vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1; 1206 vme->u.spinup_ap.rip = vec << PAGE_SHIFT; 1207 } 1208 1209 break; 1210 default: 1211 __assert_unreachable(); 1212 } 1213 1214 return (0); 1215 } 1216 1217 void 1218 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1219 { 1220 int vec; 1221 1222 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1223 1224 vec = val & 0xff; 1225 lapic_intr_edge(vlapic->vcpu, vec); 1226 vmm_stat_array_incr(vlapic->vcpu, IPIS_SENT, vlapic->vcpuid, 1); 1227 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1228 } 1229 1230 int 1231 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1232 { 1233 struct LAPIC *lapic = vlapic->apic_page; 1234 int idx, i, bitpos, vector; 1235 uint32_t *irrptr, val; 1236 1237 vlapic_update_ppr(vlapic); 1238 1239 if (vlapic->ops.pending_intr) 1240 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1241 1242 irrptr = &lapic->irr0; 1243 1244 for (i = 7; i >= 0; i--) { 1245 idx = i * 4; 1246 val = atomic_load_acq_int(&irrptr[idx]); 1247 bitpos = fls(val); 1248 if (bitpos != 0) { 1249 vector = i * 32 + (bitpos - 1); 1250 if (PRIO(vector) > PRIO(lapic->ppr)) { 1251 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1252 if (vecptr != NULL) 1253 *vecptr = vector; 1254 return (1); 1255 } else 1256 break; 1257 } 1258 } 1259 return (0); 1260 } 1261 1262 void 1263 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1264 { 1265 struct LAPIC *lapic = vlapic->apic_page; 1266 uint32_t *irrptr, *isrptr; 1267 int idx, stk_top; 1268 1269 if (vlapic->ops.intr_accepted) 1270 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1271 1272 /* 1273 * clear the ready bit for vector being accepted in irr 1274 * and set the vector as in service in isr. 1275 */ 1276 idx = (vector / 32) * 4; 1277 1278 irrptr = &lapic->irr0; 1279 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1280 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1281 1282 isrptr = &lapic->isr0; 1283 isrptr[idx] |= 1 << (vector % 32); 1284 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1285 1286 /* 1287 * Update the PPR 1288 */ 1289 vlapic->isrvec_stk_top++; 1290 1291 stk_top = vlapic->isrvec_stk_top; 1292 if (stk_top >= ISRVEC_STK_SIZE) 1293 panic("isrvec_stk_top overflow %d", stk_top); 1294 1295 vlapic->isrvec_stk[stk_top] = vector; 1296 } 1297 1298 void 1299 vlapic_svr_write_handler(struct vlapic *vlapic) 1300 { 1301 struct LAPIC *lapic; 1302 uint32_t old, new, changed; 1303 1304 lapic = vlapic->apic_page; 1305 1306 new = lapic->svr; 1307 old = vlapic->svr_last; 1308 vlapic->svr_last = new; 1309 1310 changed = old ^ new; 1311 if ((changed & APIC_SVR_ENABLE) != 0) { 1312 if ((new & APIC_SVR_ENABLE) == 0) { 1313 /* 1314 * The apic is now disabled so stop the apic timer 1315 * and mask all the LVT entries. 1316 */ 1317 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1318 VLAPIC_TIMER_LOCK(vlapic); 1319 callout_stop(&vlapic->callout); 1320 VLAPIC_TIMER_UNLOCK(vlapic); 1321 vlapic_mask_lvts(vlapic); 1322 } else { 1323 /* 1324 * The apic is now enabled so restart the apic timer 1325 * if it is configured in periodic mode. 1326 */ 1327 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1328 if (vlapic_periodic_timer(vlapic)) 1329 vlapic_icrtmr_write_handler(vlapic); 1330 } 1331 } 1332 } 1333 1334 int 1335 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1336 uint64_t *data, bool *retu) 1337 { 1338 struct LAPIC *lapic = vlapic->apic_page; 1339 uint32_t *reg; 1340 int i; 1341 1342 /* Ignore MMIO accesses in x2APIC mode */ 1343 if (x2apic(vlapic) && mmio_access) { 1344 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1345 offset); 1346 *data = 0; 1347 goto done; 1348 } 1349 1350 if (!x2apic(vlapic) && !mmio_access) { 1351 /* 1352 * XXX Generate GP fault for MSR accesses in xAPIC mode 1353 */ 1354 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1355 "xAPIC mode", offset); 1356 *data = 0; 1357 goto done; 1358 } 1359 1360 if (offset > sizeof(*lapic)) { 1361 *data = 0; 1362 goto done; 1363 } 1364 1365 offset &= ~3; 1366 switch(offset) 1367 { 1368 case APIC_OFFSET_ID: 1369 *data = lapic->id; 1370 break; 1371 case APIC_OFFSET_VER: 1372 *data = lapic->version; 1373 break; 1374 case APIC_OFFSET_TPR: 1375 *data = vlapic_get_tpr(vlapic); 1376 break; 1377 case APIC_OFFSET_APR: 1378 *data = lapic->apr; 1379 break; 1380 case APIC_OFFSET_PPR: 1381 *data = lapic->ppr; 1382 break; 1383 case APIC_OFFSET_EOI: 1384 *data = lapic->eoi; 1385 break; 1386 case APIC_OFFSET_LDR: 1387 *data = lapic->ldr; 1388 break; 1389 case APIC_OFFSET_DFR: 1390 *data = lapic->dfr; 1391 break; 1392 case APIC_OFFSET_SVR: 1393 *data = lapic->svr; 1394 break; 1395 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1396 i = (offset - APIC_OFFSET_ISR0) >> 2; 1397 reg = &lapic->isr0; 1398 *data = *(reg + i); 1399 break; 1400 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1401 i = (offset - APIC_OFFSET_TMR0) >> 2; 1402 reg = &lapic->tmr0; 1403 *data = *(reg + i); 1404 break; 1405 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1406 i = (offset - APIC_OFFSET_IRR0) >> 2; 1407 reg = &lapic->irr0; 1408 *data = atomic_load_acq_int(reg + i); 1409 break; 1410 case APIC_OFFSET_ESR: 1411 *data = lapic->esr; 1412 break; 1413 case APIC_OFFSET_ICR_LOW: 1414 *data = lapic->icr_lo; 1415 if (x2apic(vlapic)) 1416 *data |= (uint64_t)lapic->icr_hi << 32; 1417 break; 1418 case APIC_OFFSET_ICR_HI: 1419 *data = lapic->icr_hi; 1420 break; 1421 case APIC_OFFSET_CMCI_LVT: 1422 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1423 *data = vlapic_get_lvt(vlapic, offset); 1424 #ifdef INVARIANTS 1425 reg = vlapic_get_lvtptr(vlapic, offset); 1426 KASSERT(*data == *reg, ("inconsistent lvt value at " 1427 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1428 #endif 1429 break; 1430 case APIC_OFFSET_TIMER_ICR: 1431 *data = lapic->icr_timer; 1432 break; 1433 case APIC_OFFSET_TIMER_CCR: 1434 *data = vlapic_get_ccr(vlapic); 1435 break; 1436 case APIC_OFFSET_TIMER_DCR: 1437 *data = lapic->dcr_timer; 1438 break; 1439 case APIC_OFFSET_SELF_IPI: 1440 /* 1441 * XXX generate a GP fault if vlapic is in x2apic mode 1442 */ 1443 *data = 0; 1444 break; 1445 case APIC_OFFSET_RRR: 1446 default: 1447 *data = 0; 1448 break; 1449 } 1450 done: 1451 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1452 return 0; 1453 } 1454 1455 int 1456 vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1457 uint64_t data, bool *retu) 1458 { 1459 struct LAPIC *lapic = vlapic->apic_page; 1460 uint32_t *regptr; 1461 int retval; 1462 1463 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1464 ("vlapic_write: invalid offset %#lx", offset)); 1465 1466 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1467 offset, data); 1468 1469 if (offset > sizeof(*lapic)) 1470 return (0); 1471 1472 /* Ignore MMIO accesses in x2APIC mode */ 1473 if (x2apic(vlapic) && mmio_access) { 1474 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1475 "in x2APIC mode", data, offset); 1476 return (0); 1477 } 1478 1479 /* 1480 * XXX Generate GP fault for MSR accesses in xAPIC mode 1481 */ 1482 if (!x2apic(vlapic) && !mmio_access) { 1483 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1484 "in xAPIC mode", data, offset); 1485 return (0); 1486 } 1487 1488 retval = 0; 1489 switch(offset) 1490 { 1491 case APIC_OFFSET_ID: 1492 lapic->id = data; 1493 vlapic_id_write_handler(vlapic); 1494 break; 1495 case APIC_OFFSET_TPR: 1496 vlapic_set_tpr(vlapic, data & 0xff); 1497 break; 1498 case APIC_OFFSET_EOI: 1499 vlapic_process_eoi(vlapic); 1500 break; 1501 case APIC_OFFSET_LDR: 1502 lapic->ldr = data; 1503 vlapic_ldr_write_handler(vlapic); 1504 break; 1505 case APIC_OFFSET_DFR: 1506 lapic->dfr = data; 1507 vlapic_dfr_write_handler(vlapic); 1508 break; 1509 case APIC_OFFSET_SVR: 1510 lapic->svr = data; 1511 vlapic_svr_write_handler(vlapic); 1512 break; 1513 case APIC_OFFSET_ICR_LOW: 1514 lapic->icr_lo = data; 1515 if (x2apic(vlapic)) 1516 lapic->icr_hi = data >> 32; 1517 retval = vlapic_icrlo_write_handler(vlapic, retu); 1518 break; 1519 case APIC_OFFSET_ICR_HI: 1520 lapic->icr_hi = data; 1521 break; 1522 case APIC_OFFSET_CMCI_LVT: 1523 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1524 regptr = vlapic_get_lvtptr(vlapic, offset); 1525 *regptr = data; 1526 vlapic_lvt_write_handler(vlapic, offset); 1527 break; 1528 case APIC_OFFSET_TIMER_ICR: 1529 lapic->icr_timer = data; 1530 vlapic_icrtmr_write_handler(vlapic); 1531 break; 1532 1533 case APIC_OFFSET_TIMER_DCR: 1534 lapic->dcr_timer = data; 1535 vlapic_dcr_write_handler(vlapic); 1536 break; 1537 1538 case APIC_OFFSET_ESR: 1539 vlapic_esr_write_handler(vlapic); 1540 break; 1541 1542 case APIC_OFFSET_SELF_IPI: 1543 if (x2apic(vlapic)) 1544 vlapic_self_ipi_handler(vlapic, data); 1545 break; 1546 1547 case APIC_OFFSET_VER: 1548 case APIC_OFFSET_APR: 1549 case APIC_OFFSET_PPR: 1550 case APIC_OFFSET_RRR: 1551 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1552 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1553 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1554 case APIC_OFFSET_TIMER_CCR: 1555 default: 1556 // Read only. 1557 break; 1558 } 1559 1560 return (retval); 1561 } 1562 1563 static void 1564 vlapic_reset(struct vlapic *vlapic) 1565 { 1566 struct LAPIC *lapic; 1567 1568 lapic = vlapic->apic_page; 1569 bzero(lapic, sizeof(struct LAPIC)); 1570 1571 lapic->id = vlapic_get_id(vlapic); 1572 lapic->version = VLAPIC_VERSION; 1573 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1574 lapic->dfr = 0xffffffff; 1575 lapic->svr = APIC_SVR_VECTOR; 1576 vlapic_mask_lvts(vlapic); 1577 vlapic_reset_tmr(vlapic); 1578 1579 lapic->dcr_timer = 0; 1580 vlapic_dcr_write_handler(vlapic); 1581 1582 vlapic->svr_last = lapic->svr; 1583 } 1584 1585 void 1586 vlapic_init(struct vlapic *vlapic) 1587 { 1588 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1589 KASSERT(vlapic->vcpuid >= 0 && 1590 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1591 ("vlapic_init: vcpuid is not initialized")); 1592 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1593 "initialized")); 1594 1595 /* 1596 * If the vlapic is configured in x2apic mode then it will be 1597 * accessed in the critical section via the MSR emulation code. 1598 * 1599 * Therefore the timer mutex must be a spinlock because blockable 1600 * mutexes cannot be acquired in a critical section. 1601 */ 1602 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1603 callout_init(&vlapic->callout, 1); 1604 1605 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1606 1607 if (vlapic->vcpuid == 0) 1608 vlapic->msr_apicbase |= APICBASE_BSP; 1609 1610 vlapic->ipi_exit = false; 1611 1612 vlapic_reset(vlapic); 1613 } 1614 1615 void 1616 vlapic_cleanup(struct vlapic *vlapic) 1617 { 1618 1619 callout_drain(&vlapic->callout); 1620 mtx_destroy(&vlapic->timer_mtx); 1621 } 1622 1623 uint64_t 1624 vlapic_get_apicbase(struct vlapic *vlapic) 1625 { 1626 1627 return (vlapic->msr_apicbase); 1628 } 1629 1630 int 1631 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1632 { 1633 1634 if (vlapic->msr_apicbase != new) { 1635 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1636 "not supported", vlapic->msr_apicbase, new); 1637 return (-1); 1638 } 1639 1640 return (0); 1641 } 1642 1643 void 1644 vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) 1645 { 1646 struct vlapic *vlapic; 1647 struct LAPIC *lapic; 1648 1649 vlapic = vm_lapic(vcpu); 1650 1651 if (state == X2APIC_DISABLED) 1652 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1653 else 1654 vlapic->msr_apicbase |= APICBASE_X2APIC; 1655 1656 /* 1657 * Reset the local APIC registers whose values are mode-dependent. 1658 * 1659 * XXX this works because the APIC mode can be changed only at vcpu 1660 * initialization time. 1661 */ 1662 lapic = vlapic->apic_page; 1663 lapic->id = vlapic_get_id(vlapic); 1664 if (x2apic(vlapic)) { 1665 lapic->ldr = x2apic_ldr(vlapic); 1666 lapic->dfr = 0; 1667 } else { 1668 lapic->ldr = 0; 1669 lapic->dfr = 0xffffffff; 1670 } 1671 1672 if (state == X2APIC_ENABLED) { 1673 if (vlapic->ops.enable_x2apic_mode) 1674 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1675 } 1676 } 1677 1678 void 1679 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1680 int delmode, int vec) 1681 { 1682 struct vcpu *vcpu; 1683 bool lowprio; 1684 int vcpuid; 1685 cpuset_t dmask; 1686 1687 if (delmode != IOART_DELFIXED && 1688 delmode != IOART_DELLOPRI && 1689 delmode != IOART_DELEXINT) { 1690 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1691 return; 1692 } 1693 lowprio = (delmode == IOART_DELLOPRI); 1694 1695 /* 1696 * We don't provide any virtual interrupt redirection hardware so 1697 * all interrupts originating from the ioapic or MSI specify the 1698 * 'dest' in the legacy xAPIC format. 1699 */ 1700 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1701 1702 CPU_FOREACH_ISSET(vcpuid, &dmask) { 1703 vcpu = vm_vcpu(vm, vcpuid); 1704 if (delmode == IOART_DELEXINT) { 1705 vm_inject_extint(vcpu); 1706 } else { 1707 lapic_set_intr(vcpu, vec, level); 1708 } 1709 } 1710 } 1711 1712 void 1713 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1714 { 1715 /* 1716 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1717 * 1718 * This is done by leveraging features like Posted Interrupts (Intel) 1719 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1720 * 1721 * If neither of these features are available then fallback to 1722 * sending an IPI to 'hostcpu'. 1723 */ 1724 if (vlapic->ops.post_intr) 1725 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1726 else 1727 ipi_cpu(hostcpu, ipinum); 1728 } 1729 1730 bool 1731 vlapic_enabled(struct vlapic *vlapic) 1732 { 1733 struct LAPIC *lapic = vlapic->apic_page; 1734 1735 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1736 (lapic->svr & APIC_SVR_ENABLE) != 0) 1737 return (true); 1738 else 1739 return (false); 1740 } 1741 1742 static void 1743 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1744 { 1745 struct LAPIC *lapic; 1746 uint32_t *tmrptr, mask; 1747 int idx; 1748 1749 lapic = vlapic->apic_page; 1750 tmrptr = &lapic->tmr0; 1751 idx = (vector / 32) * 4; 1752 mask = 1 << (vector % 32); 1753 if (level) 1754 tmrptr[idx] |= mask; 1755 else 1756 tmrptr[idx] &= ~mask; 1757 1758 if (vlapic->ops.set_tmr != NULL) 1759 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1760 } 1761 1762 void 1763 vlapic_reset_tmr(struct vlapic *vlapic) 1764 { 1765 int vector; 1766 1767 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1768 1769 for (vector = 0; vector <= 255; vector++) 1770 vlapic_set_tmr(vlapic, vector, false); 1771 } 1772 1773 void 1774 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1775 int delmode, int vector) 1776 { 1777 cpuset_t dmask; 1778 bool lowprio; 1779 1780 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1781 1782 /* 1783 * A level trigger is valid only for fixed and lowprio delivery modes. 1784 */ 1785 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1786 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1787 "delivery-mode %d", delmode); 1788 return; 1789 } 1790 1791 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1792 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1793 1794 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1795 return; 1796 1797 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1798 vlapic_set_tmr(vlapic, vector, true); 1799 } 1800 1801 #ifdef BHYVE_SNAPSHOT 1802 static void 1803 vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) 1804 { 1805 /* The implementation is similar to the one in the 1806 * `vlapic_icrtmr_write_handler` function 1807 */ 1808 sbintime_t sbt; 1809 struct bintime bt; 1810 1811 VLAPIC_TIMER_LOCK(vlapic); 1812 1813 bt = vlapic->timer_freq_bt; 1814 bintime_mul(&bt, ccr); 1815 1816 if (ccr != 0) { 1817 binuptime(&vlapic->timer_fire_bt); 1818 bintime_add(&vlapic->timer_fire_bt, &bt); 1819 1820 sbt = bttosbt(bt); 1821 vlapic_callout_reset(vlapic, sbt); 1822 } else { 1823 /* even if the CCR was 0, periodic timers should be reset */ 1824 if (vlapic_periodic_timer(vlapic)) { 1825 binuptime(&vlapic->timer_fire_bt); 1826 bintime_add(&vlapic->timer_fire_bt, 1827 &vlapic->timer_period_bt); 1828 sbt = bttosbt(vlapic->timer_period_bt); 1829 1830 callout_stop(&vlapic->callout); 1831 vlapic_callout_reset(vlapic, sbt); 1832 } 1833 } 1834 1835 VLAPIC_TIMER_UNLOCK(vlapic); 1836 } 1837 1838 int 1839 vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) 1840 { 1841 int ret; 1842 struct vcpu *vcpu; 1843 struct vlapic *vlapic; 1844 struct LAPIC *lapic; 1845 uint32_t ccr; 1846 uint16_t i, maxcpus; 1847 1848 KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); 1849 1850 ret = 0; 1851 1852 maxcpus = vm_get_maxcpus(vm); 1853 for (i = 0; i < maxcpus; i++) { 1854 vcpu = vm_vcpu(vm, i); 1855 if (vcpu == NULL) 1856 continue; 1857 vlapic = vm_lapic(vcpu); 1858 1859 /* snapshot the page first; timer period depends on icr_timer */ 1860 lapic = vlapic->apic_page; 1861 SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done); 1862 1863 SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); 1864 1865 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, 1866 meta, ret, done); 1867 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, 1868 meta, ret, done); 1869 1870 /* 1871 * Timer period is equal to 'icr_timer' ticks at a frequency of 1872 * 'timer_freq_bt'. 1873 */ 1874 if (meta->op == VM_SNAPSHOT_RESTORE) { 1875 vlapic->timer_period_bt = vlapic->timer_freq_bt; 1876 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 1877 } 1878 1879 SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, 1880 sizeof(vlapic->isrvec_stk), 1881 meta, ret, done); 1882 SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); 1883 1884 SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, 1885 sizeof(vlapic->lvt_last), 1886 meta, ret, done); 1887 1888 if (meta->op == VM_SNAPSHOT_SAVE) 1889 ccr = vlapic_get_ccr(vlapic); 1890 1891 SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); 1892 1893 if (meta->op == VM_SNAPSHOT_RESTORE && 1894 vlapic_enabled(vlapic) && lapic->icr_timer != 0) { 1895 /* Reset the value of the 'timer_fire_bt' and the vlapic 1896 * callout based on the value of the current count 1897 * register saved when the VM snapshot was created. 1898 * If initial count register is 0, timer is not used. 1899 * Look at "10.5.4 APIC Timer" in Software Developer Manual. 1900 */ 1901 vlapic_reset_callout(vlapic, ccr); 1902 } 1903 } 1904 1905 done: 1906 return (ret); 1907 } 1908 #endif 1909