1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include "opt_bhyve_snapshot.h" 32 33 #include <sys/param.h> 34 #include <sys/lock.h> 35 #include <sys/kernel.h> 36 #include <sys/malloc.h> 37 #include <sys/mutex.h> 38 #include <sys/systm.h> 39 #include <sys/smp.h> 40 41 #include <x86/specialreg.h> 42 #include <x86/apicreg.h> 43 44 #include <machine/clock.h> 45 #include <machine/smp.h> 46 47 #include <machine/vmm.h> 48 #include <machine/vmm_snapshot.h> 49 50 #include "vmm_lapic.h" 51 #include "vmm_ktr.h" 52 #include "vmm_stat.h" 53 54 #include "vlapic.h" 55 #include "vlapic_priv.h" 56 #include "vioapic.h" 57 58 #define PRIO(x) ((x) >> 4) 59 60 #define VLAPIC_VERSION (0x14) 61 62 #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) 63 64 /* 65 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the 66 * vlapic_callout_handler() and vcpu accesses to: 67 * - timer_freq_bt, timer_period_bt, timer_fire_bt 68 * - timer LVT register 69 */ 70 #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) 71 #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) 72 #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) 73 74 /* 75 * APIC timer frequency: 76 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 77 * - power-of-two to avoid loss of precision when converted to a bintime. 78 */ 79 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 80 81 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 82 static void vlapic_callout_handler(void *arg); 83 static void vlapic_reset(struct vlapic *vlapic); 84 85 static __inline uint32_t 86 vlapic_get_id(struct vlapic *vlapic) 87 { 88 89 if (x2apic(vlapic)) 90 return (vlapic->vcpuid); 91 else 92 return (vlapic->vcpuid << 24); 93 } 94 95 static uint32_t 96 x2apic_ldr(struct vlapic *vlapic) 97 { 98 int apicid; 99 uint32_t ldr; 100 101 apicid = vlapic_get_id(vlapic); 102 ldr = 1 << (apicid & 0xf); 103 ldr |= (apicid & 0xffff0) << 12; 104 return (ldr); 105 } 106 107 void 108 vlapic_dfr_write_handler(struct vlapic *vlapic) 109 { 110 struct LAPIC *lapic; 111 112 lapic = vlapic->apic_page; 113 if (x2apic(vlapic)) { 114 VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", 115 lapic->dfr); 116 lapic->dfr = 0; 117 return; 118 } 119 120 lapic->dfr &= APIC_DFR_MODEL_MASK; 121 lapic->dfr |= APIC_DFR_RESERVED; 122 123 if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) 124 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); 125 else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) 126 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); 127 else 128 VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); 129 } 130 131 void 132 vlapic_ldr_write_handler(struct vlapic *vlapic) 133 { 134 struct LAPIC *lapic; 135 136 lapic = vlapic->apic_page; 137 138 /* LDR is read-only in x2apic mode */ 139 if (x2apic(vlapic)) { 140 VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", 141 lapic->ldr); 142 lapic->ldr = x2apic_ldr(vlapic); 143 } else { 144 lapic->ldr &= ~APIC_LDR_RESERVED; 145 VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); 146 } 147 } 148 149 void 150 vlapic_id_write_handler(struct vlapic *vlapic) 151 { 152 struct LAPIC *lapic; 153 154 /* 155 * We don't allow the ID register to be modified so reset it back to 156 * its default value. 157 */ 158 lapic = vlapic->apic_page; 159 lapic->id = vlapic_get_id(vlapic); 160 } 161 162 static int 163 vlapic_timer_divisor(uint32_t dcr) 164 { 165 switch (dcr & 0xB) { 166 case APIC_TDCR_1: 167 return (1); 168 case APIC_TDCR_2: 169 return (2); 170 case APIC_TDCR_4: 171 return (4); 172 case APIC_TDCR_8: 173 return (8); 174 case APIC_TDCR_16: 175 return (16); 176 case APIC_TDCR_32: 177 return (32); 178 case APIC_TDCR_64: 179 return (64); 180 case APIC_TDCR_128: 181 return (128); 182 default: 183 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 184 } 185 } 186 187 #if 0 188 static inline void 189 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 190 { 191 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 192 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 193 *lvt & APIC_LVTT_M); 194 } 195 #endif 196 197 static uint32_t 198 vlapic_get_ccr(struct vlapic *vlapic) 199 { 200 struct bintime bt_now, bt_rem; 201 struct LAPIC *lapic __diagused; 202 uint32_t ccr; 203 204 ccr = 0; 205 lapic = vlapic->apic_page; 206 207 VLAPIC_TIMER_LOCK(vlapic); 208 if (callout_active(&vlapic->callout)) { 209 /* 210 * If the timer is scheduled to expire in the future then 211 * compute the value of 'ccr' based on the remaining time. 212 */ 213 binuptime(&bt_now); 214 if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { 215 bt_rem = vlapic->timer_fire_bt; 216 bintime_sub(&bt_rem, &bt_now); 217 ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); 218 ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; 219 } 220 } 221 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " 222 "icr_timer is %#x", ccr, lapic->icr_timer)); 223 VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", 224 ccr, lapic->icr_timer); 225 VLAPIC_TIMER_UNLOCK(vlapic); 226 return (ccr); 227 } 228 229 void 230 vlapic_dcr_write_handler(struct vlapic *vlapic) 231 { 232 struct LAPIC *lapic; 233 int divisor; 234 235 lapic = vlapic->apic_page; 236 VLAPIC_TIMER_LOCK(vlapic); 237 238 divisor = vlapic_timer_divisor(lapic->dcr_timer); 239 VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", 240 lapic->dcr_timer, divisor); 241 242 /* 243 * Update the timer frequency and the timer period. 244 * 245 * XXX changes to the frequency divider will not take effect until 246 * the timer is reloaded. 247 */ 248 FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); 249 vlapic->timer_period_bt = vlapic->timer_freq_bt; 250 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 251 252 VLAPIC_TIMER_UNLOCK(vlapic); 253 } 254 255 void 256 vlapic_esr_write_handler(struct vlapic *vlapic) 257 { 258 struct LAPIC *lapic; 259 260 lapic = vlapic->apic_page; 261 lapic->esr = vlapic->esr_pending; 262 vlapic->esr_pending = 0; 263 } 264 265 int 266 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 267 { 268 struct LAPIC *lapic; 269 uint32_t *irrptr, *tmrptr, mask; 270 int idx; 271 272 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 273 274 lapic = vlapic->apic_page; 275 if (!(lapic->svr & APIC_SVR_ENABLE)) { 276 VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " 277 "interrupt %d", vector); 278 return (0); 279 } 280 281 if (vector < 16) { 282 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 283 false); 284 VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", 285 vector); 286 return (1); 287 } 288 289 if (vlapic->ops.set_intr_ready) 290 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 291 292 idx = (vector / 32) * 4; 293 mask = 1 << (vector % 32); 294 295 irrptr = &lapic->irr0; 296 atomic_set_int(&irrptr[idx], mask); 297 298 /* 299 * Verify that the trigger-mode of the interrupt matches with 300 * the vlapic TMR registers. 301 */ 302 tmrptr = &lapic->tmr0; 303 if ((tmrptr[idx] & mask) != (level ? mask : 0)) { 304 VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but " 305 "interrupt is %s-triggered", idx / 4, tmrptr[idx], 306 level ? "level" : "edge"); 307 } 308 309 VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); 310 return (1); 311 } 312 313 static __inline uint32_t * 314 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 315 { 316 struct LAPIC *lapic = vlapic->apic_page; 317 int i; 318 319 switch (offset) { 320 case APIC_OFFSET_CMCI_LVT: 321 return (&lapic->lvt_cmci); 322 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 323 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 324 return ((&lapic->lvt_timer) + i); 325 default: 326 panic("vlapic_get_lvt: invalid LVT\n"); 327 } 328 } 329 330 static __inline int 331 lvt_off_to_idx(uint32_t offset) 332 { 333 int index; 334 335 switch (offset) { 336 case APIC_OFFSET_CMCI_LVT: 337 index = APIC_LVT_CMCI; 338 break; 339 case APIC_OFFSET_TIMER_LVT: 340 index = APIC_LVT_TIMER; 341 break; 342 case APIC_OFFSET_THERM_LVT: 343 index = APIC_LVT_THERMAL; 344 break; 345 case APIC_OFFSET_PERF_LVT: 346 index = APIC_LVT_PMC; 347 break; 348 case APIC_OFFSET_LINT0_LVT: 349 index = APIC_LVT_LINT0; 350 break; 351 case APIC_OFFSET_LINT1_LVT: 352 index = APIC_LVT_LINT1; 353 break; 354 case APIC_OFFSET_ERROR_LVT: 355 index = APIC_LVT_ERROR; 356 break; 357 default: 358 index = -1; 359 break; 360 } 361 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 362 "invalid lvt index %d for offset %#x", index, offset)); 363 364 return (index); 365 } 366 367 static __inline uint32_t 368 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 369 { 370 int idx; 371 uint32_t val; 372 373 idx = lvt_off_to_idx(offset); 374 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 375 return (val); 376 } 377 378 void 379 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 380 { 381 uint32_t *lvtptr, mask, val; 382 struct LAPIC *lapic; 383 int idx; 384 385 lapic = vlapic->apic_page; 386 lvtptr = vlapic_get_lvtptr(vlapic, offset); 387 val = *lvtptr; 388 idx = lvt_off_to_idx(offset); 389 390 if (!(lapic->svr & APIC_SVR_ENABLE)) 391 val |= APIC_LVT_M; 392 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 393 switch (offset) { 394 case APIC_OFFSET_TIMER_LVT: 395 mask |= APIC_LVTT_TM; 396 break; 397 case APIC_OFFSET_ERROR_LVT: 398 break; 399 case APIC_OFFSET_LINT0_LVT: 400 case APIC_OFFSET_LINT1_LVT: 401 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 402 /* FALLTHROUGH */ 403 default: 404 mask |= APIC_LVT_DM; 405 break; 406 } 407 val &= mask; 408 *lvtptr = val; 409 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 410 } 411 412 static void 413 vlapic_mask_lvts(struct vlapic *vlapic) 414 { 415 struct LAPIC *lapic = vlapic->apic_page; 416 417 lapic->lvt_cmci |= APIC_LVT_M; 418 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 419 420 lapic->lvt_timer |= APIC_LVT_M; 421 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 422 423 lapic->lvt_thermal |= APIC_LVT_M; 424 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 425 426 lapic->lvt_pcint |= APIC_LVT_M; 427 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 428 429 lapic->lvt_lint0 |= APIC_LVT_M; 430 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 431 432 lapic->lvt_lint1 |= APIC_LVT_M; 433 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 434 435 lapic->lvt_error |= APIC_LVT_M; 436 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 437 } 438 439 static int 440 vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) 441 { 442 uint32_t mode, reg, vec; 443 444 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 445 446 if (reg & APIC_LVT_M) 447 return (0); 448 vec = reg & APIC_LVT_VECTOR; 449 mode = reg & APIC_LVT_DM; 450 451 switch (mode) { 452 case APIC_LVT_DM_FIXED: 453 if (vec < 16) { 454 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 455 lvt == APIC_LVT_ERROR); 456 return (0); 457 } 458 if (vlapic_set_intr_ready(vlapic, vec, false)) 459 vcpu_notify_event(vlapic->vcpu, true); 460 break; 461 case APIC_LVT_DM_NMI: 462 vm_inject_nmi(vlapic->vcpu); 463 break; 464 case APIC_LVT_DM_EXTINT: 465 vm_inject_extint(vlapic->vcpu); 466 break; 467 default: 468 // Other modes ignored 469 return (0); 470 } 471 return (1); 472 } 473 474 #if 1 475 static void 476 dump_isrvec_stk(struct vlapic *vlapic) 477 { 478 int i; 479 uint32_t *isrptr; 480 481 isrptr = &vlapic->apic_page->isr0; 482 for (i = 0; i < 8; i++) 483 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 484 485 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 486 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 487 } 488 #endif 489 490 /* 491 * Algorithm adopted from section "Interrupt, Task and Processor Priority" 492 * in Intel Architecture Manual Vol 3a. 493 */ 494 static void 495 vlapic_update_ppr(struct vlapic *vlapic) 496 { 497 int isrvec, tpr, ppr; 498 499 /* 500 * Note that the value on the stack at index 0 is always 0. 501 * 502 * This is a placeholder for the value of ISRV when none of the 503 * bits is set in the ISRx registers. 504 */ 505 isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; 506 tpr = vlapic->apic_page->tpr; 507 508 #if 1 509 { 510 int i, lastprio, curprio, vector, idx; 511 uint32_t *isrptr; 512 513 if (vlapic->isrvec_stk_top == 0 && isrvec != 0) 514 panic("isrvec_stk is corrupted: %d", isrvec); 515 516 /* 517 * Make sure that the priority of the nested interrupts is 518 * always increasing. 519 */ 520 lastprio = -1; 521 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 522 curprio = PRIO(vlapic->isrvec_stk[i]); 523 if (curprio <= lastprio) { 524 dump_isrvec_stk(vlapic); 525 panic("isrvec_stk does not satisfy invariant"); 526 } 527 lastprio = curprio; 528 } 529 530 /* 531 * Make sure that each bit set in the ISRx registers has a 532 * corresponding entry on the isrvec stack. 533 */ 534 i = 1; 535 isrptr = &vlapic->apic_page->isr0; 536 for (vector = 0; vector < 256; vector++) { 537 idx = (vector / 32) * 4; 538 if (isrptr[idx] & (1 << (vector % 32))) { 539 if (i > vlapic->isrvec_stk_top || 540 vlapic->isrvec_stk[i] != vector) { 541 dump_isrvec_stk(vlapic); 542 panic("ISR and isrvec_stk out of sync"); 543 } 544 i++; 545 } 546 } 547 } 548 #endif 549 550 if (PRIO(tpr) >= PRIO(isrvec)) 551 ppr = tpr; 552 else 553 ppr = isrvec & 0xf0; 554 555 vlapic->apic_page->ppr = ppr; 556 VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); 557 } 558 559 void 560 vlapic_sync_tpr(struct vlapic *vlapic) 561 { 562 vlapic_update_ppr(vlapic); 563 } 564 565 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 566 567 static void 568 vlapic_process_eoi(struct vlapic *vlapic) 569 { 570 struct LAPIC *lapic = vlapic->apic_page; 571 uint32_t *isrptr, *tmrptr; 572 int i, idx, bitpos, vector; 573 574 isrptr = &lapic->isr0; 575 tmrptr = &lapic->tmr0; 576 577 for (i = 7; i >= 0; i--) { 578 idx = i * 4; 579 bitpos = fls(isrptr[idx]); 580 if (bitpos-- != 0) { 581 if (vlapic->isrvec_stk_top <= 0) { 582 panic("invalid vlapic isrvec_stk_top %d", 583 vlapic->isrvec_stk_top); 584 } 585 isrptr[idx] &= ~(1 << bitpos); 586 vector = i * 32 + bitpos; 587 VLAPIC_CTR1(vlapic, "EOI vector %d", vector); 588 VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); 589 vlapic->isrvec_stk_top--; 590 vlapic_update_ppr(vlapic); 591 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 592 vioapic_process_eoi(vlapic->vm, vector); 593 } 594 return; 595 } 596 } 597 VLAPIC_CTR0(vlapic, "Gratuitous EOI"); 598 vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1); 599 } 600 601 static __inline int 602 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 603 { 604 605 return (lvt & mask); 606 } 607 608 static __inline int 609 vlapic_periodic_timer(struct vlapic *vlapic) 610 { 611 uint32_t lvt; 612 613 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 614 615 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 616 } 617 618 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 619 620 static void 621 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 622 { 623 624 vlapic->esr_pending |= mask; 625 626 /* 627 * Avoid infinite recursion if the error LVT itself is configured with 628 * an illegal vector. 629 */ 630 if (lvt_error) 631 return; 632 633 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 634 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1); 635 } 636 } 637 638 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 639 640 static void 641 vlapic_fire_timer(struct vlapic *vlapic) 642 { 643 644 KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); 645 646 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 647 VLAPIC_CTR0(vlapic, "vlapic timer fired"); 648 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1); 649 } 650 } 651 652 static VMM_STAT(VLAPIC_INTR_CMC, 653 "corrected machine check interrupts generated by vlapic"); 654 655 void 656 vlapic_fire_cmci(struct vlapic *vlapic) 657 { 658 659 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 660 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1); 661 } 662 } 663 664 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 665 "lvts triggered"); 666 667 int 668 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 669 { 670 671 if (vlapic_enabled(vlapic) == false) { 672 /* 673 * When the local APIC is global/hardware disabled, 674 * LINT[1:0] pins are configured as INTR and NMI pins, 675 * respectively. 676 */ 677 switch (vector) { 678 case APIC_LVT_LINT0: 679 vm_inject_extint(vlapic->vcpu); 680 break; 681 case APIC_LVT_LINT1: 682 vm_inject_nmi(vlapic->vcpu); 683 break; 684 default: 685 break; 686 } 687 return (0); 688 } 689 690 switch (vector) { 691 case APIC_LVT_LINT0: 692 case APIC_LVT_LINT1: 693 case APIC_LVT_TIMER: 694 case APIC_LVT_ERROR: 695 case APIC_LVT_PMC: 696 case APIC_LVT_THERMAL: 697 case APIC_LVT_CMCI: 698 if (vlapic_fire_lvt(vlapic, vector)) { 699 vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERRED, 700 vector, 1); 701 } 702 break; 703 default: 704 return (EINVAL); 705 } 706 return (0); 707 } 708 709 static void 710 vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) 711 { 712 callout_reset_sbt_curcpu(&vlapic->callout, t, 0, 713 vlapic_callout_handler, vlapic, 0); 714 } 715 716 static void 717 vlapic_callout_handler(void *arg) 718 { 719 struct vlapic *vlapic; 720 struct bintime bt, btnow; 721 sbintime_t rem_sbt; 722 723 vlapic = arg; 724 725 VLAPIC_TIMER_LOCK(vlapic); 726 if (callout_pending(&vlapic->callout)) /* callout was reset */ 727 goto done; 728 729 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 730 goto done; 731 732 callout_deactivate(&vlapic->callout); 733 734 vlapic_fire_timer(vlapic); 735 736 if (vlapic_periodic_timer(vlapic)) { 737 binuptime(&btnow); 738 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 739 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 740 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 741 vlapic->timer_fire_bt.frac)); 742 743 /* 744 * Compute the delta between when the timer was supposed to 745 * fire and the present time. 746 */ 747 bt = btnow; 748 bintime_sub(&bt, &vlapic->timer_fire_bt); 749 750 rem_sbt = bttosbt(vlapic->timer_period_bt); 751 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 752 /* 753 * Adjust the time until the next countdown downward 754 * to account for the lost time. 755 */ 756 rem_sbt -= bttosbt(bt); 757 } else { 758 /* 759 * If the delta is greater than the timer period then 760 * just reset our time base instead of trying to catch 761 * up. 762 */ 763 vlapic->timer_fire_bt = btnow; 764 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 765 "usecs, period is %lu usecs - resetting time base", 766 bttosbt(bt) / SBT_1US, 767 bttosbt(vlapic->timer_period_bt) / SBT_1US); 768 } 769 770 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 771 vlapic_callout_reset(vlapic, rem_sbt); 772 } 773 done: 774 VLAPIC_TIMER_UNLOCK(vlapic); 775 } 776 777 void 778 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 779 { 780 struct LAPIC *lapic; 781 sbintime_t sbt; 782 uint32_t icr_timer; 783 784 VLAPIC_TIMER_LOCK(vlapic); 785 786 lapic = vlapic->apic_page; 787 icr_timer = lapic->icr_timer; 788 789 vlapic->timer_period_bt = vlapic->timer_freq_bt; 790 bintime_mul(&vlapic->timer_period_bt, icr_timer); 791 792 if (icr_timer != 0) { 793 binuptime(&vlapic->timer_fire_bt); 794 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 795 796 sbt = bttosbt(vlapic->timer_period_bt); 797 vlapic_callout_reset(vlapic, sbt); 798 } else 799 callout_stop(&vlapic->callout); 800 801 VLAPIC_TIMER_UNLOCK(vlapic); 802 } 803 804 /* 805 * This function populates 'dmask' with the set of vcpus that match the 806 * addressing specified by the (dest, phys, lowprio) tuple. 807 * 808 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 809 * or xAPIC (8-bit) destination field. 810 */ 811 static void 812 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 813 bool lowprio, bool x2apic_dest) 814 { 815 struct vlapic *vlapic; 816 uint32_t dfr, ldr, ldest, cluster; 817 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 818 cpuset_t amask; 819 int vcpuid; 820 821 if ((x2apic_dest && dest == 0xffffffff) || 822 (!x2apic_dest && dest == 0xff)) { 823 /* 824 * Broadcast in both logical and physical modes. 825 */ 826 *dmask = vm_active_cpus(vm); 827 return; 828 } 829 830 if (phys) { 831 /* 832 * Physical mode: destination is APIC ID. 833 */ 834 CPU_ZERO(dmask); 835 vcpuid = vm_apicid2vcpuid(vm, dest); 836 amask = vm_active_cpus(vm); 837 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 838 CPU_SET(vcpuid, dmask); 839 } else { 840 /* 841 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 842 * bitmask. This model is only available in the xAPIC mode. 843 */ 844 mda_flat_ldest = dest & 0xff; 845 846 /* 847 * In the "Cluster Model" the MDA is used to identify a 848 * specific cluster and a set of APICs in that cluster. 849 */ 850 if (x2apic_dest) { 851 mda_cluster_id = dest >> 16; 852 mda_cluster_ldest = dest & 0xffff; 853 } else { 854 mda_cluster_id = (dest >> 4) & 0xf; 855 mda_cluster_ldest = dest & 0xf; 856 } 857 858 /* 859 * Logical mode: match each APIC that has a bit set 860 * in its LDR that matches a bit in the ldest. 861 */ 862 CPU_ZERO(dmask); 863 amask = vm_active_cpus(vm); 864 CPU_FOREACH_ISSET(vcpuid, &amask) { 865 vlapic = vm_lapic(vm_vcpu(vm, vcpuid)); 866 dfr = vlapic->apic_page->dfr; 867 ldr = vlapic->apic_page->ldr; 868 869 if ((dfr & APIC_DFR_MODEL_MASK) == 870 APIC_DFR_MODEL_FLAT) { 871 ldest = ldr >> 24; 872 mda_ldest = mda_flat_ldest; 873 } else if ((dfr & APIC_DFR_MODEL_MASK) == 874 APIC_DFR_MODEL_CLUSTER) { 875 if (x2apic(vlapic)) { 876 cluster = ldr >> 16; 877 ldest = ldr & 0xffff; 878 } else { 879 cluster = ldr >> 28; 880 ldest = (ldr >> 24) & 0xf; 881 } 882 if (cluster != mda_cluster_id) 883 continue; 884 mda_ldest = mda_cluster_ldest; 885 } else { 886 /* 887 * Guest has configured a bad logical 888 * model for this vcpu - skip it. 889 */ 890 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 891 "model %x - cannot deliver interrupt", dfr); 892 continue; 893 } 894 895 if ((mda_ldest & ldest) != 0) { 896 CPU_SET(vcpuid, dmask); 897 if (lowprio) 898 break; 899 } 900 } 901 } 902 } 903 904 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu"); 905 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu"); 906 907 static void 908 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 909 { 910 struct LAPIC *lapic = vlapic->apic_page; 911 912 if (lapic->tpr != val) { 913 VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x", 914 lapic->tpr, val); 915 lapic->tpr = val; 916 vlapic_update_ppr(vlapic); 917 } 918 } 919 920 static uint8_t 921 vlapic_get_tpr(struct vlapic *vlapic) 922 { 923 struct LAPIC *lapic = vlapic->apic_page; 924 925 return (lapic->tpr); 926 } 927 928 void 929 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 930 { 931 uint8_t tpr; 932 933 if (val & ~0xf) { 934 vm_inject_gp(vlapic->vcpu); 935 return; 936 } 937 938 tpr = val << 4; 939 vlapic_set_tpr(vlapic, tpr); 940 } 941 942 uint64_t 943 vlapic_get_cr8(struct vlapic *vlapic) 944 { 945 uint8_t tpr; 946 947 tpr = vlapic_get_tpr(vlapic); 948 return (tpr >> 4); 949 } 950 951 static bool 952 vlapic_is_icr_valid(uint64_t icrval) 953 { 954 uint32_t mode = icrval & APIC_DELMODE_MASK; 955 uint32_t level = icrval & APIC_LEVEL_MASK; 956 uint32_t trigger = icrval & APIC_TRIGMOD_MASK; 957 uint32_t shorthand = icrval & APIC_DEST_MASK; 958 959 switch (mode) { 960 case APIC_DELMODE_FIXED: 961 if (trigger == APIC_TRIGMOD_EDGE) 962 return (true); 963 /* 964 * AMD allows a level assert IPI and Intel converts a level 965 * assert IPI into an edge IPI. 966 */ 967 if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) 968 return (true); 969 break; 970 case APIC_DELMODE_LOWPRIO: 971 case APIC_DELMODE_SMI: 972 case APIC_DELMODE_NMI: 973 case APIC_DELMODE_INIT: 974 if (trigger == APIC_TRIGMOD_EDGE && 975 (shorthand == APIC_DEST_DESTFLD || 976 shorthand == APIC_DEST_ALLESELF)) 977 return (true); 978 /* 979 * AMD allows a level assert IPI and Intel converts a level 980 * assert IPI into an edge IPI. 981 */ 982 if (trigger == APIC_TRIGMOD_LEVEL && 983 level == APIC_LEVEL_ASSERT && 984 (shorthand == APIC_DEST_DESTFLD || 985 shorthand == APIC_DEST_ALLESELF)) 986 return (true); 987 /* 988 * An level triggered deassert INIT is defined in the Intel 989 * Multiprocessor Specification and the Intel Software Developer 990 * Manual. Due to the MPS it's required to send a level assert 991 * INIT to a cpu and then a level deassert INIT. Some operating 992 * systems e.g. FreeBSD or Linux use that algorithm. According 993 * to the SDM a level deassert INIT is only supported by Pentium 994 * and P6 processors. It's always send to all cpus regardless of 995 * the destination or shorthand field. It resets the arbitration 996 * id register. This register is not software accessible and 997 * only required for the APIC bus arbitration. So, the level 998 * deassert INIT doesn't need any emulation and we should ignore 999 * it. The SDM also defines that newer processors don't support 1000 * the level deassert INIT and it's not valid any more. As it's 1001 * defined for older systems, it can't be invalid per se. 1002 * Otherwise, backward compatibility would be broken. However, 1003 * when returning false here, it'll be ignored which is the 1004 * desired behaviour. 1005 */ 1006 if (mode == APIC_DELMODE_INIT && 1007 trigger == APIC_TRIGMOD_LEVEL && 1008 level == APIC_LEVEL_DEASSERT) 1009 return (false); 1010 break; 1011 case APIC_DELMODE_STARTUP: 1012 if (shorthand == APIC_DEST_DESTFLD || 1013 shorthand == APIC_DEST_ALLESELF) 1014 return (true); 1015 break; 1016 case APIC_DELMODE_RR: 1017 /* Only available on AMD! */ 1018 if (trigger == APIC_TRIGMOD_EDGE && 1019 shorthand == APIC_DEST_DESTFLD) 1020 return (true); 1021 break; 1022 case APIC_DELMODE_RESV: 1023 return (false); 1024 default: 1025 __assert_unreachable(); 1026 } 1027 1028 return (false); 1029 } 1030 1031 int 1032 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 1033 { 1034 int i; 1035 bool phys; 1036 cpuset_t dmask, ipimask; 1037 uint64_t icrval; 1038 uint32_t dest, vec, mode, shorthand; 1039 struct vcpu *vcpu; 1040 struct vm_exit *vmexit; 1041 struct LAPIC *lapic; 1042 1043 lapic = vlapic->apic_page; 1044 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 1045 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1046 1047 if (x2apic(vlapic)) 1048 dest = icrval >> 32; 1049 else 1050 dest = icrval >> (32 + 24); 1051 vec = icrval & APIC_VECTOR_MASK; 1052 mode = icrval & APIC_DELMODE_MASK; 1053 phys = (icrval & APIC_DESTMODE_LOG) == 0; 1054 shorthand = icrval & APIC_DEST_MASK; 1055 1056 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 1057 1058 switch (shorthand) { 1059 case APIC_DEST_DESTFLD: 1060 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); 1061 break; 1062 case APIC_DEST_SELF: 1063 CPU_SETOF(vlapic->vcpuid, &dmask); 1064 break; 1065 case APIC_DEST_ALLISELF: 1066 dmask = vm_active_cpus(vlapic->vm); 1067 break; 1068 case APIC_DEST_ALLESELF: 1069 dmask = vm_active_cpus(vlapic->vm); 1070 CPU_CLR(vlapic->vcpuid, &dmask); 1071 break; 1072 default: 1073 __assert_unreachable(); 1074 } 1075 1076 /* 1077 * Ignore invalid combinations of the icr. 1078 */ 1079 if (!vlapic_is_icr_valid(icrval)) { 1080 VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval); 1081 return (0); 1082 } 1083 1084 /* 1085 * ipimask is a set of vCPUs needing userland handling of the current 1086 * IPI. 1087 */ 1088 CPU_ZERO(&ipimask); 1089 1090 switch (mode) { 1091 case APIC_DELMODE_FIXED: 1092 if (vec < 16) { 1093 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 1094 false); 1095 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 1096 return (0); 1097 } 1098 1099 CPU_FOREACH_ISSET(i, &dmask) { 1100 vcpu = vm_vcpu(vlapic->vm, i); 1101 lapic_intr_edge(vcpu, vec); 1102 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); 1103 vmm_stat_incr(vcpu, VLAPIC_IPI_RECV, 1); 1104 VLAPIC_CTR2(vlapic, 1105 "vlapic sending ipi %d to vcpuid %d", vec, i); 1106 } 1107 1108 break; 1109 case APIC_DELMODE_NMI: 1110 CPU_FOREACH_ISSET(i, &dmask) { 1111 vcpu = vm_vcpu(vlapic->vm, i); 1112 vm_inject_nmi(vcpu); 1113 VLAPIC_CTR1(vlapic, 1114 "vlapic sending ipi nmi to vcpuid %d", i); 1115 } 1116 1117 break; 1118 case APIC_DELMODE_INIT: 1119 case APIC_DELMODE_STARTUP: 1120 if (!vlapic->ipi_exit) { 1121 if (!phys) 1122 break; 1123 1124 i = vm_apicid2vcpuid(vlapic->vm, dest); 1125 if (i >= vm_get_maxcpus(vlapic->vm) || 1126 i == vlapic->vcpuid) 1127 break; 1128 1129 CPU_SETOF(i, &ipimask); 1130 1131 break; 1132 } 1133 1134 CPU_COPY(&dmask, &ipimask); 1135 break; 1136 default: 1137 return (1); 1138 } 1139 1140 if (!CPU_EMPTY(&ipimask)) { 1141 vmexit = vm_exitinfo(vlapic->vcpu); 1142 vmexit->exitcode = VM_EXITCODE_IPI; 1143 vmexit->u.ipi.mode = mode; 1144 vmexit->u.ipi.vector = vec; 1145 *vm_exitinfo_cpuset(vlapic->vcpu) = ipimask; 1146 1147 *retu = true; 1148 } 1149 1150 return (0); 1151 } 1152 1153 static void 1154 vlapic_handle_init(struct vcpu *vcpu, void *arg) 1155 { 1156 struct vlapic *vlapic = vm_lapic(vcpu); 1157 1158 vlapic_reset(vlapic); 1159 } 1160 1161 int 1162 vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1163 { 1164 struct vlapic *vlapic = vm_lapic(vcpu); 1165 cpuset_t *dmask = vm_exitinfo_cpuset(vcpu); 1166 uint8_t vec = vme->u.ipi.vector; 1167 1168 *retu = true; 1169 switch (vme->u.ipi.mode) { 1170 case APIC_DELMODE_INIT: { 1171 cpuset_t active, reinit; 1172 1173 active = vm_active_cpus(vcpu_vm(vcpu)); 1174 CPU_AND(&reinit, &active, dmask); 1175 if (!CPU_EMPTY(&reinit)) { 1176 vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init, 1177 NULL); 1178 } 1179 vm_await_start(vcpu_vm(vcpu), dmask); 1180 1181 if (!vlapic->ipi_exit) 1182 *retu = false; 1183 1184 break; 1185 } 1186 case APIC_DELMODE_STARTUP: 1187 /* 1188 * Ignore SIPIs in any state other than wait-for-SIPI 1189 */ 1190 *dmask = vm_start_cpus(vcpu_vm(vcpu), dmask); 1191 1192 if (CPU_EMPTY(dmask)) { 1193 *retu = false; 1194 break; 1195 } 1196 1197 /* 1198 * Old bhyve versions don't support the IPI 1199 * exit. Translate it into the old style. 1200 */ 1201 if (!vlapic->ipi_exit) { 1202 vme->exitcode = VM_EXITCODE_SPINUP_AP; 1203 vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1; 1204 vme->u.spinup_ap.rip = vec << PAGE_SHIFT; 1205 } 1206 1207 break; 1208 default: 1209 __assert_unreachable(); 1210 } 1211 1212 return (0); 1213 } 1214 1215 void 1216 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1217 { 1218 int vec; 1219 1220 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1221 1222 vec = val & 0xff; 1223 lapic_intr_edge(vlapic->vcpu, vec); 1224 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); 1225 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_RECV, 1); 1226 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1227 } 1228 1229 int 1230 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1231 { 1232 struct LAPIC *lapic = vlapic->apic_page; 1233 int idx, i, bitpos, vector; 1234 uint32_t *irrptr, val; 1235 1236 vlapic_update_ppr(vlapic); 1237 1238 if (vlapic->ops.pending_intr) 1239 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1240 1241 irrptr = &lapic->irr0; 1242 1243 for (i = 7; i >= 0; i--) { 1244 idx = i * 4; 1245 val = atomic_load_acq_int(&irrptr[idx]); 1246 bitpos = fls(val); 1247 if (bitpos != 0) { 1248 vector = i * 32 + (bitpos - 1); 1249 if (PRIO(vector) > PRIO(lapic->ppr)) { 1250 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1251 if (vecptr != NULL) 1252 *vecptr = vector; 1253 return (1); 1254 } else 1255 break; 1256 } 1257 } 1258 return (0); 1259 } 1260 1261 void 1262 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1263 { 1264 struct LAPIC *lapic = vlapic->apic_page; 1265 uint32_t *irrptr, *isrptr; 1266 int idx, stk_top; 1267 1268 if (vlapic->ops.intr_accepted) 1269 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1270 1271 /* 1272 * clear the ready bit for vector being accepted in irr 1273 * and set the vector as in service in isr. 1274 */ 1275 idx = (vector / 32) * 4; 1276 1277 irrptr = &lapic->irr0; 1278 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1279 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1280 1281 isrptr = &lapic->isr0; 1282 isrptr[idx] |= 1 << (vector % 32); 1283 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1284 1285 /* 1286 * Update the PPR 1287 */ 1288 vlapic->isrvec_stk_top++; 1289 1290 stk_top = vlapic->isrvec_stk_top; 1291 if (stk_top >= ISRVEC_STK_SIZE) 1292 panic("isrvec_stk_top overflow %d", stk_top); 1293 1294 vlapic->isrvec_stk[stk_top] = vector; 1295 } 1296 1297 void 1298 vlapic_svr_write_handler(struct vlapic *vlapic) 1299 { 1300 struct LAPIC *lapic; 1301 uint32_t old, new, changed; 1302 1303 lapic = vlapic->apic_page; 1304 1305 new = lapic->svr; 1306 old = vlapic->svr_last; 1307 vlapic->svr_last = new; 1308 1309 changed = old ^ new; 1310 if ((changed & APIC_SVR_ENABLE) != 0) { 1311 if ((new & APIC_SVR_ENABLE) == 0) { 1312 /* 1313 * The apic is now disabled so stop the apic timer 1314 * and mask all the LVT entries. 1315 */ 1316 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1317 VLAPIC_TIMER_LOCK(vlapic); 1318 callout_stop(&vlapic->callout); 1319 VLAPIC_TIMER_UNLOCK(vlapic); 1320 vlapic_mask_lvts(vlapic); 1321 } else { 1322 /* 1323 * The apic is now enabled so restart the apic timer 1324 * if it is configured in periodic mode. 1325 */ 1326 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1327 if (vlapic_periodic_timer(vlapic)) 1328 vlapic_icrtmr_write_handler(vlapic); 1329 } 1330 } 1331 } 1332 1333 int 1334 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1335 uint64_t *data, bool *retu) 1336 { 1337 struct LAPIC *lapic = vlapic->apic_page; 1338 uint32_t *reg; 1339 int i; 1340 1341 /* Ignore MMIO accesses in x2APIC mode */ 1342 if (x2apic(vlapic) && mmio_access) { 1343 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1344 offset); 1345 *data = 0; 1346 goto done; 1347 } 1348 1349 if (!x2apic(vlapic) && !mmio_access) { 1350 /* 1351 * XXX Generate GP fault for MSR accesses in xAPIC mode 1352 */ 1353 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1354 "xAPIC mode", offset); 1355 *data = 0; 1356 goto done; 1357 } 1358 1359 if (offset > sizeof(*lapic)) { 1360 *data = 0; 1361 goto done; 1362 } 1363 1364 offset &= ~3; 1365 switch(offset) 1366 { 1367 case APIC_OFFSET_ID: 1368 *data = lapic->id; 1369 break; 1370 case APIC_OFFSET_VER: 1371 *data = lapic->version; 1372 break; 1373 case APIC_OFFSET_TPR: 1374 *data = vlapic_get_tpr(vlapic); 1375 break; 1376 case APIC_OFFSET_APR: 1377 *data = lapic->apr; 1378 break; 1379 case APIC_OFFSET_PPR: 1380 *data = lapic->ppr; 1381 break; 1382 case APIC_OFFSET_EOI: 1383 *data = lapic->eoi; 1384 break; 1385 case APIC_OFFSET_LDR: 1386 *data = lapic->ldr; 1387 break; 1388 case APIC_OFFSET_DFR: 1389 *data = lapic->dfr; 1390 break; 1391 case APIC_OFFSET_SVR: 1392 *data = lapic->svr; 1393 break; 1394 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1395 i = (offset - APIC_OFFSET_ISR0) >> 2; 1396 reg = &lapic->isr0; 1397 *data = *(reg + i); 1398 break; 1399 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1400 i = (offset - APIC_OFFSET_TMR0) >> 2; 1401 reg = &lapic->tmr0; 1402 *data = *(reg + i); 1403 break; 1404 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1405 i = (offset - APIC_OFFSET_IRR0) >> 2; 1406 reg = &lapic->irr0; 1407 *data = atomic_load_acq_int(reg + i); 1408 break; 1409 case APIC_OFFSET_ESR: 1410 *data = lapic->esr; 1411 break; 1412 case APIC_OFFSET_ICR_LOW: 1413 *data = lapic->icr_lo; 1414 if (x2apic(vlapic)) 1415 *data |= (uint64_t)lapic->icr_hi << 32; 1416 break; 1417 case APIC_OFFSET_ICR_HI: 1418 *data = lapic->icr_hi; 1419 break; 1420 case APIC_OFFSET_CMCI_LVT: 1421 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1422 *data = vlapic_get_lvt(vlapic, offset); 1423 #ifdef INVARIANTS 1424 reg = vlapic_get_lvtptr(vlapic, offset); 1425 KASSERT(*data == *reg, ("inconsistent lvt value at " 1426 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1427 #endif 1428 break; 1429 case APIC_OFFSET_TIMER_ICR: 1430 *data = lapic->icr_timer; 1431 break; 1432 case APIC_OFFSET_TIMER_CCR: 1433 *data = vlapic_get_ccr(vlapic); 1434 break; 1435 case APIC_OFFSET_TIMER_DCR: 1436 *data = lapic->dcr_timer; 1437 break; 1438 case APIC_OFFSET_SELF_IPI: 1439 /* 1440 * XXX generate a GP fault if vlapic is in x2apic mode 1441 */ 1442 *data = 0; 1443 break; 1444 case APIC_OFFSET_RRR: 1445 default: 1446 *data = 0; 1447 break; 1448 } 1449 done: 1450 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1451 return 0; 1452 } 1453 1454 int 1455 vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1456 uint64_t data, bool *retu) 1457 { 1458 struct LAPIC *lapic = vlapic->apic_page; 1459 uint32_t *regptr; 1460 int retval; 1461 1462 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1463 ("vlapic_write: invalid offset %#lx", offset)); 1464 1465 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1466 offset, data); 1467 1468 if (offset > sizeof(*lapic)) 1469 return (0); 1470 1471 /* Ignore MMIO accesses in x2APIC mode */ 1472 if (x2apic(vlapic) && mmio_access) { 1473 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1474 "in x2APIC mode", data, offset); 1475 return (0); 1476 } 1477 1478 /* 1479 * XXX Generate GP fault for MSR accesses in xAPIC mode 1480 */ 1481 if (!x2apic(vlapic) && !mmio_access) { 1482 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1483 "in xAPIC mode", data, offset); 1484 return (0); 1485 } 1486 1487 retval = 0; 1488 switch(offset) 1489 { 1490 case APIC_OFFSET_ID: 1491 lapic->id = data; 1492 vlapic_id_write_handler(vlapic); 1493 break; 1494 case APIC_OFFSET_TPR: 1495 vlapic_set_tpr(vlapic, data & 0xff); 1496 break; 1497 case APIC_OFFSET_EOI: 1498 vlapic_process_eoi(vlapic); 1499 break; 1500 case APIC_OFFSET_LDR: 1501 lapic->ldr = data; 1502 vlapic_ldr_write_handler(vlapic); 1503 break; 1504 case APIC_OFFSET_DFR: 1505 lapic->dfr = data; 1506 vlapic_dfr_write_handler(vlapic); 1507 break; 1508 case APIC_OFFSET_SVR: 1509 lapic->svr = data; 1510 vlapic_svr_write_handler(vlapic); 1511 break; 1512 case APIC_OFFSET_ICR_LOW: 1513 lapic->icr_lo = data; 1514 if (x2apic(vlapic)) 1515 lapic->icr_hi = data >> 32; 1516 retval = vlapic_icrlo_write_handler(vlapic, retu); 1517 break; 1518 case APIC_OFFSET_ICR_HI: 1519 lapic->icr_hi = data; 1520 break; 1521 case APIC_OFFSET_CMCI_LVT: 1522 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1523 regptr = vlapic_get_lvtptr(vlapic, offset); 1524 *regptr = data; 1525 vlapic_lvt_write_handler(vlapic, offset); 1526 break; 1527 case APIC_OFFSET_TIMER_ICR: 1528 lapic->icr_timer = data; 1529 vlapic_icrtmr_write_handler(vlapic); 1530 break; 1531 1532 case APIC_OFFSET_TIMER_DCR: 1533 lapic->dcr_timer = data; 1534 vlapic_dcr_write_handler(vlapic); 1535 break; 1536 1537 case APIC_OFFSET_ESR: 1538 vlapic_esr_write_handler(vlapic); 1539 break; 1540 1541 case APIC_OFFSET_SELF_IPI: 1542 if (x2apic(vlapic)) 1543 vlapic_self_ipi_handler(vlapic, data); 1544 break; 1545 1546 case APIC_OFFSET_VER: 1547 case APIC_OFFSET_APR: 1548 case APIC_OFFSET_PPR: 1549 case APIC_OFFSET_RRR: 1550 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1551 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1552 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1553 case APIC_OFFSET_TIMER_CCR: 1554 default: 1555 // Read only. 1556 break; 1557 } 1558 1559 return (retval); 1560 } 1561 1562 static void 1563 vlapic_reset(struct vlapic *vlapic) 1564 { 1565 struct LAPIC *lapic; 1566 1567 lapic = vlapic->apic_page; 1568 bzero(lapic, sizeof(struct LAPIC)); 1569 1570 lapic->id = vlapic_get_id(vlapic); 1571 lapic->version = VLAPIC_VERSION; 1572 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1573 lapic->dfr = 0xffffffff; 1574 lapic->svr = APIC_SVR_VECTOR; 1575 vlapic_mask_lvts(vlapic); 1576 vlapic_reset_tmr(vlapic); 1577 1578 lapic->dcr_timer = 0; 1579 vlapic_dcr_write_handler(vlapic); 1580 1581 vlapic->svr_last = lapic->svr; 1582 } 1583 1584 void 1585 vlapic_init(struct vlapic *vlapic) 1586 { 1587 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1588 KASSERT(vlapic->vcpuid >= 0 && 1589 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1590 ("vlapic_init: vcpuid is not initialized")); 1591 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1592 "initialized")); 1593 1594 /* 1595 * If the vlapic is configured in x2apic mode then it will be 1596 * accessed in the critical section via the MSR emulation code. 1597 * 1598 * Therefore the timer mutex must be a spinlock because blockable 1599 * mutexes cannot be acquired in a critical section. 1600 */ 1601 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1602 callout_init(&vlapic->callout, 1); 1603 1604 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1605 1606 if (vlapic->vcpuid == 0) 1607 vlapic->msr_apicbase |= APICBASE_BSP; 1608 1609 vlapic->ipi_exit = false; 1610 1611 vlapic_reset(vlapic); 1612 } 1613 1614 void 1615 vlapic_cleanup(struct vlapic *vlapic) 1616 { 1617 1618 callout_drain(&vlapic->callout); 1619 mtx_destroy(&vlapic->timer_mtx); 1620 } 1621 1622 uint64_t 1623 vlapic_get_apicbase(struct vlapic *vlapic) 1624 { 1625 1626 return (vlapic->msr_apicbase); 1627 } 1628 1629 int 1630 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1631 { 1632 1633 if (vlapic->msr_apicbase != new) { 1634 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1635 "not supported", vlapic->msr_apicbase, new); 1636 return (-1); 1637 } 1638 1639 return (0); 1640 } 1641 1642 void 1643 vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) 1644 { 1645 struct vlapic *vlapic; 1646 struct LAPIC *lapic; 1647 1648 vlapic = vm_lapic(vcpu); 1649 1650 if (state == X2APIC_DISABLED) 1651 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1652 else 1653 vlapic->msr_apicbase |= APICBASE_X2APIC; 1654 1655 /* 1656 * Reset the local APIC registers whose values are mode-dependent. 1657 * 1658 * XXX this works because the APIC mode can be changed only at vcpu 1659 * initialization time. 1660 */ 1661 lapic = vlapic->apic_page; 1662 lapic->id = vlapic_get_id(vlapic); 1663 if (x2apic(vlapic)) { 1664 lapic->ldr = x2apic_ldr(vlapic); 1665 lapic->dfr = 0; 1666 } else { 1667 lapic->ldr = 0; 1668 lapic->dfr = 0xffffffff; 1669 } 1670 1671 if (state == X2APIC_ENABLED) { 1672 if (vlapic->ops.enable_x2apic_mode) 1673 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1674 } 1675 } 1676 1677 void 1678 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1679 int delmode, int vec) 1680 { 1681 struct vcpu *vcpu; 1682 bool lowprio; 1683 int vcpuid; 1684 cpuset_t dmask; 1685 1686 if (delmode != IOART_DELFIXED && 1687 delmode != IOART_DELLOPRI && 1688 delmode != IOART_DELEXINT) { 1689 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1690 return; 1691 } 1692 lowprio = (delmode == IOART_DELLOPRI); 1693 1694 /* 1695 * We don't provide any virtual interrupt redirection hardware so 1696 * all interrupts originating from the ioapic or MSI specify the 1697 * 'dest' in the legacy xAPIC format. 1698 */ 1699 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1700 1701 CPU_FOREACH_ISSET(vcpuid, &dmask) { 1702 vcpu = vm_vcpu(vm, vcpuid); 1703 if (delmode == IOART_DELEXINT) { 1704 vm_inject_extint(vcpu); 1705 } else { 1706 lapic_set_intr(vcpu, vec, level); 1707 } 1708 } 1709 } 1710 1711 void 1712 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1713 { 1714 /* 1715 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1716 * 1717 * This is done by leveraging features like Posted Interrupts (Intel) 1718 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1719 * 1720 * If neither of these features are available then fallback to 1721 * sending an IPI to 'hostcpu'. 1722 */ 1723 if (vlapic->ops.post_intr) 1724 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1725 else 1726 ipi_cpu(hostcpu, ipinum); 1727 } 1728 1729 bool 1730 vlapic_enabled(struct vlapic *vlapic) 1731 { 1732 struct LAPIC *lapic = vlapic->apic_page; 1733 1734 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1735 (lapic->svr & APIC_SVR_ENABLE) != 0) 1736 return (true); 1737 else 1738 return (false); 1739 } 1740 1741 static void 1742 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1743 { 1744 struct LAPIC *lapic; 1745 uint32_t *tmrptr, mask; 1746 int idx; 1747 1748 lapic = vlapic->apic_page; 1749 tmrptr = &lapic->tmr0; 1750 idx = (vector / 32) * 4; 1751 mask = 1 << (vector % 32); 1752 if (level) 1753 tmrptr[idx] |= mask; 1754 else 1755 tmrptr[idx] &= ~mask; 1756 1757 if (vlapic->ops.set_tmr != NULL) 1758 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1759 } 1760 1761 void 1762 vlapic_reset_tmr(struct vlapic *vlapic) 1763 { 1764 int vector; 1765 1766 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1767 1768 for (vector = 0; vector <= 255; vector++) 1769 vlapic_set_tmr(vlapic, vector, false); 1770 } 1771 1772 void 1773 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1774 int delmode, int vector) 1775 { 1776 cpuset_t dmask; 1777 bool lowprio; 1778 1779 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1780 1781 /* 1782 * A level trigger is valid only for fixed and lowprio delivery modes. 1783 */ 1784 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1785 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1786 "delivery-mode %d", delmode); 1787 return; 1788 } 1789 1790 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1791 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1792 1793 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1794 return; 1795 1796 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1797 vlapic_set_tmr(vlapic, vector, true); 1798 } 1799 1800 #ifdef BHYVE_SNAPSHOT 1801 static void 1802 vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) 1803 { 1804 /* The implementation is similar to the one in the 1805 * `vlapic_icrtmr_write_handler` function 1806 */ 1807 sbintime_t sbt; 1808 struct bintime bt; 1809 1810 VLAPIC_TIMER_LOCK(vlapic); 1811 1812 bt = vlapic->timer_freq_bt; 1813 bintime_mul(&bt, ccr); 1814 1815 if (ccr != 0) { 1816 binuptime(&vlapic->timer_fire_bt); 1817 bintime_add(&vlapic->timer_fire_bt, &bt); 1818 1819 sbt = bttosbt(bt); 1820 vlapic_callout_reset(vlapic, sbt); 1821 } else { 1822 /* even if the CCR was 0, periodic timers should be reset */ 1823 if (vlapic_periodic_timer(vlapic)) { 1824 binuptime(&vlapic->timer_fire_bt); 1825 bintime_add(&vlapic->timer_fire_bt, 1826 &vlapic->timer_period_bt); 1827 sbt = bttosbt(vlapic->timer_period_bt); 1828 1829 callout_stop(&vlapic->callout); 1830 vlapic_callout_reset(vlapic, sbt); 1831 } 1832 } 1833 1834 VLAPIC_TIMER_UNLOCK(vlapic); 1835 } 1836 1837 int 1838 vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) 1839 { 1840 int ret; 1841 struct vcpu *vcpu; 1842 struct vlapic *vlapic; 1843 struct LAPIC *lapic; 1844 uint32_t ccr; 1845 uint16_t i, maxcpus; 1846 1847 KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); 1848 1849 ret = 0; 1850 1851 maxcpus = vm_get_maxcpus(vm); 1852 for (i = 0; i < maxcpus; i++) { 1853 vcpu = vm_vcpu(vm, i); 1854 if (vcpu == NULL) 1855 continue; 1856 vlapic = vm_lapic(vcpu); 1857 1858 /* snapshot the page first; timer period depends on icr_timer */ 1859 lapic = vlapic->apic_page; 1860 SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done); 1861 1862 SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); 1863 1864 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, 1865 meta, ret, done); 1866 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, 1867 meta, ret, done); 1868 1869 /* 1870 * Timer period is equal to 'icr_timer' ticks at a frequency of 1871 * 'timer_freq_bt'. 1872 */ 1873 if (meta->op == VM_SNAPSHOT_RESTORE) { 1874 vlapic->timer_period_bt = vlapic->timer_freq_bt; 1875 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 1876 } 1877 1878 SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, 1879 sizeof(vlapic->isrvec_stk), 1880 meta, ret, done); 1881 SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); 1882 1883 SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, 1884 sizeof(vlapic->lvt_last), 1885 meta, ret, done); 1886 1887 if (meta->op == VM_SNAPSHOT_SAVE) 1888 ccr = vlapic_get_ccr(vlapic); 1889 1890 SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); 1891 1892 if (meta->op == VM_SNAPSHOT_RESTORE && 1893 vlapic_enabled(vlapic) && lapic->icr_timer != 0) { 1894 /* Reset the value of the 'timer_fire_bt' and the vlapic 1895 * callout based on the value of the current count 1896 * register saved when the VM snapshot was created. 1897 * If initial count register is 0, timer is not used. 1898 * Look at "10.5.4 APIC Timer" in Software Developer Manual. 1899 */ 1900 vlapic_reset_callout(vlapic, ccr); 1901 } 1902 } 1903 1904 done: 1905 return (ret); 1906 } 1907 #endif 1908