/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(0x14)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
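
/*
 * For example, at this frequency a divide-by-1 timer ticks at 2^27 Hz, so
 * a full 32-bit initial count of ~2^32 ticks runs for about
 * 2^32 / 2^27 = 32 seconds before it expires.
 */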

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
static void vlapic_reset(struct vlapic *vlapic);

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}
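
/*
 * In x2APIC mode the LDR is derived from the APIC ID: bits 31:16 hold the
 * cluster (APIC ID >> 4) and bits 15:0 hold a one-hot logical ID,
 * 1 << (APIC ID & 0xf), which is what the computation below produces.
 */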

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}
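
/*
 * The timer divisor is encoded in bits 0, 1 and 3 of the DCR (bit 2 is
 * reserved), which is why only those bits are examined below; divide-by-1,
 * for instance, is the encoding 0xb.
 */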

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
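
/*
 * Example of the CCR computation below: with the timer ticking at 2^27 Hz
 * and 1.5 seconds remaining until the callout fires, the whole-second part
 * contributes 1 * 2^27 ticks and the fractional part another 2^26, giving
 * a CCR of 1.5 * 2^27 = 201326592 ticks.
 */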

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic __diagused;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
	uint32_t mode, reg, vec;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vcpu, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vcpu);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vcpu);
		break;
	default:
		/* Other modes ignored. */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
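
/*
 * Example of the priority update: with TPR = 0x4f (class 4) and an
 * in-service vector of 0x61 (class 6), the class of the in-service vector
 * wins and the PPR becomes 0x60.
 */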

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");
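
/*
 * Retire the highest-priority in-service interrupt: clear its ISR bit, pop
 * the isrvec stack, recompute the PPR and, if the vector's TMR bit marks it
 * level-triggered, forward the EOI to the virtual I/O APIC.
 */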

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VLAPIC_CTR1(vlapic, "EOI vector %d", vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vector);
			}
			return;
		}
	}
	VLAPIC_CTR0(vlapic, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vcpu);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vcpu);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERED,
			    vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t)
{
	callout_reset_sbt_curcpu(&vlapic->callout, t, 0,
	    vlapic_callout_handler, vlapic, 0);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, rem_sbt);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}
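
/*
 * For example, an initial count of 2^25 with the divide-by-1 setting yields
 * a period of 2^25 ticks at 2^27 Hz, i.e. a timer interrupt every 0.25
 * seconds while in periodic mode.
 */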

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, sbt);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
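
/*
 * Example of flat-model logical addressing: an MDA of 0x03 selects every
 * local APIC whose logical ID byte (LDR bits 31:24) has bit 0 or bit 1 set.
 */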

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		CPU_FOREACH_ISSET(vcpuid, &amask) {
			vlapic = vm_lapic(vm_vcpu(vm, vcpuid));
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x",
		    lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}
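
/*
 * CR8 architecturally mirrors the high nibble of the TPR: a CR8 value of n
 * corresponds to a TPR of n << 4, so only values 0-15 are valid and larger
 * values inject #GP below.
 */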

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vcpu);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * A level-triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software
		 * Developer Manual. The MPS requires a level assert INIT to
		 * be sent to a cpu followed by a level deassert INIT, and
		 * some operating systems, e.g. FreeBSD or Linux, use that
		 * algorithm. According to the SDM a level deassert INIT is
		 * only supported by Pentium and P6 processors. It's always
		 * sent to all cpus regardless of the destination or
		 * shorthand field. It resets the arbitration id register,
		 * which is not software accessible and only required for
		 * APIC bus arbitration. So the level deassert INIT doesn't
		 * need any emulation and we should ignore it. The SDM also
		 * states that newer processors don't support the level
		 * deassert INIT and that it's no longer valid. As it's
		 * defined for older systems, it can't be invalid per se;
		 * otherwise backward compatibility would be broken. However,
		 * returning false here causes it to be ignored, which is the
		 * desired behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT)
			return (false);
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)
			return (true);
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD)
			return (true);
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		__assert_unreachable();
	}

	return (false);
}

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask, ipimask;
	uint64_t icrval;
	uint32_t dest, vec, mode, shorthand;
	struct vcpu *vcpu;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	phys = (icrval & APIC_DESTMODE_LOG) == 0;
	shorthand = icrval & APIC_DEST_MASK;

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	switch (shorthand) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
		    x2apic(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		__assert_unreachable();
	}

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval)) {
		VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval);
		return (0);
	}

	/*
	 * ipimask is a set of vCPUs needing userland handling of the current
	 * IPI.
	 */
	CPU_ZERO(&ipimask);

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    false);
			VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
			return (0);
		}

		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			lapic_intr_edge(vcpu, vec);
			vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vcpu, VLAPIC_IPI_RECV, 1);
			VLAPIC_CTR2(vlapic,
			    "vlapic sending ipi %d to vcpuid %d", vec, i);
		}

		break;
	case APIC_DELMODE_NMI:
		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			vm_inject_nmi(vcpu);
			VLAPIC_CTR1(vlapic,
			    "vlapic sending ipi nmi to vcpuid %d", i);
		}

		break;
	case APIC_DELMODE_INIT:
	case APIC_DELMODE_STARTUP:
		if (!vlapic->ipi_exit) {
			if (!phys)
				break;

			i = vm_apicid2vcpuid(vlapic->vm, dest);
			if (i >= vm_get_maxcpus(vlapic->vm) ||
			    i == vlapic->vcpuid)
				break;

			CPU_SETOF(i, &ipimask);

			break;
		}

		CPU_COPY(&dmask, &ipimask);
		break;
	default:
		return (1);
	}

	if (!CPU_EMPTY(&ipimask)) {
		vmexit = vm_exitinfo(vlapic->vcpu);
		vmexit->exitcode = VM_EXITCODE_IPI;
		vmexit->u.ipi.mode = mode;
		vmexit->u.ipi.vector = vec;
		*vm_exitinfo_cpuset(vlapic->vcpu) = ipimask;

		*retu = true;
	}

	return (0);
}

static void
vlapic_handle_init(struct vcpu *vcpu, void *arg)
{
	struct vlapic *vlapic = vm_lapic(vcpu);

	vlapic_reset(vlapic);
}

int
vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vlapic *vlapic = vm_lapic(vcpu);
	cpuset_t *dmask = vm_exitinfo_cpuset(vcpu);
	uint8_t vec = vme->u.ipi.vector;

	*retu = true;
	switch (vme->u.ipi.mode) {
	case APIC_DELMODE_INIT: {
		cpuset_t active, reinit;

		active = vm_active_cpus(vcpu_vm(vcpu));
		CPU_AND(&reinit, &active, dmask);
		if (!CPU_EMPTY(&reinit)) {
			vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init,
			    NULL);
		}
		vm_await_start(vcpu_vm(vcpu), dmask);

		if (!vlapic->ipi_exit)
			*retu = false;

		break;
	}
	case APIC_DELMODE_STARTUP:
		/*
		 * Ignore SIPIs in any state other than wait-for-SIPI.
		 */
		*dmask = vm_start_cpus(vcpu_vm(vcpu), dmask);

		if (CPU_EMPTY(dmask)) {
			*retu = false;
			break;
		}

		/*
		 * Old bhyve versions don't support the IPI
		 * exit. Translate it into the old style.
		 */
		if (!vlapic->ipi_exit) {
			vme->exitcode = VM_EXITCODE_SPINUP_AP;
			vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1;
			vme->u.spinup_ap.rip = vec << PAGE_SHIFT;
		}

		break;
	default:
		__assert_unreachable();
	}

	return (0);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vcpu, vec);
	vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_RECV, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}
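
/*
 * Example: with PPR = 0x30 only vectors whose priority class exceeds 3
 * (i.e. vectors 0x40 and above) are reported as deliverable; lower vectors
 * stay pending in the IRR until the PPR drops.
 */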

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	vlapic_update_ppr(vlapic);

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
}
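
/*
 * The SVR enable bit implements "software disable": the APIC stops
 * accepting interrupts and its LVTs are masked, but its registers remain
 * accessible. Hardware disable via the APICBASE MSR is checked separately
 * in vlapic_enabled().
 */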

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		/* Read only. */
		break;
	}

	return (retval);
}
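
/*
 * Power-on register values per the SDM: the version reports 0x14 with the
 * highest LVT index in bits 23:16, the DFR resets to all ones (flat model)
 * and the SVR resets to 0xff with the software-enable bit clear, which is
 * why all LVTs start out masked.
 */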

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic->ipi_exit = false;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
	mtx_destroy(&vlapic->timer_mtx);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vcpu);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	struct vcpu *vcpu;
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	CPU_FOREACH_ISSET(vcpuid, &dmask) {
		vcpu = vm_vcpu(vm, vcpuid);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vcpu);
		} else {
			lapic_set_intr(vcpu, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}
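
/*
 * The TMR bit for a vector records whether it was delivered as a
 * level-triggered interrupt; vlapic_process_eoi() uses it to decide
 * whether an EOI must be propagated to the virtual I/O APIC.
 */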

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}

#ifdef BHYVE_SNAPSHOT
static void
vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
{
	/*
	 * The implementation is similar to the one in
	 * vlapic_icrtmr_write_handler().
	 */
	sbintime_t sbt;
	struct bintime bt;

	VLAPIC_TIMER_LOCK(vlapic);

	bt = vlapic->timer_freq_bt;
	bintime_mul(&bt, ccr);

	if (ccr != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &bt);

		sbt = bttosbt(bt);
		vlapic_callout_reset(vlapic, sbt);
	} else {
		/* Even if the CCR was 0, periodic timers should be reset. */
		if (vlapic_periodic_timer(vlapic)) {
			binuptime(&vlapic->timer_fire_bt);
			bintime_add(&vlapic->timer_fire_bt,
			    &vlapic->timer_period_bt);
			sbt = bttosbt(vlapic->timer_period_bt);

			callout_stop(&vlapic->callout);
			vlapic_callout_reset(vlapic, sbt);
		}
	}

	VLAPIC_TIMER_UNLOCK(vlapic);
}

int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret;
	struct vcpu *vcpu;
	struct vlapic *vlapic;
	struct LAPIC *lapic;
	uint32_t ccr;
	uint16_t i, maxcpus;

	KASSERT(vm != NULL, ("%s: arg was NULL", __func__));

	ret = 0;

	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vlapic = vm_lapic(vcpu);

		/* Snapshot the page first; timer period depends on icr_timer. */
		lapic = vlapic->apic_page;
		SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
		    meta, ret, done);

		/*
		 * Timer period is equal to 'icr_timer' ticks at a frequency of
		 * 'timer_freq_bt'.
		 */
		if (meta->op == VM_SNAPSHOT_RESTORE) {
			vlapic->timer_period_bt = vlapic->timer_freq_bt;
			bintime_mul(&vlapic->timer_period_bt,
			    lapic->icr_timer);
		}

		SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
		    sizeof(vlapic->isrvec_stk),
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);

		SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
		    sizeof(vlapic->lvt_last),
		    meta, ret, done);

		if (meta->op == VM_SNAPSHOT_SAVE)
			ccr = vlapic_get_ccr(vlapic);

		SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);

		if (meta->op == VM_SNAPSHOT_RESTORE &&
		    vlapic_enabled(vlapic) && lapic->icr_timer != 0) {
			/*
			 * Reset 'timer_fire_bt' and the vlapic callout based
			 * on the value of the current count register saved
			 * when the VM snapshot was created. If the initial
			 * count register is 0, the timer is not in use. See
			 * "10.5.4 APIC Timer" in the Software Developer
			 * Manual.
			 */
			vlapic_reset_callout(vlapic, ccr);
		}
	}

done:
	return (ret);
}
#endif