1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bhyve_snapshot.h" 36 37 #include <sys/param.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/systm.h> 43 #include <sys/smp.h> 44 45 #include <x86/specialreg.h> 46 #include <x86/apicreg.h> 47 48 #include <machine/clock.h> 49 #include <machine/smp.h> 50 51 #include <machine/vmm.h> 52 #include <machine/vmm_snapshot.h> 53 54 #include "vmm_lapic.h" 55 #include "vmm_ktr.h" 56 #include "vmm_stat.h" 57 58 #include "vlapic.h" 59 #include "vlapic_priv.h" 60 #include "vioapic.h" 61 62 #define PRIO(x) ((x) >> 4) 63 64 #define VLAPIC_VERSION (0x14) 65 66 #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) 67 68 /* 69 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the 70 * vlapic_callout_handler() and vcpu accesses to: 71 * - timer_freq_bt, timer_period_bt, timer_fire_bt 72 * - timer LVT register 73 */ 74 #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) 75 #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) 76 #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) 77 78 /* 79 * APIC timer frequency: 80 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 81 * - power-of-two to avoid loss of precision when converted to a bintime. 82 */ 83 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 84 85 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 86 static void vlapic_callout_handler(void *arg); 87 static void vlapic_reset(struct vlapic *vlapic); 88 89 static __inline uint32_t 90 vlapic_get_id(struct vlapic *vlapic) 91 { 92 93 if (x2apic(vlapic)) 94 return (vlapic->vcpuid); 95 else 96 return (vlapic->vcpuid << 24); 97 } 98 99 static uint32_t 100 x2apic_ldr(struct vlapic *vlapic) 101 { 102 int apicid; 103 uint32_t ldr; 104 105 apicid = vlapic_get_id(vlapic); 106 ldr = 1 << (apicid & 0xf); 107 ldr |= (apicid & 0xffff0) << 12; 108 return (ldr); 109 } 110 111 void 112 vlapic_dfr_write_handler(struct vlapic *vlapic) 113 { 114 struct LAPIC *lapic; 115 116 lapic = vlapic->apic_page; 117 if (x2apic(vlapic)) { 118 VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", 119 lapic->dfr); 120 lapic->dfr = 0; 121 return; 122 } 123 124 lapic->dfr &= APIC_DFR_MODEL_MASK; 125 lapic->dfr |= APIC_DFR_RESERVED; 126 127 if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) 128 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); 129 else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) 130 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); 131 else 132 VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); 133 } 134 135 void 136 vlapic_ldr_write_handler(struct vlapic *vlapic) 137 { 138 struct LAPIC *lapic; 139 140 lapic = vlapic->apic_page; 141 142 /* LDR is read-only in x2apic mode */ 143 if (x2apic(vlapic)) { 144 VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", 145 lapic->ldr); 146 lapic->ldr = x2apic_ldr(vlapic); 147 } else { 148 lapic->ldr &= ~APIC_LDR_RESERVED; 149 VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); 150 } 151 } 152 153 void 154 vlapic_id_write_handler(struct vlapic *vlapic) 155 { 156 struct LAPIC *lapic; 157 158 /* 159 * We don't allow the ID register to be modified so reset it back to 160 * its default value. 161 */ 162 lapic = vlapic->apic_page; 163 lapic->id = vlapic_get_id(vlapic); 164 } 165 166 static int 167 vlapic_timer_divisor(uint32_t dcr) 168 { 169 switch (dcr & 0xB) { 170 case APIC_TDCR_1: 171 return (1); 172 case APIC_TDCR_2: 173 return (2); 174 case APIC_TDCR_4: 175 return (4); 176 case APIC_TDCR_8: 177 return (8); 178 case APIC_TDCR_16: 179 return (16); 180 case APIC_TDCR_32: 181 return (32); 182 case APIC_TDCR_64: 183 return (64); 184 case APIC_TDCR_128: 185 return (128); 186 default: 187 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 188 } 189 } 190 191 #if 0 192 static inline void 193 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 194 { 195 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 196 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 197 *lvt & APIC_LVTT_M); 198 } 199 #endif 200 201 static uint32_t 202 vlapic_get_ccr(struct vlapic *vlapic) 203 { 204 struct bintime bt_now, bt_rem; 205 struct LAPIC *lapic __diagused; 206 uint32_t ccr; 207 208 ccr = 0; 209 lapic = vlapic->apic_page; 210 211 VLAPIC_TIMER_LOCK(vlapic); 212 if (callout_active(&vlapic->callout)) { 213 /* 214 * If the timer is scheduled to expire in the future then 215 * compute the value of 'ccr' based on the remaining time. 216 */ 217 binuptime(&bt_now); 218 if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { 219 bt_rem = vlapic->timer_fire_bt; 220 bintime_sub(&bt_rem, &bt_now); 221 ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); 222 ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; 223 } 224 } 225 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " 226 "icr_timer is %#x", ccr, lapic->icr_timer)); 227 VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", 228 ccr, lapic->icr_timer); 229 VLAPIC_TIMER_UNLOCK(vlapic); 230 return (ccr); 231 } 232 233 void 234 vlapic_dcr_write_handler(struct vlapic *vlapic) 235 { 236 struct LAPIC *lapic; 237 int divisor; 238 239 lapic = vlapic->apic_page; 240 VLAPIC_TIMER_LOCK(vlapic); 241 242 divisor = vlapic_timer_divisor(lapic->dcr_timer); 243 VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", 244 lapic->dcr_timer, divisor); 245 246 /* 247 * Update the timer frequency and the timer period. 248 * 249 * XXX changes to the frequency divider will not take effect until 250 * the timer is reloaded. 251 */ 252 FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); 253 vlapic->timer_period_bt = vlapic->timer_freq_bt; 254 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 255 256 VLAPIC_TIMER_UNLOCK(vlapic); 257 } 258 259 void 260 vlapic_esr_write_handler(struct vlapic *vlapic) 261 { 262 struct LAPIC *lapic; 263 264 lapic = vlapic->apic_page; 265 lapic->esr = vlapic->esr_pending; 266 vlapic->esr_pending = 0; 267 } 268 269 int 270 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 271 { 272 struct LAPIC *lapic; 273 uint32_t *irrptr, *tmrptr, mask; 274 int idx; 275 276 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 277 278 lapic = vlapic->apic_page; 279 if (!(lapic->svr & APIC_SVR_ENABLE)) { 280 VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " 281 "interrupt %d", vector); 282 return (0); 283 } 284 285 if (vector < 16) { 286 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 287 false); 288 VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", 289 vector); 290 return (1); 291 } 292 293 if (vlapic->ops.set_intr_ready) 294 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 295 296 idx = (vector / 32) * 4; 297 mask = 1 << (vector % 32); 298 299 irrptr = &lapic->irr0; 300 atomic_set_int(&irrptr[idx], mask); 301 302 /* 303 * Verify that the trigger-mode of the interrupt matches with 304 * the vlapic TMR registers. 305 */ 306 tmrptr = &lapic->tmr0; 307 if ((tmrptr[idx] & mask) != (level ? mask : 0)) { 308 VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but " 309 "interrupt is %s-triggered", idx / 4, tmrptr[idx], 310 level ? "level" : "edge"); 311 } 312 313 VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); 314 return (1); 315 } 316 317 static __inline uint32_t * 318 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 319 { 320 struct LAPIC *lapic = vlapic->apic_page; 321 int i; 322 323 switch (offset) { 324 case APIC_OFFSET_CMCI_LVT: 325 return (&lapic->lvt_cmci); 326 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 327 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 328 return ((&lapic->lvt_timer) + i); 329 default: 330 panic("vlapic_get_lvt: invalid LVT\n"); 331 } 332 } 333 334 static __inline int 335 lvt_off_to_idx(uint32_t offset) 336 { 337 int index; 338 339 switch (offset) { 340 case APIC_OFFSET_CMCI_LVT: 341 index = APIC_LVT_CMCI; 342 break; 343 case APIC_OFFSET_TIMER_LVT: 344 index = APIC_LVT_TIMER; 345 break; 346 case APIC_OFFSET_THERM_LVT: 347 index = APIC_LVT_THERMAL; 348 break; 349 case APIC_OFFSET_PERF_LVT: 350 index = APIC_LVT_PMC; 351 break; 352 case APIC_OFFSET_LINT0_LVT: 353 index = APIC_LVT_LINT0; 354 break; 355 case APIC_OFFSET_LINT1_LVT: 356 index = APIC_LVT_LINT1; 357 break; 358 case APIC_OFFSET_ERROR_LVT: 359 index = APIC_LVT_ERROR; 360 break; 361 default: 362 index = -1; 363 break; 364 } 365 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 366 "invalid lvt index %d for offset %#x", index, offset)); 367 368 return (index); 369 } 370 371 static __inline uint32_t 372 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 373 { 374 int idx; 375 uint32_t val; 376 377 idx = lvt_off_to_idx(offset); 378 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 379 return (val); 380 } 381 382 void 383 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 384 { 385 uint32_t *lvtptr, mask, val; 386 struct LAPIC *lapic; 387 int idx; 388 389 lapic = vlapic->apic_page; 390 lvtptr = vlapic_get_lvtptr(vlapic, offset); 391 val = *lvtptr; 392 idx = lvt_off_to_idx(offset); 393 394 if (!(lapic->svr & APIC_SVR_ENABLE)) 395 val |= APIC_LVT_M; 396 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 397 switch (offset) { 398 case APIC_OFFSET_TIMER_LVT: 399 mask |= APIC_LVTT_TM; 400 break; 401 case APIC_OFFSET_ERROR_LVT: 402 break; 403 case APIC_OFFSET_LINT0_LVT: 404 case APIC_OFFSET_LINT1_LVT: 405 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 406 /* FALLTHROUGH */ 407 default: 408 mask |= APIC_LVT_DM; 409 break; 410 } 411 val &= mask; 412 *lvtptr = val; 413 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 414 } 415 416 static void 417 vlapic_mask_lvts(struct vlapic *vlapic) 418 { 419 struct LAPIC *lapic = vlapic->apic_page; 420 421 lapic->lvt_cmci |= APIC_LVT_M; 422 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 423 424 lapic->lvt_timer |= APIC_LVT_M; 425 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 426 427 lapic->lvt_thermal |= APIC_LVT_M; 428 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 429 430 lapic->lvt_pcint |= APIC_LVT_M; 431 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 432 433 lapic->lvt_lint0 |= APIC_LVT_M; 434 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 435 436 lapic->lvt_lint1 |= APIC_LVT_M; 437 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 438 439 lapic->lvt_error |= APIC_LVT_M; 440 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 441 } 442 443 static int 444 vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) 445 { 446 uint32_t mode, reg, vec; 447 448 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 449 450 if (reg & APIC_LVT_M) 451 return (0); 452 vec = reg & APIC_LVT_VECTOR; 453 mode = reg & APIC_LVT_DM; 454 455 switch (mode) { 456 case APIC_LVT_DM_FIXED: 457 if (vec < 16) { 458 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 459 lvt == APIC_LVT_ERROR); 460 return (0); 461 } 462 if (vlapic_set_intr_ready(vlapic, vec, false)) 463 vcpu_notify_event(vlapic->vcpu, true); 464 break; 465 case APIC_LVT_DM_NMI: 466 vm_inject_nmi(vlapic->vcpu); 467 break; 468 case APIC_LVT_DM_EXTINT: 469 vm_inject_extint(vlapic->vcpu); 470 break; 471 default: 472 // Other modes ignored 473 return (0); 474 } 475 return (1); 476 } 477 478 #if 1 479 static void 480 dump_isrvec_stk(struct vlapic *vlapic) 481 { 482 int i; 483 uint32_t *isrptr; 484 485 isrptr = &vlapic->apic_page->isr0; 486 for (i = 0; i < 8; i++) 487 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 488 489 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 490 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 491 } 492 #endif 493 494 /* 495 * Algorithm adopted from section "Interrupt, Task and Processor Priority" 496 * in Intel Architecture Manual Vol 3a. 497 */ 498 static void 499 vlapic_update_ppr(struct vlapic *vlapic) 500 { 501 int isrvec, tpr, ppr; 502 503 /* 504 * Note that the value on the stack at index 0 is always 0. 505 * 506 * This is a placeholder for the value of ISRV when none of the 507 * bits is set in the ISRx registers. 508 */ 509 isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; 510 tpr = vlapic->apic_page->tpr; 511 512 #if 1 513 { 514 int i, lastprio, curprio, vector, idx; 515 uint32_t *isrptr; 516 517 if (vlapic->isrvec_stk_top == 0 && isrvec != 0) 518 panic("isrvec_stk is corrupted: %d", isrvec); 519 520 /* 521 * Make sure that the priority of the nested interrupts is 522 * always increasing. 523 */ 524 lastprio = -1; 525 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 526 curprio = PRIO(vlapic->isrvec_stk[i]); 527 if (curprio <= lastprio) { 528 dump_isrvec_stk(vlapic); 529 panic("isrvec_stk does not satisfy invariant"); 530 } 531 lastprio = curprio; 532 } 533 534 /* 535 * Make sure that each bit set in the ISRx registers has a 536 * corresponding entry on the isrvec stack. 537 */ 538 i = 1; 539 isrptr = &vlapic->apic_page->isr0; 540 for (vector = 0; vector < 256; vector++) { 541 idx = (vector / 32) * 4; 542 if (isrptr[idx] & (1 << (vector % 32))) { 543 if (i > vlapic->isrvec_stk_top || 544 vlapic->isrvec_stk[i] != vector) { 545 dump_isrvec_stk(vlapic); 546 panic("ISR and isrvec_stk out of sync"); 547 } 548 i++; 549 } 550 } 551 } 552 #endif 553 554 if (PRIO(tpr) >= PRIO(isrvec)) 555 ppr = tpr; 556 else 557 ppr = isrvec & 0xf0; 558 559 vlapic->apic_page->ppr = ppr; 560 VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); 561 } 562 563 void 564 vlapic_sync_tpr(struct vlapic *vlapic) 565 { 566 vlapic_update_ppr(vlapic); 567 } 568 569 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 570 571 static void 572 vlapic_process_eoi(struct vlapic *vlapic) 573 { 574 struct LAPIC *lapic = vlapic->apic_page; 575 uint32_t *isrptr, *tmrptr; 576 int i, idx, bitpos, vector; 577 578 isrptr = &lapic->isr0; 579 tmrptr = &lapic->tmr0; 580 581 for (i = 7; i >= 0; i--) { 582 idx = i * 4; 583 bitpos = fls(isrptr[idx]); 584 if (bitpos-- != 0) { 585 if (vlapic->isrvec_stk_top <= 0) { 586 panic("invalid vlapic isrvec_stk_top %d", 587 vlapic->isrvec_stk_top); 588 } 589 isrptr[idx] &= ~(1 << bitpos); 590 vector = i * 32 + bitpos; 591 VLAPIC_CTR1(vlapic, "EOI vector %d", vector); 592 VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); 593 vlapic->isrvec_stk_top--; 594 vlapic_update_ppr(vlapic); 595 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 596 vioapic_process_eoi(vlapic->vm, vector); 597 } 598 return; 599 } 600 } 601 VLAPIC_CTR0(vlapic, "Gratuitous EOI"); 602 vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1); 603 } 604 605 static __inline int 606 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 607 { 608 609 return (lvt & mask); 610 } 611 612 static __inline int 613 vlapic_periodic_timer(struct vlapic *vlapic) 614 { 615 uint32_t lvt; 616 617 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 618 619 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 620 } 621 622 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 623 624 static void 625 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 626 { 627 628 vlapic->esr_pending |= mask; 629 630 /* 631 * Avoid infinite recursion if the error LVT itself is configured with 632 * an illegal vector. 633 */ 634 if (lvt_error) 635 return; 636 637 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 638 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1); 639 } 640 } 641 642 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 643 644 static void 645 vlapic_fire_timer(struct vlapic *vlapic) 646 { 647 648 KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); 649 650 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 651 VLAPIC_CTR0(vlapic, "vlapic timer fired"); 652 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1); 653 } 654 } 655 656 static VMM_STAT(VLAPIC_INTR_CMC, 657 "corrected machine check interrupts generated by vlapic"); 658 659 void 660 vlapic_fire_cmci(struct vlapic *vlapic) 661 { 662 663 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 664 vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1); 665 } 666 } 667 668 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 669 "lvts triggered"); 670 671 int 672 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 673 { 674 675 if (vlapic_enabled(vlapic) == false) { 676 /* 677 * When the local APIC is global/hardware disabled, 678 * LINT[1:0] pins are configured as INTR and NMI pins, 679 * respectively. 680 */ 681 switch (vector) { 682 case APIC_LVT_LINT0: 683 vm_inject_extint(vlapic->vcpu); 684 break; 685 case APIC_LVT_LINT1: 686 vm_inject_nmi(vlapic->vcpu); 687 break; 688 default: 689 break; 690 } 691 return (0); 692 } 693 694 switch (vector) { 695 case APIC_LVT_LINT0: 696 case APIC_LVT_LINT1: 697 case APIC_LVT_TIMER: 698 case APIC_LVT_ERROR: 699 case APIC_LVT_PMC: 700 case APIC_LVT_THERMAL: 701 case APIC_LVT_CMCI: 702 if (vlapic_fire_lvt(vlapic, vector)) { 703 vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERRED, 704 vector, 1); 705 } 706 break; 707 default: 708 return (EINVAL); 709 } 710 return (0); 711 } 712 713 static void 714 vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) 715 { 716 callout_reset_sbt_curcpu(&vlapic->callout, t, 0, 717 vlapic_callout_handler, vlapic, 0); 718 } 719 720 static void 721 vlapic_callout_handler(void *arg) 722 { 723 struct vlapic *vlapic; 724 struct bintime bt, btnow; 725 sbintime_t rem_sbt; 726 727 vlapic = arg; 728 729 VLAPIC_TIMER_LOCK(vlapic); 730 if (callout_pending(&vlapic->callout)) /* callout was reset */ 731 goto done; 732 733 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 734 goto done; 735 736 callout_deactivate(&vlapic->callout); 737 738 vlapic_fire_timer(vlapic); 739 740 if (vlapic_periodic_timer(vlapic)) { 741 binuptime(&btnow); 742 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 743 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 744 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 745 vlapic->timer_fire_bt.frac)); 746 747 /* 748 * Compute the delta between when the timer was supposed to 749 * fire and the present time. 750 */ 751 bt = btnow; 752 bintime_sub(&bt, &vlapic->timer_fire_bt); 753 754 rem_sbt = bttosbt(vlapic->timer_period_bt); 755 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 756 /* 757 * Adjust the time until the next countdown downward 758 * to account for the lost time. 759 */ 760 rem_sbt -= bttosbt(bt); 761 } else { 762 /* 763 * If the delta is greater than the timer period then 764 * just reset our time base instead of trying to catch 765 * up. 766 */ 767 vlapic->timer_fire_bt = btnow; 768 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 769 "usecs, period is %lu usecs - resetting time base", 770 bttosbt(bt) / SBT_1US, 771 bttosbt(vlapic->timer_period_bt) / SBT_1US); 772 } 773 774 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 775 vlapic_callout_reset(vlapic, rem_sbt); 776 } 777 done: 778 VLAPIC_TIMER_UNLOCK(vlapic); 779 } 780 781 void 782 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 783 { 784 struct LAPIC *lapic; 785 sbintime_t sbt; 786 uint32_t icr_timer; 787 788 VLAPIC_TIMER_LOCK(vlapic); 789 790 lapic = vlapic->apic_page; 791 icr_timer = lapic->icr_timer; 792 793 vlapic->timer_period_bt = vlapic->timer_freq_bt; 794 bintime_mul(&vlapic->timer_period_bt, icr_timer); 795 796 if (icr_timer != 0) { 797 binuptime(&vlapic->timer_fire_bt); 798 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 799 800 sbt = bttosbt(vlapic->timer_period_bt); 801 vlapic_callout_reset(vlapic, sbt); 802 } else 803 callout_stop(&vlapic->callout); 804 805 VLAPIC_TIMER_UNLOCK(vlapic); 806 } 807 808 /* 809 * This function populates 'dmask' with the set of vcpus that match the 810 * addressing specified by the (dest, phys, lowprio) tuple. 811 * 812 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 813 * or xAPIC (8-bit) destination field. 814 */ 815 static void 816 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 817 bool lowprio, bool x2apic_dest) 818 { 819 struct vlapic *vlapic; 820 uint32_t dfr, ldr, ldest, cluster; 821 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 822 cpuset_t amask; 823 int vcpuid; 824 825 if ((x2apic_dest && dest == 0xffffffff) || 826 (!x2apic_dest && dest == 0xff)) { 827 /* 828 * Broadcast in both logical and physical modes. 829 */ 830 *dmask = vm_active_cpus(vm); 831 return; 832 } 833 834 if (phys) { 835 /* 836 * Physical mode: destination is APIC ID. 837 */ 838 CPU_ZERO(dmask); 839 vcpuid = vm_apicid2vcpuid(vm, dest); 840 amask = vm_active_cpus(vm); 841 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 842 CPU_SET(vcpuid, dmask); 843 } else { 844 /* 845 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 846 * bitmask. This model is only available in the xAPIC mode. 847 */ 848 mda_flat_ldest = dest & 0xff; 849 850 /* 851 * In the "Cluster Model" the MDA is used to identify a 852 * specific cluster and a set of APICs in that cluster. 853 */ 854 if (x2apic_dest) { 855 mda_cluster_id = dest >> 16; 856 mda_cluster_ldest = dest & 0xffff; 857 } else { 858 mda_cluster_id = (dest >> 4) & 0xf; 859 mda_cluster_ldest = dest & 0xf; 860 } 861 862 /* 863 * Logical mode: match each APIC that has a bit set 864 * in its LDR that matches a bit in the ldest. 865 */ 866 CPU_ZERO(dmask); 867 amask = vm_active_cpus(vm); 868 CPU_FOREACH_ISSET(vcpuid, &amask) { 869 vlapic = vm_lapic(vm_vcpu(vm, vcpuid)); 870 dfr = vlapic->apic_page->dfr; 871 ldr = vlapic->apic_page->ldr; 872 873 if ((dfr & APIC_DFR_MODEL_MASK) == 874 APIC_DFR_MODEL_FLAT) { 875 ldest = ldr >> 24; 876 mda_ldest = mda_flat_ldest; 877 } else if ((dfr & APIC_DFR_MODEL_MASK) == 878 APIC_DFR_MODEL_CLUSTER) { 879 if (x2apic(vlapic)) { 880 cluster = ldr >> 16; 881 ldest = ldr & 0xffff; 882 } else { 883 cluster = ldr >> 28; 884 ldest = (ldr >> 24) & 0xf; 885 } 886 if (cluster != mda_cluster_id) 887 continue; 888 mda_ldest = mda_cluster_ldest; 889 } else { 890 /* 891 * Guest has configured a bad logical 892 * model for this vcpu - skip it. 893 */ 894 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 895 "model %x - cannot deliver interrupt", dfr); 896 continue; 897 } 898 899 if ((mda_ldest & ldest) != 0) { 900 CPU_SET(vcpuid, dmask); 901 if (lowprio) 902 break; 903 } 904 } 905 } 906 } 907 908 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu"); 909 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu"); 910 911 static void 912 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 913 { 914 struct LAPIC *lapic = vlapic->apic_page; 915 916 if (lapic->tpr != val) { 917 VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x", 918 lapic->tpr, val); 919 lapic->tpr = val; 920 vlapic_update_ppr(vlapic); 921 } 922 } 923 924 static uint8_t 925 vlapic_get_tpr(struct vlapic *vlapic) 926 { 927 struct LAPIC *lapic = vlapic->apic_page; 928 929 return (lapic->tpr); 930 } 931 932 void 933 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 934 { 935 uint8_t tpr; 936 937 if (val & ~0xf) { 938 vm_inject_gp(vlapic->vcpu); 939 return; 940 } 941 942 tpr = val << 4; 943 vlapic_set_tpr(vlapic, tpr); 944 } 945 946 uint64_t 947 vlapic_get_cr8(struct vlapic *vlapic) 948 { 949 uint8_t tpr; 950 951 tpr = vlapic_get_tpr(vlapic); 952 return (tpr >> 4); 953 } 954 955 static bool 956 vlapic_is_icr_valid(uint64_t icrval) 957 { 958 uint32_t mode = icrval & APIC_DELMODE_MASK; 959 uint32_t level = icrval & APIC_LEVEL_MASK; 960 uint32_t trigger = icrval & APIC_TRIGMOD_MASK; 961 uint32_t shorthand = icrval & APIC_DEST_MASK; 962 963 switch (mode) { 964 case APIC_DELMODE_FIXED: 965 if (trigger == APIC_TRIGMOD_EDGE) 966 return (true); 967 /* 968 * AMD allows a level assert IPI and Intel converts a level 969 * assert IPI into an edge IPI. 970 */ 971 if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) 972 return (true); 973 break; 974 case APIC_DELMODE_LOWPRIO: 975 case APIC_DELMODE_SMI: 976 case APIC_DELMODE_NMI: 977 case APIC_DELMODE_INIT: 978 if (trigger == APIC_TRIGMOD_EDGE && 979 (shorthand == APIC_DEST_DESTFLD || 980 shorthand == APIC_DEST_ALLESELF)) 981 return (true); 982 /* 983 * AMD allows a level assert IPI and Intel converts a level 984 * assert IPI into an edge IPI. 985 */ 986 if (trigger == APIC_TRIGMOD_LEVEL && 987 level == APIC_LEVEL_ASSERT && 988 (shorthand == APIC_DEST_DESTFLD || 989 shorthand == APIC_DEST_ALLESELF)) 990 return (true); 991 /* 992 * An level triggered deassert INIT is defined in the Intel 993 * Multiprocessor Specification and the Intel Software Developer 994 * Manual. Due to the MPS it's required to send a level assert 995 * INIT to a cpu and then a level deassert INIT. Some operating 996 * systems e.g. FreeBSD or Linux use that algorithm. According 997 * to the SDM a level deassert INIT is only supported by Pentium 998 * and P6 processors. It's always send to all cpus regardless of 999 * the destination or shorthand field. It resets the arbitration 1000 * id register. This register is not software accessible and 1001 * only required for the APIC bus arbitration. So, the level 1002 * deassert INIT doesn't need any emulation and we should ignore 1003 * it. The SDM also defines that newer processors don't support 1004 * the level deassert INIT and it's not valid any more. As it's 1005 * defined for older systems, it can't be invalid per se. 1006 * Otherwise, backward compatibility would be broken. However, 1007 * when returning false here, it'll be ignored which is the 1008 * desired behaviour. 1009 */ 1010 if (mode == APIC_DELMODE_INIT && 1011 trigger == APIC_TRIGMOD_LEVEL && 1012 level == APIC_LEVEL_DEASSERT) 1013 return (false); 1014 break; 1015 case APIC_DELMODE_STARTUP: 1016 if (shorthand == APIC_DEST_DESTFLD || 1017 shorthand == APIC_DEST_ALLESELF) 1018 return (true); 1019 break; 1020 case APIC_DELMODE_RR: 1021 /* Only available on AMD! */ 1022 if (trigger == APIC_TRIGMOD_EDGE && 1023 shorthand == APIC_DEST_DESTFLD) 1024 return (true); 1025 break; 1026 case APIC_DELMODE_RESV: 1027 return (false); 1028 default: 1029 __assert_unreachable(); 1030 } 1031 1032 return (false); 1033 } 1034 1035 int 1036 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 1037 { 1038 int i; 1039 bool phys; 1040 cpuset_t dmask, ipimask; 1041 uint64_t icrval; 1042 uint32_t dest, vec, mode, shorthand; 1043 struct vcpu *vcpu; 1044 struct vm_exit *vmexit; 1045 struct LAPIC *lapic; 1046 1047 lapic = vlapic->apic_page; 1048 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 1049 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1050 1051 if (x2apic(vlapic)) 1052 dest = icrval >> 32; 1053 else 1054 dest = icrval >> (32 + 24); 1055 vec = icrval & APIC_VECTOR_MASK; 1056 mode = icrval & APIC_DELMODE_MASK; 1057 phys = (icrval & APIC_DESTMODE_LOG) == 0; 1058 shorthand = icrval & APIC_DEST_MASK; 1059 1060 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 1061 1062 switch (shorthand) { 1063 case APIC_DEST_DESTFLD: 1064 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); 1065 break; 1066 case APIC_DEST_SELF: 1067 CPU_SETOF(vlapic->vcpuid, &dmask); 1068 break; 1069 case APIC_DEST_ALLISELF: 1070 dmask = vm_active_cpus(vlapic->vm); 1071 break; 1072 case APIC_DEST_ALLESELF: 1073 dmask = vm_active_cpus(vlapic->vm); 1074 CPU_CLR(vlapic->vcpuid, &dmask); 1075 break; 1076 default: 1077 __assert_unreachable(); 1078 } 1079 1080 /* 1081 * Ignore invalid combinations of the icr. 1082 */ 1083 if (!vlapic_is_icr_valid(icrval)) { 1084 VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval); 1085 return (0); 1086 } 1087 1088 /* 1089 * ipimask is a set of vCPUs needing userland handling of the current 1090 * IPI. 1091 */ 1092 CPU_ZERO(&ipimask); 1093 1094 switch (mode) { 1095 case APIC_DELMODE_FIXED: 1096 if (vec < 16) { 1097 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 1098 false); 1099 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 1100 return (0); 1101 } 1102 1103 CPU_FOREACH_ISSET(i, &dmask) { 1104 vcpu = vm_vcpu(vlapic->vm, i); 1105 lapic_intr_edge(vcpu, vec); 1106 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); 1107 vmm_stat_incr(vcpu, VLAPIC_IPI_RECV, 1); 1108 VLAPIC_CTR2(vlapic, 1109 "vlapic sending ipi %d to vcpuid %d", vec, i); 1110 } 1111 1112 break; 1113 case APIC_DELMODE_NMI: 1114 CPU_FOREACH_ISSET(i, &dmask) { 1115 vcpu = vm_vcpu(vlapic->vm, i); 1116 vm_inject_nmi(vcpu); 1117 VLAPIC_CTR1(vlapic, 1118 "vlapic sending ipi nmi to vcpuid %d", i); 1119 } 1120 1121 break; 1122 case APIC_DELMODE_INIT: 1123 case APIC_DELMODE_STARTUP: 1124 if (!vlapic->ipi_exit) { 1125 if (!phys) 1126 break; 1127 1128 i = vm_apicid2vcpuid(vlapic->vm, dest); 1129 if (i >= vm_get_maxcpus(vlapic->vm) || 1130 i == vlapic->vcpuid) 1131 break; 1132 1133 CPU_SETOF(i, &ipimask); 1134 1135 break; 1136 } 1137 1138 CPU_COPY(&dmask, &ipimask); 1139 break; 1140 default: 1141 return (1); 1142 } 1143 1144 if (!CPU_EMPTY(&ipimask)) { 1145 vmexit = vm_exitinfo(vlapic->vcpu); 1146 vmexit->exitcode = VM_EXITCODE_IPI; 1147 vmexit->u.ipi.mode = mode; 1148 vmexit->u.ipi.vector = vec; 1149 vmexit->u.ipi.dmask = ipimask; 1150 1151 *retu = true; 1152 } 1153 1154 return (0); 1155 } 1156 1157 static void 1158 vlapic_handle_init(struct vcpu *vcpu, void *arg) 1159 { 1160 struct vlapic *vlapic = vm_lapic(vcpu); 1161 1162 vlapic_reset(vlapic); 1163 } 1164 1165 int 1166 vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) 1167 { 1168 struct vlapic *vlapic = vm_lapic(vcpu); 1169 cpuset_t *dmask = &vme->u.ipi.dmask; 1170 uint8_t vec = vme->u.ipi.vector; 1171 1172 *retu = true; 1173 switch (vme->u.ipi.mode) { 1174 case APIC_DELMODE_INIT: { 1175 cpuset_t active, reinit; 1176 1177 active = vm_active_cpus(vcpu_vm(vcpu)); 1178 CPU_AND(&reinit, &active, dmask); 1179 if (!CPU_EMPTY(&reinit)) { 1180 vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init, 1181 NULL); 1182 } 1183 vm_await_start(vcpu_vm(vcpu), dmask); 1184 1185 if (!vlapic->ipi_exit) 1186 *retu = false; 1187 1188 break; 1189 } 1190 case APIC_DELMODE_STARTUP: 1191 /* 1192 * Ignore SIPIs in any state other than wait-for-SIPI 1193 */ 1194 *dmask = vm_start_cpus(vcpu_vm(vcpu), dmask); 1195 1196 if (CPU_EMPTY(dmask)) { 1197 *retu = false; 1198 break; 1199 } 1200 1201 /* 1202 * Old bhyve versions don't support the IPI 1203 * exit. Translate it into the old style. 1204 */ 1205 if (!vlapic->ipi_exit) { 1206 vme->exitcode = VM_EXITCODE_SPINUP_AP; 1207 vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1; 1208 vme->u.spinup_ap.rip = vec << PAGE_SHIFT; 1209 } 1210 1211 break; 1212 default: 1213 __assert_unreachable(); 1214 } 1215 1216 return (0); 1217 } 1218 1219 void 1220 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1221 { 1222 int vec; 1223 1224 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1225 1226 vec = val & 0xff; 1227 lapic_intr_edge(vlapic->vcpu, vec); 1228 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1); 1229 vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_RECV, 1); 1230 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1231 } 1232 1233 int 1234 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1235 { 1236 struct LAPIC *lapic = vlapic->apic_page; 1237 int idx, i, bitpos, vector; 1238 uint32_t *irrptr, val; 1239 1240 vlapic_update_ppr(vlapic); 1241 1242 if (vlapic->ops.pending_intr) 1243 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1244 1245 irrptr = &lapic->irr0; 1246 1247 for (i = 7; i >= 0; i--) { 1248 idx = i * 4; 1249 val = atomic_load_acq_int(&irrptr[idx]); 1250 bitpos = fls(val); 1251 if (bitpos != 0) { 1252 vector = i * 32 + (bitpos - 1); 1253 if (PRIO(vector) > PRIO(lapic->ppr)) { 1254 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1255 if (vecptr != NULL) 1256 *vecptr = vector; 1257 return (1); 1258 } else 1259 break; 1260 } 1261 } 1262 return (0); 1263 } 1264 1265 void 1266 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1267 { 1268 struct LAPIC *lapic = vlapic->apic_page; 1269 uint32_t *irrptr, *isrptr; 1270 int idx, stk_top; 1271 1272 if (vlapic->ops.intr_accepted) 1273 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1274 1275 /* 1276 * clear the ready bit for vector being accepted in irr 1277 * and set the vector as in service in isr. 1278 */ 1279 idx = (vector / 32) * 4; 1280 1281 irrptr = &lapic->irr0; 1282 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1283 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1284 1285 isrptr = &lapic->isr0; 1286 isrptr[idx] |= 1 << (vector % 32); 1287 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1288 1289 /* 1290 * Update the PPR 1291 */ 1292 vlapic->isrvec_stk_top++; 1293 1294 stk_top = vlapic->isrvec_stk_top; 1295 if (stk_top >= ISRVEC_STK_SIZE) 1296 panic("isrvec_stk_top overflow %d", stk_top); 1297 1298 vlapic->isrvec_stk[stk_top] = vector; 1299 } 1300 1301 void 1302 vlapic_svr_write_handler(struct vlapic *vlapic) 1303 { 1304 struct LAPIC *lapic; 1305 uint32_t old, new, changed; 1306 1307 lapic = vlapic->apic_page; 1308 1309 new = lapic->svr; 1310 old = vlapic->svr_last; 1311 vlapic->svr_last = new; 1312 1313 changed = old ^ new; 1314 if ((changed & APIC_SVR_ENABLE) != 0) { 1315 if ((new & APIC_SVR_ENABLE) == 0) { 1316 /* 1317 * The apic is now disabled so stop the apic timer 1318 * and mask all the LVT entries. 1319 */ 1320 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1321 VLAPIC_TIMER_LOCK(vlapic); 1322 callout_stop(&vlapic->callout); 1323 VLAPIC_TIMER_UNLOCK(vlapic); 1324 vlapic_mask_lvts(vlapic); 1325 } else { 1326 /* 1327 * The apic is now enabled so restart the apic timer 1328 * if it is configured in periodic mode. 1329 */ 1330 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1331 if (vlapic_periodic_timer(vlapic)) 1332 vlapic_icrtmr_write_handler(vlapic); 1333 } 1334 } 1335 } 1336 1337 int 1338 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1339 uint64_t *data, bool *retu) 1340 { 1341 struct LAPIC *lapic = vlapic->apic_page; 1342 uint32_t *reg; 1343 int i; 1344 1345 /* Ignore MMIO accesses in x2APIC mode */ 1346 if (x2apic(vlapic) && mmio_access) { 1347 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1348 offset); 1349 *data = 0; 1350 goto done; 1351 } 1352 1353 if (!x2apic(vlapic) && !mmio_access) { 1354 /* 1355 * XXX Generate GP fault for MSR accesses in xAPIC mode 1356 */ 1357 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1358 "xAPIC mode", offset); 1359 *data = 0; 1360 goto done; 1361 } 1362 1363 if (offset > sizeof(*lapic)) { 1364 *data = 0; 1365 goto done; 1366 } 1367 1368 offset &= ~3; 1369 switch(offset) 1370 { 1371 case APIC_OFFSET_ID: 1372 *data = lapic->id; 1373 break; 1374 case APIC_OFFSET_VER: 1375 *data = lapic->version; 1376 break; 1377 case APIC_OFFSET_TPR: 1378 *data = vlapic_get_tpr(vlapic); 1379 break; 1380 case APIC_OFFSET_APR: 1381 *data = lapic->apr; 1382 break; 1383 case APIC_OFFSET_PPR: 1384 *data = lapic->ppr; 1385 break; 1386 case APIC_OFFSET_EOI: 1387 *data = lapic->eoi; 1388 break; 1389 case APIC_OFFSET_LDR: 1390 *data = lapic->ldr; 1391 break; 1392 case APIC_OFFSET_DFR: 1393 *data = lapic->dfr; 1394 break; 1395 case APIC_OFFSET_SVR: 1396 *data = lapic->svr; 1397 break; 1398 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1399 i = (offset - APIC_OFFSET_ISR0) >> 2; 1400 reg = &lapic->isr0; 1401 *data = *(reg + i); 1402 break; 1403 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1404 i = (offset - APIC_OFFSET_TMR0) >> 2; 1405 reg = &lapic->tmr0; 1406 *data = *(reg + i); 1407 break; 1408 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1409 i = (offset - APIC_OFFSET_IRR0) >> 2; 1410 reg = &lapic->irr0; 1411 *data = atomic_load_acq_int(reg + i); 1412 break; 1413 case APIC_OFFSET_ESR: 1414 *data = lapic->esr; 1415 break; 1416 case APIC_OFFSET_ICR_LOW: 1417 *data = lapic->icr_lo; 1418 if (x2apic(vlapic)) 1419 *data |= (uint64_t)lapic->icr_hi << 32; 1420 break; 1421 case APIC_OFFSET_ICR_HI: 1422 *data = lapic->icr_hi; 1423 break; 1424 case APIC_OFFSET_CMCI_LVT: 1425 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1426 *data = vlapic_get_lvt(vlapic, offset); 1427 #ifdef INVARIANTS 1428 reg = vlapic_get_lvtptr(vlapic, offset); 1429 KASSERT(*data == *reg, ("inconsistent lvt value at " 1430 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1431 #endif 1432 break; 1433 case APIC_OFFSET_TIMER_ICR: 1434 *data = lapic->icr_timer; 1435 break; 1436 case APIC_OFFSET_TIMER_CCR: 1437 *data = vlapic_get_ccr(vlapic); 1438 break; 1439 case APIC_OFFSET_TIMER_DCR: 1440 *data = lapic->dcr_timer; 1441 break; 1442 case APIC_OFFSET_SELF_IPI: 1443 /* 1444 * XXX generate a GP fault if vlapic is in x2apic mode 1445 */ 1446 *data = 0; 1447 break; 1448 case APIC_OFFSET_RRR: 1449 default: 1450 *data = 0; 1451 break; 1452 } 1453 done: 1454 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1455 return 0; 1456 } 1457 1458 int 1459 vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1460 uint64_t data, bool *retu) 1461 { 1462 struct LAPIC *lapic = vlapic->apic_page; 1463 uint32_t *regptr; 1464 int retval; 1465 1466 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1467 ("vlapic_write: invalid offset %#lx", offset)); 1468 1469 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1470 offset, data); 1471 1472 if (offset > sizeof(*lapic)) 1473 return (0); 1474 1475 /* Ignore MMIO accesses in x2APIC mode */ 1476 if (x2apic(vlapic) && mmio_access) { 1477 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1478 "in x2APIC mode", data, offset); 1479 return (0); 1480 } 1481 1482 /* 1483 * XXX Generate GP fault for MSR accesses in xAPIC mode 1484 */ 1485 if (!x2apic(vlapic) && !mmio_access) { 1486 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1487 "in xAPIC mode", data, offset); 1488 return (0); 1489 } 1490 1491 retval = 0; 1492 switch(offset) 1493 { 1494 case APIC_OFFSET_ID: 1495 lapic->id = data; 1496 vlapic_id_write_handler(vlapic); 1497 break; 1498 case APIC_OFFSET_TPR: 1499 vlapic_set_tpr(vlapic, data & 0xff); 1500 break; 1501 case APIC_OFFSET_EOI: 1502 vlapic_process_eoi(vlapic); 1503 break; 1504 case APIC_OFFSET_LDR: 1505 lapic->ldr = data; 1506 vlapic_ldr_write_handler(vlapic); 1507 break; 1508 case APIC_OFFSET_DFR: 1509 lapic->dfr = data; 1510 vlapic_dfr_write_handler(vlapic); 1511 break; 1512 case APIC_OFFSET_SVR: 1513 lapic->svr = data; 1514 vlapic_svr_write_handler(vlapic); 1515 break; 1516 case APIC_OFFSET_ICR_LOW: 1517 lapic->icr_lo = data; 1518 if (x2apic(vlapic)) 1519 lapic->icr_hi = data >> 32; 1520 retval = vlapic_icrlo_write_handler(vlapic, retu); 1521 break; 1522 case APIC_OFFSET_ICR_HI: 1523 lapic->icr_hi = data; 1524 break; 1525 case APIC_OFFSET_CMCI_LVT: 1526 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1527 regptr = vlapic_get_lvtptr(vlapic, offset); 1528 *regptr = data; 1529 vlapic_lvt_write_handler(vlapic, offset); 1530 break; 1531 case APIC_OFFSET_TIMER_ICR: 1532 lapic->icr_timer = data; 1533 vlapic_icrtmr_write_handler(vlapic); 1534 break; 1535 1536 case APIC_OFFSET_TIMER_DCR: 1537 lapic->dcr_timer = data; 1538 vlapic_dcr_write_handler(vlapic); 1539 break; 1540 1541 case APIC_OFFSET_ESR: 1542 vlapic_esr_write_handler(vlapic); 1543 break; 1544 1545 case APIC_OFFSET_SELF_IPI: 1546 if (x2apic(vlapic)) 1547 vlapic_self_ipi_handler(vlapic, data); 1548 break; 1549 1550 case APIC_OFFSET_VER: 1551 case APIC_OFFSET_APR: 1552 case APIC_OFFSET_PPR: 1553 case APIC_OFFSET_RRR: 1554 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1555 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1556 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1557 case APIC_OFFSET_TIMER_CCR: 1558 default: 1559 // Read only. 1560 break; 1561 } 1562 1563 return (retval); 1564 } 1565 1566 static void 1567 vlapic_reset(struct vlapic *vlapic) 1568 { 1569 struct LAPIC *lapic; 1570 1571 lapic = vlapic->apic_page; 1572 bzero(lapic, sizeof(struct LAPIC)); 1573 1574 lapic->id = vlapic_get_id(vlapic); 1575 lapic->version = VLAPIC_VERSION; 1576 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1577 lapic->dfr = 0xffffffff; 1578 lapic->svr = APIC_SVR_VECTOR; 1579 vlapic_mask_lvts(vlapic); 1580 vlapic_reset_tmr(vlapic); 1581 1582 lapic->dcr_timer = 0; 1583 vlapic_dcr_write_handler(vlapic); 1584 1585 vlapic->svr_last = lapic->svr; 1586 } 1587 1588 void 1589 vlapic_init(struct vlapic *vlapic) 1590 { 1591 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1592 KASSERT(vlapic->vcpuid >= 0 && 1593 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1594 ("vlapic_init: vcpuid is not initialized")); 1595 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1596 "initialized")); 1597 1598 /* 1599 * If the vlapic is configured in x2apic mode then it will be 1600 * accessed in the critical section via the MSR emulation code. 1601 * 1602 * Therefore the timer mutex must be a spinlock because blockable 1603 * mutexes cannot be acquired in a critical section. 1604 */ 1605 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1606 callout_init(&vlapic->callout, 1); 1607 1608 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1609 1610 if (vlapic->vcpuid == 0) 1611 vlapic->msr_apicbase |= APICBASE_BSP; 1612 1613 vlapic->ipi_exit = false; 1614 1615 vlapic_reset(vlapic); 1616 } 1617 1618 void 1619 vlapic_cleanup(struct vlapic *vlapic) 1620 { 1621 1622 callout_drain(&vlapic->callout); 1623 mtx_destroy(&vlapic->timer_mtx); 1624 } 1625 1626 uint64_t 1627 vlapic_get_apicbase(struct vlapic *vlapic) 1628 { 1629 1630 return (vlapic->msr_apicbase); 1631 } 1632 1633 int 1634 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1635 { 1636 1637 if (vlapic->msr_apicbase != new) { 1638 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1639 "not supported", vlapic->msr_apicbase, new); 1640 return (-1); 1641 } 1642 1643 return (0); 1644 } 1645 1646 void 1647 vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) 1648 { 1649 struct vlapic *vlapic; 1650 struct LAPIC *lapic; 1651 1652 vlapic = vm_lapic(vcpu); 1653 1654 if (state == X2APIC_DISABLED) 1655 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1656 else 1657 vlapic->msr_apicbase |= APICBASE_X2APIC; 1658 1659 /* 1660 * Reset the local APIC registers whose values are mode-dependent. 1661 * 1662 * XXX this works because the APIC mode can be changed only at vcpu 1663 * initialization time. 1664 */ 1665 lapic = vlapic->apic_page; 1666 lapic->id = vlapic_get_id(vlapic); 1667 if (x2apic(vlapic)) { 1668 lapic->ldr = x2apic_ldr(vlapic); 1669 lapic->dfr = 0; 1670 } else { 1671 lapic->ldr = 0; 1672 lapic->dfr = 0xffffffff; 1673 } 1674 1675 if (state == X2APIC_ENABLED) { 1676 if (vlapic->ops.enable_x2apic_mode) 1677 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1678 } 1679 } 1680 1681 void 1682 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1683 int delmode, int vec) 1684 { 1685 struct vcpu *vcpu; 1686 bool lowprio; 1687 int vcpuid; 1688 cpuset_t dmask; 1689 1690 if (delmode != IOART_DELFIXED && 1691 delmode != IOART_DELLOPRI && 1692 delmode != IOART_DELEXINT) { 1693 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1694 return; 1695 } 1696 lowprio = (delmode == IOART_DELLOPRI); 1697 1698 /* 1699 * We don't provide any virtual interrupt redirection hardware so 1700 * all interrupts originating from the ioapic or MSI specify the 1701 * 'dest' in the legacy xAPIC format. 1702 */ 1703 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1704 1705 CPU_FOREACH_ISSET(vcpuid, &dmask) { 1706 vcpu = vm_vcpu(vm, vcpuid); 1707 if (delmode == IOART_DELEXINT) { 1708 vm_inject_extint(vcpu); 1709 } else { 1710 lapic_set_intr(vcpu, vec, level); 1711 } 1712 } 1713 } 1714 1715 void 1716 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1717 { 1718 /* 1719 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1720 * 1721 * This is done by leveraging features like Posted Interrupts (Intel) 1722 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1723 * 1724 * If neither of these features are available then fallback to 1725 * sending an IPI to 'hostcpu'. 1726 */ 1727 if (vlapic->ops.post_intr) 1728 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1729 else 1730 ipi_cpu(hostcpu, ipinum); 1731 } 1732 1733 bool 1734 vlapic_enabled(struct vlapic *vlapic) 1735 { 1736 struct LAPIC *lapic = vlapic->apic_page; 1737 1738 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1739 (lapic->svr & APIC_SVR_ENABLE) != 0) 1740 return (true); 1741 else 1742 return (false); 1743 } 1744 1745 static void 1746 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1747 { 1748 struct LAPIC *lapic; 1749 uint32_t *tmrptr, mask; 1750 int idx; 1751 1752 lapic = vlapic->apic_page; 1753 tmrptr = &lapic->tmr0; 1754 idx = (vector / 32) * 4; 1755 mask = 1 << (vector % 32); 1756 if (level) 1757 tmrptr[idx] |= mask; 1758 else 1759 tmrptr[idx] &= ~mask; 1760 1761 if (vlapic->ops.set_tmr != NULL) 1762 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1763 } 1764 1765 void 1766 vlapic_reset_tmr(struct vlapic *vlapic) 1767 { 1768 int vector; 1769 1770 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1771 1772 for (vector = 0; vector <= 255; vector++) 1773 vlapic_set_tmr(vlapic, vector, false); 1774 } 1775 1776 void 1777 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1778 int delmode, int vector) 1779 { 1780 cpuset_t dmask; 1781 bool lowprio; 1782 1783 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1784 1785 /* 1786 * A level trigger is valid only for fixed and lowprio delivery modes. 1787 */ 1788 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1789 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1790 "delivery-mode %d", delmode); 1791 return; 1792 } 1793 1794 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1795 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1796 1797 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1798 return; 1799 1800 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1801 vlapic_set_tmr(vlapic, vector, true); 1802 } 1803 1804 #ifdef BHYVE_SNAPSHOT 1805 static void 1806 vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) 1807 { 1808 /* The implementation is similar to the one in the 1809 * `vlapic_icrtmr_write_handler` function 1810 */ 1811 sbintime_t sbt; 1812 struct bintime bt; 1813 1814 VLAPIC_TIMER_LOCK(vlapic); 1815 1816 bt = vlapic->timer_freq_bt; 1817 bintime_mul(&bt, ccr); 1818 1819 if (ccr != 0) { 1820 binuptime(&vlapic->timer_fire_bt); 1821 bintime_add(&vlapic->timer_fire_bt, &bt); 1822 1823 sbt = bttosbt(bt); 1824 vlapic_callout_reset(vlapic, sbt); 1825 } else { 1826 /* even if the CCR was 0, periodic timers should be reset */ 1827 if (vlapic_periodic_timer(vlapic)) { 1828 binuptime(&vlapic->timer_fire_bt); 1829 bintime_add(&vlapic->timer_fire_bt, 1830 &vlapic->timer_period_bt); 1831 sbt = bttosbt(vlapic->timer_period_bt); 1832 1833 callout_stop(&vlapic->callout); 1834 vlapic_callout_reset(vlapic, sbt); 1835 } 1836 } 1837 1838 VLAPIC_TIMER_UNLOCK(vlapic); 1839 } 1840 1841 int 1842 vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) 1843 { 1844 int ret; 1845 struct vcpu *vcpu; 1846 struct vlapic *vlapic; 1847 struct LAPIC *lapic; 1848 uint32_t ccr; 1849 uint16_t i, maxcpus; 1850 1851 KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); 1852 1853 ret = 0; 1854 1855 maxcpus = vm_get_maxcpus(vm); 1856 for (i = 0; i < maxcpus; i++) { 1857 vcpu = vm_vcpu(vm, i); 1858 if (vcpu == NULL) 1859 continue; 1860 vlapic = vm_lapic(vcpu); 1861 1862 /* snapshot the page first; timer period depends on icr_timer */ 1863 lapic = vlapic->apic_page; 1864 SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done); 1865 1866 SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); 1867 1868 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, 1869 meta, ret, done); 1870 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, 1871 meta, ret, done); 1872 1873 /* 1874 * Timer period is equal to 'icr_timer' ticks at a frequency of 1875 * 'timer_freq_bt'. 1876 */ 1877 if (meta->op == VM_SNAPSHOT_RESTORE) { 1878 vlapic->timer_period_bt = vlapic->timer_freq_bt; 1879 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 1880 } 1881 1882 SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, 1883 sizeof(vlapic->isrvec_stk), 1884 meta, ret, done); 1885 SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); 1886 1887 SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, 1888 sizeof(vlapic->lvt_last), 1889 meta, ret, done); 1890 1891 if (meta->op == VM_SNAPSHOT_SAVE) 1892 ccr = vlapic_get_ccr(vlapic); 1893 1894 SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); 1895 1896 if (meta->op == VM_SNAPSHOT_RESTORE && 1897 vlapic_enabled(vlapic) && lapic->icr_timer != 0) { 1898 /* Reset the value of the 'timer_fire_bt' and the vlapic 1899 * callout based on the value of the current count 1900 * register saved when the VM snapshot was created. 1901 * If initial count register is 0, timer is not used. 1902 * Look at "10.5.4 APIC Timer" in Software Developer Manual. 1903 */ 1904 vlapic_reset_callout(vlapic, ccr); 1905 } 1906 } 1907 1908 done: 1909 return (ret); 1910 } 1911 #endif 1912