1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bhyve_snapshot.h" 36 37 #include <sys/param.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/systm.h> 43 #include <sys/smp.h> 44 45 #include <x86/specialreg.h> 46 #include <x86/apicreg.h> 47 48 #include <machine/clock.h> 49 #include <machine/smp.h> 50 51 #include <machine/vmm.h> 52 #include <machine/vmm_snapshot.h> 53 54 #include "vmm_lapic.h" 55 #include "vmm_ktr.h" 56 #include "vmm_stat.h" 57 58 #include "vlapic.h" 59 #include "vlapic_priv.h" 60 #include "vioapic.h" 61 62 #define PRIO(x) ((x) >> 4) 63 64 #define VLAPIC_VERSION (0x14) 65 66 #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) 67 68 /* 69 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the 70 * vlapic_callout_handler() and vcpu accesses to: 71 * - timer_freq_bt, timer_period_bt, timer_fire_bt 72 * - timer LVT register 73 */ 74 #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) 75 #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) 76 #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) 77 78 /* 79 * APIC timer frequency: 80 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 81 * - power-of-two to avoid loss of precision when converted to a bintime. 82 */ 83 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 84 85 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 86 static void vlapic_callout_handler(void *arg); 87 static void vlapic_reset(struct vlapic *vlapic); 88 89 static __inline uint32_t 90 vlapic_get_id(struct vlapic *vlapic) 91 { 92 93 if (x2apic(vlapic)) 94 return (vlapic->vcpuid); 95 else 96 return (vlapic->vcpuid << 24); 97 } 98 99 static uint32_t 100 x2apic_ldr(struct vlapic *vlapic) 101 { 102 int apicid; 103 uint32_t ldr; 104 105 apicid = vlapic_get_id(vlapic); 106 ldr = 1 << (apicid & 0xf); 107 ldr |= (apicid & 0xffff0) << 12; 108 return (ldr); 109 } 110 111 void 112 vlapic_dfr_write_handler(struct vlapic *vlapic) 113 { 114 struct LAPIC *lapic; 115 116 lapic = vlapic->apic_page; 117 if (x2apic(vlapic)) { 118 VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", 119 lapic->dfr); 120 lapic->dfr = 0; 121 return; 122 } 123 124 lapic->dfr &= APIC_DFR_MODEL_MASK; 125 lapic->dfr |= APIC_DFR_RESERVED; 126 127 if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) 128 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); 129 else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) 130 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); 131 else 132 VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); 133 } 134 135 void 136 vlapic_ldr_write_handler(struct vlapic *vlapic) 137 { 138 struct LAPIC *lapic; 139 140 lapic = vlapic->apic_page; 141 142 /* LDR is read-only in x2apic mode */ 143 if (x2apic(vlapic)) { 144 VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", 145 lapic->ldr); 146 lapic->ldr = x2apic_ldr(vlapic); 147 } else { 148 lapic->ldr &= ~APIC_LDR_RESERVED; 149 VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); 150 } 151 } 152 153 void 154 vlapic_id_write_handler(struct vlapic *vlapic) 155 { 156 struct LAPIC *lapic; 157 158 /* 159 * We don't allow the ID register to be modified so reset it back to 160 * its default value. 161 */ 162 lapic = vlapic->apic_page; 163 lapic->id = vlapic_get_id(vlapic); 164 } 165 166 static int 167 vlapic_timer_divisor(uint32_t dcr) 168 { 169 switch (dcr & 0xB) { 170 case APIC_TDCR_1: 171 return (1); 172 case APIC_TDCR_2: 173 return (2); 174 case APIC_TDCR_4: 175 return (4); 176 case APIC_TDCR_8: 177 return (8); 178 case APIC_TDCR_16: 179 return (16); 180 case APIC_TDCR_32: 181 return (32); 182 case APIC_TDCR_64: 183 return (64); 184 case APIC_TDCR_128: 185 return (128); 186 default: 187 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 188 } 189 } 190 191 #if 0 192 static inline void 193 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 194 { 195 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 196 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 197 *lvt & APIC_LVTT_M); 198 } 199 #endif 200 201 static uint32_t 202 vlapic_get_ccr(struct vlapic *vlapic) 203 { 204 struct bintime bt_now, bt_rem; 205 struct LAPIC *lapic __diagused; 206 uint32_t ccr; 207 208 ccr = 0; 209 lapic = vlapic->apic_page; 210 211 VLAPIC_TIMER_LOCK(vlapic); 212 if (callout_active(&vlapic->callout)) { 213 /* 214 * If the timer is scheduled to expire in the future then 215 * compute the value of 'ccr' based on the remaining time. 216 */ 217 binuptime(&bt_now); 218 if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { 219 bt_rem = vlapic->timer_fire_bt; 220 bintime_sub(&bt_rem, &bt_now); 221 ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); 222 ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; 223 } 224 } 225 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " 226 "icr_timer is %#x", ccr, lapic->icr_timer)); 227 VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", 228 ccr, lapic->icr_timer); 229 VLAPIC_TIMER_UNLOCK(vlapic); 230 return (ccr); 231 } 232 233 void 234 vlapic_dcr_write_handler(struct vlapic *vlapic) 235 { 236 struct LAPIC *lapic; 237 int divisor; 238 239 lapic = vlapic->apic_page; 240 VLAPIC_TIMER_LOCK(vlapic); 241 242 divisor = vlapic_timer_divisor(lapic->dcr_timer); 243 VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", 244 lapic->dcr_timer, divisor); 245 246 /* 247 * Update the timer frequency and the timer period. 248 * 249 * XXX changes to the frequency divider will not take effect until 250 * the timer is reloaded. 251 */ 252 FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); 253 vlapic->timer_period_bt = vlapic->timer_freq_bt; 254 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 255 256 VLAPIC_TIMER_UNLOCK(vlapic); 257 } 258 259 void 260 vlapic_esr_write_handler(struct vlapic *vlapic) 261 { 262 struct LAPIC *lapic; 263 264 lapic = vlapic->apic_page; 265 lapic->esr = vlapic->esr_pending; 266 vlapic->esr_pending = 0; 267 } 268 269 int 270 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 271 { 272 struct LAPIC *lapic; 273 uint32_t *irrptr, *tmrptr, mask; 274 int idx; 275 276 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 277 278 lapic = vlapic->apic_page; 279 if (!(lapic->svr & APIC_SVR_ENABLE)) { 280 VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " 281 "interrupt %d", vector); 282 return (0); 283 } 284 285 if (vector < 16) { 286 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 287 false); 288 VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", 289 vector); 290 return (1); 291 } 292 293 if (vlapic->ops.set_intr_ready) 294 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 295 296 idx = (vector / 32) * 4; 297 mask = 1 << (vector % 32); 298 299 irrptr = &lapic->irr0; 300 atomic_set_int(&irrptr[idx], mask); 301 302 /* 303 * Verify that the trigger-mode of the interrupt matches with 304 * the vlapic TMR registers. 305 */ 306 tmrptr = &lapic->tmr0; 307 if ((tmrptr[idx] & mask) != (level ? mask : 0)) { 308 VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but " 309 "interrupt is %s-triggered", idx / 4, tmrptr[idx], 310 level ? "level" : "edge"); 311 } 312 313 VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); 314 return (1); 315 } 316 317 static __inline uint32_t * 318 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 319 { 320 struct LAPIC *lapic = vlapic->apic_page; 321 int i; 322 323 switch (offset) { 324 case APIC_OFFSET_CMCI_LVT: 325 return (&lapic->lvt_cmci); 326 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 327 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 328 return ((&lapic->lvt_timer) + i); 329 default: 330 panic("vlapic_get_lvt: invalid LVT\n"); 331 } 332 } 333 334 static __inline int 335 lvt_off_to_idx(uint32_t offset) 336 { 337 int index; 338 339 switch (offset) { 340 case APIC_OFFSET_CMCI_LVT: 341 index = APIC_LVT_CMCI; 342 break; 343 case APIC_OFFSET_TIMER_LVT: 344 index = APIC_LVT_TIMER; 345 break; 346 case APIC_OFFSET_THERM_LVT: 347 index = APIC_LVT_THERMAL; 348 break; 349 case APIC_OFFSET_PERF_LVT: 350 index = APIC_LVT_PMC; 351 break; 352 case APIC_OFFSET_LINT0_LVT: 353 index = APIC_LVT_LINT0; 354 break; 355 case APIC_OFFSET_LINT1_LVT: 356 index = APIC_LVT_LINT1; 357 break; 358 case APIC_OFFSET_ERROR_LVT: 359 index = APIC_LVT_ERROR; 360 break; 361 default: 362 index = -1; 363 break; 364 } 365 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 366 "invalid lvt index %d for offset %#x", index, offset)); 367 368 return (index); 369 } 370 371 static __inline uint32_t 372 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 373 { 374 int idx; 375 uint32_t val; 376 377 idx = lvt_off_to_idx(offset); 378 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 379 return (val); 380 } 381 382 void 383 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 384 { 385 uint32_t *lvtptr, mask, val; 386 struct LAPIC *lapic; 387 int idx; 388 389 lapic = vlapic->apic_page; 390 lvtptr = vlapic_get_lvtptr(vlapic, offset); 391 val = *lvtptr; 392 idx = lvt_off_to_idx(offset); 393 394 if (!(lapic->svr & APIC_SVR_ENABLE)) 395 val |= APIC_LVT_M; 396 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 397 switch (offset) { 398 case APIC_OFFSET_TIMER_LVT: 399 mask |= APIC_LVTT_TM; 400 break; 401 case APIC_OFFSET_ERROR_LVT: 402 break; 403 case APIC_OFFSET_LINT0_LVT: 404 case APIC_OFFSET_LINT1_LVT: 405 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 406 /* FALLTHROUGH */ 407 default: 408 mask |= APIC_LVT_DM; 409 break; 410 } 411 val &= mask; 412 *lvtptr = val; 413 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 414 } 415 416 static void 417 vlapic_mask_lvts(struct vlapic *vlapic) 418 { 419 struct LAPIC *lapic = vlapic->apic_page; 420 421 lapic->lvt_cmci |= APIC_LVT_M; 422 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 423 424 lapic->lvt_timer |= APIC_LVT_M; 425 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 426 427 lapic->lvt_thermal |= APIC_LVT_M; 428 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 429 430 lapic->lvt_pcint |= APIC_LVT_M; 431 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 432 433 lapic->lvt_lint0 |= APIC_LVT_M; 434 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 435 436 lapic->lvt_lint1 |= APIC_LVT_M; 437 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 438 439 lapic->lvt_error |= APIC_LVT_M; 440 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 441 } 442 443 static int 444 vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) 445 { 446 uint32_t mode, reg, vec; 447 448 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 449 450 if (reg & APIC_LVT_M) 451 return (0); 452 vec = reg & APIC_LVT_VECTOR; 453 mode = reg & APIC_LVT_DM; 454 455 switch (mode) { 456 case APIC_LVT_DM_FIXED: 457 if (vec < 16) { 458 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 459 lvt == APIC_LVT_ERROR); 460 return (0); 461 } 462 if (vlapic_set_intr_ready(vlapic, vec, false)) 463 vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true); 464 break; 465 case APIC_LVT_DM_NMI: 466 vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 467 break; 468 case APIC_LVT_DM_EXTINT: 469 vm_inject_extint(vlapic->vm, vlapic->vcpuid); 470 break; 471 default: 472 // Other modes ignored 473 return (0); 474 } 475 return (1); 476 } 477 478 #if 1 479 static void 480 dump_isrvec_stk(struct vlapic *vlapic) 481 { 482 int i; 483 uint32_t *isrptr; 484 485 isrptr = &vlapic->apic_page->isr0; 486 for (i = 0; i < 8; i++) 487 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 488 489 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 490 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 491 } 492 #endif 493 494 /* 495 * Algorithm adopted from section "Interrupt, Task and Processor Priority" 496 * in Intel Architecture Manual Vol 3a. 497 */ 498 static void 499 vlapic_update_ppr(struct vlapic *vlapic) 500 { 501 int isrvec, tpr, ppr; 502 503 /* 504 * Note that the value on the stack at index 0 is always 0. 505 * 506 * This is a placeholder for the value of ISRV when none of the 507 * bits is set in the ISRx registers. 508 */ 509 isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; 510 tpr = vlapic->apic_page->tpr; 511 512 #if 1 513 { 514 int i, lastprio, curprio, vector, idx; 515 uint32_t *isrptr; 516 517 if (vlapic->isrvec_stk_top == 0 && isrvec != 0) 518 panic("isrvec_stk is corrupted: %d", isrvec); 519 520 /* 521 * Make sure that the priority of the nested interrupts is 522 * always increasing. 523 */ 524 lastprio = -1; 525 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 526 curprio = PRIO(vlapic->isrvec_stk[i]); 527 if (curprio <= lastprio) { 528 dump_isrvec_stk(vlapic); 529 panic("isrvec_stk does not satisfy invariant"); 530 } 531 lastprio = curprio; 532 } 533 534 /* 535 * Make sure that each bit set in the ISRx registers has a 536 * corresponding entry on the isrvec stack. 537 */ 538 i = 1; 539 isrptr = &vlapic->apic_page->isr0; 540 for (vector = 0; vector < 256; vector++) { 541 idx = (vector / 32) * 4; 542 if (isrptr[idx] & (1 << (vector % 32))) { 543 if (i > vlapic->isrvec_stk_top || 544 vlapic->isrvec_stk[i] != vector) { 545 dump_isrvec_stk(vlapic); 546 panic("ISR and isrvec_stk out of sync"); 547 } 548 i++; 549 } 550 } 551 } 552 #endif 553 554 if (PRIO(tpr) >= PRIO(isrvec)) 555 ppr = tpr; 556 else 557 ppr = isrvec & 0xf0; 558 559 vlapic->apic_page->ppr = ppr; 560 VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); 561 } 562 563 void 564 vlapic_sync_tpr(struct vlapic *vlapic) 565 { 566 vlapic_update_ppr(vlapic); 567 } 568 569 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 570 571 static void 572 vlapic_process_eoi(struct vlapic *vlapic) 573 { 574 struct LAPIC *lapic = vlapic->apic_page; 575 uint32_t *isrptr, *tmrptr; 576 int i, idx, bitpos, vector; 577 578 isrptr = &lapic->isr0; 579 tmrptr = &lapic->tmr0; 580 581 for (i = 7; i >= 0; i--) { 582 idx = i * 4; 583 bitpos = fls(isrptr[idx]); 584 if (bitpos-- != 0) { 585 if (vlapic->isrvec_stk_top <= 0) { 586 panic("invalid vlapic isrvec_stk_top %d", 587 vlapic->isrvec_stk_top); 588 } 589 isrptr[idx] &= ~(1 << bitpos); 590 vector = i * 32 + bitpos; 591 VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", 592 vector); 593 VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); 594 vlapic->isrvec_stk_top--; 595 vlapic_update_ppr(vlapic); 596 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 597 vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, 598 vector); 599 } 600 return; 601 } 602 } 603 VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI"); 604 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); 605 } 606 607 static __inline int 608 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 609 { 610 611 return (lvt & mask); 612 } 613 614 static __inline int 615 vlapic_periodic_timer(struct vlapic *vlapic) 616 { 617 uint32_t lvt; 618 619 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 620 621 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 622 } 623 624 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 625 626 static void 627 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 628 { 629 630 vlapic->esr_pending |= mask; 631 632 /* 633 * Avoid infinite recursion if the error LVT itself is configured with 634 * an illegal vector. 635 */ 636 if (lvt_error) 637 return; 638 639 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 640 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); 641 } 642 } 643 644 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 645 646 static void 647 vlapic_fire_timer(struct vlapic *vlapic) 648 { 649 650 KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); 651 652 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 653 VLAPIC_CTR0(vlapic, "vlapic timer fired"); 654 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); 655 } 656 } 657 658 static VMM_STAT(VLAPIC_INTR_CMC, 659 "corrected machine check interrupts generated by vlapic"); 660 661 void 662 vlapic_fire_cmci(struct vlapic *vlapic) 663 { 664 665 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 666 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); 667 } 668 } 669 670 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 671 "lvts triggered"); 672 673 int 674 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 675 { 676 677 if (vlapic_enabled(vlapic) == false) { 678 /* 679 * When the local APIC is global/hardware disabled, 680 * LINT[1:0] pins are configured as INTR and NMI pins, 681 * respectively. 682 */ 683 switch (vector) { 684 case APIC_LVT_LINT0: 685 vm_inject_extint(vlapic->vm, vlapic->vcpuid); 686 break; 687 case APIC_LVT_LINT1: 688 vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 689 break; 690 default: 691 break; 692 } 693 return (0); 694 } 695 696 switch (vector) { 697 case APIC_LVT_LINT0: 698 case APIC_LVT_LINT1: 699 case APIC_LVT_TIMER: 700 case APIC_LVT_ERROR: 701 case APIC_LVT_PMC: 702 case APIC_LVT_THERMAL: 703 case APIC_LVT_CMCI: 704 if (vlapic_fire_lvt(vlapic, vector)) { 705 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 706 LVTS_TRIGGERRED, vector, 1); 707 } 708 break; 709 default: 710 return (EINVAL); 711 } 712 return (0); 713 } 714 715 static void 716 vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) 717 { 718 callout_reset_sbt_curcpu(&vlapic->callout, t, 0, 719 vlapic_callout_handler, vlapic, 0); 720 } 721 722 static void 723 vlapic_callout_handler(void *arg) 724 { 725 struct vlapic *vlapic; 726 struct bintime bt, btnow; 727 sbintime_t rem_sbt; 728 729 vlapic = arg; 730 731 VLAPIC_TIMER_LOCK(vlapic); 732 if (callout_pending(&vlapic->callout)) /* callout was reset */ 733 goto done; 734 735 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 736 goto done; 737 738 callout_deactivate(&vlapic->callout); 739 740 vlapic_fire_timer(vlapic); 741 742 if (vlapic_periodic_timer(vlapic)) { 743 binuptime(&btnow); 744 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 745 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 746 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 747 vlapic->timer_fire_bt.frac)); 748 749 /* 750 * Compute the delta between when the timer was supposed to 751 * fire and the present time. 752 */ 753 bt = btnow; 754 bintime_sub(&bt, &vlapic->timer_fire_bt); 755 756 rem_sbt = bttosbt(vlapic->timer_period_bt); 757 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 758 /* 759 * Adjust the time until the next countdown downward 760 * to account for the lost time. 761 */ 762 rem_sbt -= bttosbt(bt); 763 } else { 764 /* 765 * If the delta is greater than the timer period then 766 * just reset our time base instead of trying to catch 767 * up. 768 */ 769 vlapic->timer_fire_bt = btnow; 770 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 771 "usecs, period is %lu usecs - resetting time base", 772 bttosbt(bt) / SBT_1US, 773 bttosbt(vlapic->timer_period_bt) / SBT_1US); 774 } 775 776 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 777 vlapic_callout_reset(vlapic, rem_sbt); 778 } 779 done: 780 VLAPIC_TIMER_UNLOCK(vlapic); 781 } 782 783 void 784 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 785 { 786 struct LAPIC *lapic; 787 sbintime_t sbt; 788 uint32_t icr_timer; 789 790 VLAPIC_TIMER_LOCK(vlapic); 791 792 lapic = vlapic->apic_page; 793 icr_timer = lapic->icr_timer; 794 795 vlapic->timer_period_bt = vlapic->timer_freq_bt; 796 bintime_mul(&vlapic->timer_period_bt, icr_timer); 797 798 if (icr_timer != 0) { 799 binuptime(&vlapic->timer_fire_bt); 800 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 801 802 sbt = bttosbt(vlapic->timer_period_bt); 803 vlapic_callout_reset(vlapic, sbt); 804 } else 805 callout_stop(&vlapic->callout); 806 807 VLAPIC_TIMER_UNLOCK(vlapic); 808 } 809 810 /* 811 * This function populates 'dmask' with the set of vcpus that match the 812 * addressing specified by the (dest, phys, lowprio) tuple. 813 * 814 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 815 * or xAPIC (8-bit) destination field. 816 */ 817 static void 818 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 819 bool lowprio, bool x2apic_dest) 820 { 821 struct vlapic *vlapic; 822 uint32_t dfr, ldr, ldest, cluster; 823 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 824 cpuset_t amask; 825 int vcpuid; 826 827 if ((x2apic_dest && dest == 0xffffffff) || 828 (!x2apic_dest && dest == 0xff)) { 829 /* 830 * Broadcast in both logical and physical modes. 831 */ 832 *dmask = vm_active_cpus(vm); 833 return; 834 } 835 836 if (phys) { 837 /* 838 * Physical mode: destination is APIC ID. 839 */ 840 CPU_ZERO(dmask); 841 vcpuid = vm_apicid2vcpuid(vm, dest); 842 amask = vm_active_cpus(vm); 843 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 844 CPU_SET(vcpuid, dmask); 845 } else { 846 /* 847 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 848 * bitmask. This model is only available in the xAPIC mode. 849 */ 850 mda_flat_ldest = dest & 0xff; 851 852 /* 853 * In the "Cluster Model" the MDA is used to identify a 854 * specific cluster and a set of APICs in that cluster. 855 */ 856 if (x2apic_dest) { 857 mda_cluster_id = dest >> 16; 858 mda_cluster_ldest = dest & 0xffff; 859 } else { 860 mda_cluster_id = (dest >> 4) & 0xf; 861 mda_cluster_ldest = dest & 0xf; 862 } 863 864 /* 865 * Logical mode: match each APIC that has a bit set 866 * in its LDR that matches a bit in the ldest. 867 */ 868 CPU_ZERO(dmask); 869 amask = vm_active_cpus(vm); 870 CPU_FOREACH_ISSET(vcpuid, &amask) { 871 vlapic = vm_lapic(vm, vcpuid); 872 dfr = vlapic->apic_page->dfr; 873 ldr = vlapic->apic_page->ldr; 874 875 if ((dfr & APIC_DFR_MODEL_MASK) == 876 APIC_DFR_MODEL_FLAT) { 877 ldest = ldr >> 24; 878 mda_ldest = mda_flat_ldest; 879 } else if ((dfr & APIC_DFR_MODEL_MASK) == 880 APIC_DFR_MODEL_CLUSTER) { 881 if (x2apic(vlapic)) { 882 cluster = ldr >> 16; 883 ldest = ldr & 0xffff; 884 } else { 885 cluster = ldr >> 28; 886 ldest = (ldr >> 24) & 0xf; 887 } 888 if (cluster != mda_cluster_id) 889 continue; 890 mda_ldest = mda_cluster_ldest; 891 } else { 892 /* 893 * Guest has configured a bad logical 894 * model for this vcpu - skip it. 895 */ 896 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 897 "model %x - cannot deliver interrupt", dfr); 898 continue; 899 } 900 901 if ((mda_ldest & ldest) != 0) { 902 CPU_SET(vcpuid, dmask); 903 if (lowprio) 904 break; 905 } 906 } 907 } 908 } 909 910 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); 911 912 static void 913 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 914 { 915 struct LAPIC *lapic = vlapic->apic_page; 916 917 if (lapic->tpr != val) { 918 VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed " 919 "from %#x to %#x", lapic->tpr, val); 920 lapic->tpr = val; 921 vlapic_update_ppr(vlapic); 922 } 923 } 924 925 static uint8_t 926 vlapic_get_tpr(struct vlapic *vlapic) 927 { 928 struct LAPIC *lapic = vlapic->apic_page; 929 930 return (lapic->tpr); 931 } 932 933 void 934 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 935 { 936 uint8_t tpr; 937 938 if (val & ~0xf) { 939 vm_inject_gp(vlapic->vm, vlapic->vcpuid); 940 return; 941 } 942 943 tpr = val << 4; 944 vlapic_set_tpr(vlapic, tpr); 945 } 946 947 uint64_t 948 vlapic_get_cr8(struct vlapic *vlapic) 949 { 950 uint8_t tpr; 951 952 tpr = vlapic_get_tpr(vlapic); 953 return (tpr >> 4); 954 } 955 956 static bool 957 vlapic_is_icr_valid(uint64_t icrval) 958 { 959 uint32_t mode = icrval & APIC_DELMODE_MASK; 960 uint32_t level = icrval & APIC_LEVEL_MASK; 961 uint32_t trigger = icrval & APIC_TRIGMOD_MASK; 962 uint32_t shorthand = icrval & APIC_DEST_MASK; 963 964 switch (mode) { 965 case APIC_DELMODE_FIXED: 966 if (trigger == APIC_TRIGMOD_EDGE) 967 return (true); 968 /* 969 * AMD allows a level assert IPI and Intel converts a level 970 * assert IPI into an edge IPI. 971 */ 972 if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) 973 return (true); 974 break; 975 case APIC_DELMODE_LOWPRIO: 976 case APIC_DELMODE_SMI: 977 case APIC_DELMODE_NMI: 978 case APIC_DELMODE_INIT: 979 if (trigger == APIC_TRIGMOD_EDGE && 980 (shorthand == APIC_DEST_DESTFLD || 981 shorthand == APIC_DEST_ALLESELF)) 982 return (true); 983 /* 984 * AMD allows a level assert IPI and Intel converts a level 985 * assert IPI into an edge IPI. 986 */ 987 if (trigger == APIC_TRIGMOD_LEVEL && 988 level == APIC_LEVEL_ASSERT && 989 (shorthand == APIC_DEST_DESTFLD || 990 shorthand == APIC_DEST_ALLESELF)) 991 return (true); 992 /* 993 * An level triggered deassert INIT is defined in the Intel 994 * Multiprocessor Specification and the Intel Software Developer 995 * Manual. Due to the MPS it's required to send a level assert 996 * INIT to a cpu and then a level deassert INIT. Some operating 997 * systems e.g. FreeBSD or Linux use that algorithm. According 998 * to the SDM a level deassert INIT is only supported by Pentium 999 * and P6 processors. It's always send to all cpus regardless of 1000 * the destination or shorthand field. It resets the arbitration 1001 * id register. This register is not software accessible and 1002 * only required for the APIC bus arbitration. So, the level 1003 * deassert INIT doesn't need any emulation and we should ignore 1004 * it. The SDM also defines that newer processors don't support 1005 * the level deassert INIT and it's not valid any more. As it's 1006 * defined for older systems, it can't be invalid per se. 1007 * Otherwise, backward compatibility would be broken. However, 1008 * when returning false here, it'll be ignored which is the 1009 * desired behaviour. 1010 */ 1011 if (mode == APIC_DELMODE_INIT && 1012 trigger == APIC_TRIGMOD_LEVEL && 1013 level == APIC_LEVEL_DEASSERT) 1014 return (false); 1015 break; 1016 case APIC_DELMODE_STARTUP: 1017 if (shorthand == APIC_DEST_DESTFLD || 1018 shorthand == APIC_DEST_ALLESELF) 1019 return (true); 1020 break; 1021 case APIC_DELMODE_RR: 1022 /* Only available on AMD! */ 1023 if (trigger == APIC_TRIGMOD_EDGE && 1024 shorthand == APIC_DEST_DESTFLD) 1025 return (true); 1026 break; 1027 case APIC_DELMODE_RESV: 1028 return (false); 1029 default: 1030 __assert_unreachable(); 1031 } 1032 1033 return (false); 1034 } 1035 1036 int 1037 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 1038 { 1039 int i; 1040 bool phys; 1041 cpuset_t dmask, ipimask; 1042 uint64_t icrval; 1043 uint32_t dest, vec, mode, shorthand; 1044 struct vlapic *vlapic2; 1045 struct vm_exit *vmexit; 1046 struct LAPIC *lapic; 1047 1048 lapic = vlapic->apic_page; 1049 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 1050 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1051 1052 if (x2apic(vlapic)) 1053 dest = icrval >> 32; 1054 else 1055 dest = icrval >> (32 + 24); 1056 vec = icrval & APIC_VECTOR_MASK; 1057 mode = icrval & APIC_DELMODE_MASK; 1058 phys = (icrval & APIC_DESTMODE_LOG) == 0; 1059 shorthand = icrval & APIC_DEST_MASK; 1060 1061 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 1062 1063 switch (shorthand) { 1064 case APIC_DEST_DESTFLD: 1065 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); 1066 break; 1067 case APIC_DEST_SELF: 1068 CPU_SETOF(vlapic->vcpuid, &dmask); 1069 break; 1070 case APIC_DEST_ALLISELF: 1071 dmask = vm_active_cpus(vlapic->vm); 1072 break; 1073 case APIC_DEST_ALLESELF: 1074 dmask = vm_active_cpus(vlapic->vm); 1075 CPU_CLR(vlapic->vcpuid, &dmask); 1076 break; 1077 default: 1078 __assert_unreachable(); 1079 } 1080 1081 /* 1082 * Ignore invalid combinations of the icr. 1083 */ 1084 if (!vlapic_is_icr_valid(icrval)) { 1085 VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval); 1086 return (0); 1087 } 1088 1089 /* 1090 * ipimask is a set of vCPUs needing userland handling of the current 1091 * IPI. 1092 */ 1093 CPU_ZERO(&ipimask); 1094 1095 switch (mode) { 1096 case APIC_DELMODE_FIXED: 1097 if (vec < 16) { 1098 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 1099 false); 1100 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 1101 return (0); 1102 } 1103 1104 CPU_FOREACH_ISSET(i, &dmask) { 1105 lapic_intr_edge(vlapic->vm, i, vec); 1106 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 1107 IPIS_SENT, i, 1); 1108 VLAPIC_CTR2(vlapic, 1109 "vlapic sending ipi %d to vcpuid %d", vec, i); 1110 } 1111 1112 break; 1113 case APIC_DELMODE_NMI: 1114 CPU_FOREACH_ISSET(i, &dmask) { 1115 vm_inject_nmi(vlapic->vm, i); 1116 VLAPIC_CTR1(vlapic, 1117 "vlapic sending ipi nmi to vcpuid %d", i); 1118 } 1119 1120 break; 1121 case APIC_DELMODE_INIT: 1122 CPU_FOREACH_ISSET(i, &dmask) { 1123 /* 1124 * Userland which doesn't support the IPI exit requires 1125 * that the boot state is set to SIPI here. 1126 */ 1127 vlapic2 = vm_lapic(vlapic->vm, i); 1128 vlapic2->boot_state = BS_SIPI; 1129 CPU_SET(i, &ipimask); 1130 } 1131 1132 break; 1133 case APIC_DELMODE_STARTUP: 1134 CPU_FOREACH_ISSET(i, &dmask) { 1135 vlapic2 = vm_lapic(vlapic->vm, i); 1136 /* 1137 * Ignore SIPIs in any state other than wait-for-SIPI 1138 */ 1139 if (vlapic2->boot_state != BS_SIPI) 1140 continue; 1141 vlapic2->boot_state = BS_RUNNING; 1142 CPU_SET(i, &ipimask); 1143 } 1144 1145 break; 1146 default: 1147 return (1); 1148 } 1149 1150 if (!CPU_EMPTY(&ipimask)) { 1151 vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); 1152 vmexit->exitcode = VM_EXITCODE_IPI; 1153 vmexit->u.ipi.mode = mode; 1154 vmexit->u.ipi.vector = vec; 1155 vmexit->u.ipi.dmask = dmask; 1156 1157 *retu = true; 1158 1159 /* 1160 * Old bhyve versions don't support the IPI exit. Translate it 1161 * into the old style. 1162 */ 1163 if (!vlapic->ipi_exit) { 1164 if (mode == APIC_DELMODE_STARTUP) { 1165 vmexit->exitcode = VM_EXITCODE_SPINUP_AP; 1166 vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1; 1167 vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; 1168 } else { 1169 *retu = false; 1170 } 1171 } 1172 } 1173 1174 return (0); 1175 } 1176 1177 static void 1178 vlapic_handle_init(struct vm *vm, int vcpuid, void *arg) 1179 { 1180 struct vlapic *vlapic = vm_lapic(vm, vcpuid); 1181 1182 vlapic_reset(vlapic); 1183 1184 /* vlapic_reset modifies the boot state. */ 1185 vlapic->boot_state = BS_SIPI; 1186 } 1187 1188 int 1189 vm_handle_ipi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) 1190 { 1191 *retu = true; 1192 switch (vme->u.ipi.mode) { 1193 case APIC_DELMODE_INIT: 1194 vm_smp_rendezvous(vm, vcpuid, vme->u.ipi.dmask, 1195 vlapic_handle_init, NULL); 1196 break; 1197 case APIC_DELMODE_STARTUP: 1198 break; 1199 default: 1200 return (1); 1201 } 1202 1203 return (0); 1204 } 1205 1206 void 1207 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1208 { 1209 int vec; 1210 1211 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1212 1213 vec = val & 0xff; 1214 lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); 1215 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, 1216 vlapic->vcpuid, 1); 1217 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1218 } 1219 1220 int 1221 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1222 { 1223 struct LAPIC *lapic = vlapic->apic_page; 1224 int idx, i, bitpos, vector; 1225 uint32_t *irrptr, val; 1226 1227 vlapic_update_ppr(vlapic); 1228 1229 if (vlapic->ops.pending_intr) 1230 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1231 1232 irrptr = &lapic->irr0; 1233 1234 for (i = 7; i >= 0; i--) { 1235 idx = i * 4; 1236 val = atomic_load_acq_int(&irrptr[idx]); 1237 bitpos = fls(val); 1238 if (bitpos != 0) { 1239 vector = i * 32 + (bitpos - 1); 1240 if (PRIO(vector) > PRIO(lapic->ppr)) { 1241 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1242 if (vecptr != NULL) 1243 *vecptr = vector; 1244 return (1); 1245 } else 1246 break; 1247 } 1248 } 1249 return (0); 1250 } 1251 1252 void 1253 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1254 { 1255 struct LAPIC *lapic = vlapic->apic_page; 1256 uint32_t *irrptr, *isrptr; 1257 int idx, stk_top; 1258 1259 if (vlapic->ops.intr_accepted) 1260 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1261 1262 /* 1263 * clear the ready bit for vector being accepted in irr 1264 * and set the vector as in service in isr. 1265 */ 1266 idx = (vector / 32) * 4; 1267 1268 irrptr = &lapic->irr0; 1269 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1270 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1271 1272 isrptr = &lapic->isr0; 1273 isrptr[idx] |= 1 << (vector % 32); 1274 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1275 1276 /* 1277 * Update the PPR 1278 */ 1279 vlapic->isrvec_stk_top++; 1280 1281 stk_top = vlapic->isrvec_stk_top; 1282 if (stk_top >= ISRVEC_STK_SIZE) 1283 panic("isrvec_stk_top overflow %d", stk_top); 1284 1285 vlapic->isrvec_stk[stk_top] = vector; 1286 } 1287 1288 void 1289 vlapic_svr_write_handler(struct vlapic *vlapic) 1290 { 1291 struct LAPIC *lapic; 1292 uint32_t old, new, changed; 1293 1294 lapic = vlapic->apic_page; 1295 1296 new = lapic->svr; 1297 old = vlapic->svr_last; 1298 vlapic->svr_last = new; 1299 1300 changed = old ^ new; 1301 if ((changed & APIC_SVR_ENABLE) != 0) { 1302 if ((new & APIC_SVR_ENABLE) == 0) { 1303 /* 1304 * The apic is now disabled so stop the apic timer 1305 * and mask all the LVT entries. 1306 */ 1307 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1308 VLAPIC_TIMER_LOCK(vlapic); 1309 callout_stop(&vlapic->callout); 1310 VLAPIC_TIMER_UNLOCK(vlapic); 1311 vlapic_mask_lvts(vlapic); 1312 } else { 1313 /* 1314 * The apic is now enabled so restart the apic timer 1315 * if it is configured in periodic mode. 1316 */ 1317 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1318 if (vlapic_periodic_timer(vlapic)) 1319 vlapic_icrtmr_write_handler(vlapic); 1320 } 1321 } 1322 } 1323 1324 int 1325 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1326 uint64_t *data, bool *retu) 1327 { 1328 struct LAPIC *lapic = vlapic->apic_page; 1329 uint32_t *reg; 1330 int i; 1331 1332 /* Ignore MMIO accesses in x2APIC mode */ 1333 if (x2apic(vlapic) && mmio_access) { 1334 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1335 offset); 1336 *data = 0; 1337 goto done; 1338 } 1339 1340 if (!x2apic(vlapic) && !mmio_access) { 1341 /* 1342 * XXX Generate GP fault for MSR accesses in xAPIC mode 1343 */ 1344 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1345 "xAPIC mode", offset); 1346 *data = 0; 1347 goto done; 1348 } 1349 1350 if (offset > sizeof(*lapic)) { 1351 *data = 0; 1352 goto done; 1353 } 1354 1355 offset &= ~3; 1356 switch(offset) 1357 { 1358 case APIC_OFFSET_ID: 1359 *data = lapic->id; 1360 break; 1361 case APIC_OFFSET_VER: 1362 *data = lapic->version; 1363 break; 1364 case APIC_OFFSET_TPR: 1365 *data = vlapic_get_tpr(vlapic); 1366 break; 1367 case APIC_OFFSET_APR: 1368 *data = lapic->apr; 1369 break; 1370 case APIC_OFFSET_PPR: 1371 *data = lapic->ppr; 1372 break; 1373 case APIC_OFFSET_EOI: 1374 *data = lapic->eoi; 1375 break; 1376 case APIC_OFFSET_LDR: 1377 *data = lapic->ldr; 1378 break; 1379 case APIC_OFFSET_DFR: 1380 *data = lapic->dfr; 1381 break; 1382 case APIC_OFFSET_SVR: 1383 *data = lapic->svr; 1384 break; 1385 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1386 i = (offset - APIC_OFFSET_ISR0) >> 2; 1387 reg = &lapic->isr0; 1388 *data = *(reg + i); 1389 break; 1390 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1391 i = (offset - APIC_OFFSET_TMR0) >> 2; 1392 reg = &lapic->tmr0; 1393 *data = *(reg + i); 1394 break; 1395 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1396 i = (offset - APIC_OFFSET_IRR0) >> 2; 1397 reg = &lapic->irr0; 1398 *data = atomic_load_acq_int(reg + i); 1399 break; 1400 case APIC_OFFSET_ESR: 1401 *data = lapic->esr; 1402 break; 1403 case APIC_OFFSET_ICR_LOW: 1404 *data = lapic->icr_lo; 1405 if (x2apic(vlapic)) 1406 *data |= (uint64_t)lapic->icr_hi << 32; 1407 break; 1408 case APIC_OFFSET_ICR_HI: 1409 *data = lapic->icr_hi; 1410 break; 1411 case APIC_OFFSET_CMCI_LVT: 1412 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1413 *data = vlapic_get_lvt(vlapic, offset); 1414 #ifdef INVARIANTS 1415 reg = vlapic_get_lvtptr(vlapic, offset); 1416 KASSERT(*data == *reg, ("inconsistent lvt value at " 1417 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1418 #endif 1419 break; 1420 case APIC_OFFSET_TIMER_ICR: 1421 *data = lapic->icr_timer; 1422 break; 1423 case APIC_OFFSET_TIMER_CCR: 1424 *data = vlapic_get_ccr(vlapic); 1425 break; 1426 case APIC_OFFSET_TIMER_DCR: 1427 *data = lapic->dcr_timer; 1428 break; 1429 case APIC_OFFSET_SELF_IPI: 1430 /* 1431 * XXX generate a GP fault if vlapic is in x2apic mode 1432 */ 1433 *data = 0; 1434 break; 1435 case APIC_OFFSET_RRR: 1436 default: 1437 *data = 0; 1438 break; 1439 } 1440 done: 1441 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1442 return 0; 1443 } 1444 1445 int 1446 vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1447 uint64_t data, bool *retu) 1448 { 1449 struct LAPIC *lapic = vlapic->apic_page; 1450 uint32_t *regptr; 1451 int retval; 1452 1453 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1454 ("vlapic_write: invalid offset %#lx", offset)); 1455 1456 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1457 offset, data); 1458 1459 if (offset > sizeof(*lapic)) 1460 return (0); 1461 1462 /* Ignore MMIO accesses in x2APIC mode */ 1463 if (x2apic(vlapic) && mmio_access) { 1464 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1465 "in x2APIC mode", data, offset); 1466 return (0); 1467 } 1468 1469 /* 1470 * XXX Generate GP fault for MSR accesses in xAPIC mode 1471 */ 1472 if (!x2apic(vlapic) && !mmio_access) { 1473 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1474 "in xAPIC mode", data, offset); 1475 return (0); 1476 } 1477 1478 retval = 0; 1479 switch(offset) 1480 { 1481 case APIC_OFFSET_ID: 1482 lapic->id = data; 1483 vlapic_id_write_handler(vlapic); 1484 break; 1485 case APIC_OFFSET_TPR: 1486 vlapic_set_tpr(vlapic, data & 0xff); 1487 break; 1488 case APIC_OFFSET_EOI: 1489 vlapic_process_eoi(vlapic); 1490 break; 1491 case APIC_OFFSET_LDR: 1492 lapic->ldr = data; 1493 vlapic_ldr_write_handler(vlapic); 1494 break; 1495 case APIC_OFFSET_DFR: 1496 lapic->dfr = data; 1497 vlapic_dfr_write_handler(vlapic); 1498 break; 1499 case APIC_OFFSET_SVR: 1500 lapic->svr = data; 1501 vlapic_svr_write_handler(vlapic); 1502 break; 1503 case APIC_OFFSET_ICR_LOW: 1504 lapic->icr_lo = data; 1505 if (x2apic(vlapic)) 1506 lapic->icr_hi = data >> 32; 1507 retval = vlapic_icrlo_write_handler(vlapic, retu); 1508 break; 1509 case APIC_OFFSET_ICR_HI: 1510 lapic->icr_hi = data; 1511 break; 1512 case APIC_OFFSET_CMCI_LVT: 1513 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1514 regptr = vlapic_get_lvtptr(vlapic, offset); 1515 *regptr = data; 1516 vlapic_lvt_write_handler(vlapic, offset); 1517 break; 1518 case APIC_OFFSET_TIMER_ICR: 1519 lapic->icr_timer = data; 1520 vlapic_icrtmr_write_handler(vlapic); 1521 break; 1522 1523 case APIC_OFFSET_TIMER_DCR: 1524 lapic->dcr_timer = data; 1525 vlapic_dcr_write_handler(vlapic); 1526 break; 1527 1528 case APIC_OFFSET_ESR: 1529 vlapic_esr_write_handler(vlapic); 1530 break; 1531 1532 case APIC_OFFSET_SELF_IPI: 1533 if (x2apic(vlapic)) 1534 vlapic_self_ipi_handler(vlapic, data); 1535 break; 1536 1537 case APIC_OFFSET_VER: 1538 case APIC_OFFSET_APR: 1539 case APIC_OFFSET_PPR: 1540 case APIC_OFFSET_RRR: 1541 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1542 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1543 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1544 case APIC_OFFSET_TIMER_CCR: 1545 default: 1546 // Read only. 1547 break; 1548 } 1549 1550 return (retval); 1551 } 1552 1553 static void 1554 vlapic_reset(struct vlapic *vlapic) 1555 { 1556 struct LAPIC *lapic; 1557 1558 lapic = vlapic->apic_page; 1559 bzero(lapic, sizeof(struct LAPIC)); 1560 1561 lapic->id = vlapic_get_id(vlapic); 1562 lapic->version = VLAPIC_VERSION; 1563 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1564 lapic->dfr = 0xffffffff; 1565 lapic->svr = APIC_SVR_VECTOR; 1566 vlapic_mask_lvts(vlapic); 1567 vlapic_reset_tmr(vlapic); 1568 1569 lapic->dcr_timer = 0; 1570 vlapic_dcr_write_handler(vlapic); 1571 1572 if (vlapic->vcpuid == 0) 1573 vlapic->boot_state = BS_RUNNING; /* BSP */ 1574 else 1575 vlapic->boot_state = BS_INIT; /* AP */ 1576 1577 vlapic->svr_last = lapic->svr; 1578 } 1579 1580 void 1581 vlapic_init(struct vlapic *vlapic) 1582 { 1583 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1584 KASSERT(vlapic->vcpuid >= 0 && 1585 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1586 ("vlapic_init: vcpuid is not initialized")); 1587 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1588 "initialized")); 1589 1590 /* 1591 * If the vlapic is configured in x2apic mode then it will be 1592 * accessed in the critical section via the MSR emulation code. 1593 * 1594 * Therefore the timer mutex must be a spinlock because blockable 1595 * mutexes cannot be acquired in a critical section. 1596 */ 1597 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1598 callout_init(&vlapic->callout, 1); 1599 1600 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1601 1602 if (vlapic->vcpuid == 0) 1603 vlapic->msr_apicbase |= APICBASE_BSP; 1604 1605 vlapic->ipi_exit = false; 1606 1607 vlapic_reset(vlapic); 1608 } 1609 1610 void 1611 vlapic_cleanup(struct vlapic *vlapic) 1612 { 1613 1614 callout_drain(&vlapic->callout); 1615 } 1616 1617 uint64_t 1618 vlapic_get_apicbase(struct vlapic *vlapic) 1619 { 1620 1621 return (vlapic->msr_apicbase); 1622 } 1623 1624 int 1625 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1626 { 1627 1628 if (vlapic->msr_apicbase != new) { 1629 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1630 "not supported", vlapic->msr_apicbase, new); 1631 return (-1); 1632 } 1633 1634 return (0); 1635 } 1636 1637 void 1638 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1639 { 1640 struct vlapic *vlapic; 1641 struct LAPIC *lapic; 1642 1643 vlapic = vm_lapic(vm, vcpuid); 1644 1645 if (state == X2APIC_DISABLED) 1646 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1647 else 1648 vlapic->msr_apicbase |= APICBASE_X2APIC; 1649 1650 /* 1651 * Reset the local APIC registers whose values are mode-dependent. 1652 * 1653 * XXX this works because the APIC mode can be changed only at vcpu 1654 * initialization time. 1655 */ 1656 lapic = vlapic->apic_page; 1657 lapic->id = vlapic_get_id(vlapic); 1658 if (x2apic(vlapic)) { 1659 lapic->ldr = x2apic_ldr(vlapic); 1660 lapic->dfr = 0; 1661 } else { 1662 lapic->ldr = 0; 1663 lapic->dfr = 0xffffffff; 1664 } 1665 1666 if (state == X2APIC_ENABLED) { 1667 if (vlapic->ops.enable_x2apic_mode) 1668 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1669 } 1670 } 1671 1672 void 1673 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1674 int delmode, int vec) 1675 { 1676 bool lowprio; 1677 int vcpuid; 1678 cpuset_t dmask; 1679 1680 if (delmode != IOART_DELFIXED && 1681 delmode != IOART_DELLOPRI && 1682 delmode != IOART_DELEXINT) { 1683 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1684 return; 1685 } 1686 lowprio = (delmode == IOART_DELLOPRI); 1687 1688 /* 1689 * We don't provide any virtual interrupt redirection hardware so 1690 * all interrupts originating from the ioapic or MSI specify the 1691 * 'dest' in the legacy xAPIC format. 1692 */ 1693 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1694 1695 CPU_FOREACH_ISSET(vcpuid, &dmask) { 1696 if (delmode == IOART_DELEXINT) { 1697 vm_inject_extint(vm, vcpuid); 1698 } else { 1699 lapic_set_intr(vm, vcpuid, vec, level); 1700 } 1701 } 1702 } 1703 1704 void 1705 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1706 { 1707 /* 1708 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1709 * 1710 * This is done by leveraging features like Posted Interrupts (Intel) 1711 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1712 * 1713 * If neither of these features are available then fallback to 1714 * sending an IPI to 'hostcpu'. 1715 */ 1716 if (vlapic->ops.post_intr) 1717 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1718 else 1719 ipi_cpu(hostcpu, ipinum); 1720 } 1721 1722 bool 1723 vlapic_enabled(struct vlapic *vlapic) 1724 { 1725 struct LAPIC *lapic = vlapic->apic_page; 1726 1727 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1728 (lapic->svr & APIC_SVR_ENABLE) != 0) 1729 return (true); 1730 else 1731 return (false); 1732 } 1733 1734 static void 1735 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1736 { 1737 struct LAPIC *lapic; 1738 uint32_t *tmrptr, mask; 1739 int idx; 1740 1741 lapic = vlapic->apic_page; 1742 tmrptr = &lapic->tmr0; 1743 idx = (vector / 32) * 4; 1744 mask = 1 << (vector % 32); 1745 if (level) 1746 tmrptr[idx] |= mask; 1747 else 1748 tmrptr[idx] &= ~mask; 1749 1750 if (vlapic->ops.set_tmr != NULL) 1751 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1752 } 1753 1754 void 1755 vlapic_reset_tmr(struct vlapic *vlapic) 1756 { 1757 int vector; 1758 1759 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1760 1761 for (vector = 0; vector <= 255; vector++) 1762 vlapic_set_tmr(vlapic, vector, false); 1763 } 1764 1765 void 1766 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1767 int delmode, int vector) 1768 { 1769 cpuset_t dmask; 1770 bool lowprio; 1771 1772 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1773 1774 /* 1775 * A level trigger is valid only for fixed and lowprio delivery modes. 1776 */ 1777 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1778 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1779 "delivery-mode %d", delmode); 1780 return; 1781 } 1782 1783 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1784 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1785 1786 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1787 return; 1788 1789 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1790 vlapic_set_tmr(vlapic, vector, true); 1791 } 1792 1793 #ifdef BHYVE_SNAPSHOT 1794 static void 1795 vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) 1796 { 1797 /* The implementation is similar to the one in the 1798 * `vlapic_icrtmr_write_handler` function 1799 */ 1800 sbintime_t sbt; 1801 struct bintime bt; 1802 1803 VLAPIC_TIMER_LOCK(vlapic); 1804 1805 bt = vlapic->timer_freq_bt; 1806 bintime_mul(&bt, ccr); 1807 1808 if (ccr != 0) { 1809 binuptime(&vlapic->timer_fire_bt); 1810 bintime_add(&vlapic->timer_fire_bt, &bt); 1811 1812 sbt = bttosbt(bt); 1813 vlapic_callout_reset(vlapic, sbt); 1814 } else { 1815 /* even if the CCR was 0, periodic timers should be reset */ 1816 if (vlapic_periodic_timer(vlapic)) { 1817 binuptime(&vlapic->timer_fire_bt); 1818 bintime_add(&vlapic->timer_fire_bt, 1819 &vlapic->timer_period_bt); 1820 sbt = bttosbt(vlapic->timer_period_bt); 1821 1822 callout_stop(&vlapic->callout); 1823 vlapic_callout_reset(vlapic, sbt); 1824 } 1825 } 1826 1827 VLAPIC_TIMER_UNLOCK(vlapic); 1828 } 1829 1830 int 1831 vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) 1832 { 1833 int i, ret; 1834 struct vlapic *vlapic; 1835 struct LAPIC *lapic; 1836 uint32_t ccr; 1837 1838 KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); 1839 1840 ret = 0; 1841 1842 for (i = 0; i < VM_MAXCPU; i++) { 1843 vlapic = vm_lapic(vm, i); 1844 1845 /* snapshot the page first; timer period depends on icr_timer */ 1846 lapic = vlapic->apic_page; 1847 SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done); 1848 1849 SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); 1850 1851 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, 1852 meta, ret, done); 1853 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, 1854 meta, ret, done); 1855 1856 /* 1857 * Timer period is equal to 'icr_timer' ticks at a frequency of 1858 * 'timer_freq_bt'. 1859 */ 1860 if (meta->op == VM_SNAPSHOT_RESTORE) { 1861 vlapic->timer_period_bt = vlapic->timer_freq_bt; 1862 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 1863 } 1864 1865 SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, 1866 sizeof(vlapic->isrvec_stk), 1867 meta, ret, done); 1868 SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); 1869 SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done); 1870 1871 SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, 1872 sizeof(vlapic->lvt_last), 1873 meta, ret, done); 1874 1875 if (meta->op == VM_SNAPSHOT_SAVE) 1876 ccr = vlapic_get_ccr(vlapic); 1877 1878 SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); 1879 1880 if (meta->op == VM_SNAPSHOT_RESTORE && 1881 vlapic_enabled(vlapic) && lapic->icr_timer != 0) { 1882 /* Reset the value of the 'timer_fire_bt' and the vlapic 1883 * callout based on the value of the current count 1884 * register saved when the VM snapshot was created. 1885 * If initial count register is 0, timer is not used. 1886 * Look at "10.5.4 APIC Timer" in Software Developer Manual. 1887 */ 1888 vlapic_reset_callout(vlapic, ccr); 1889 } 1890 } 1891 1892 done: 1893 return (ret); 1894 } 1895 #endif 1896