1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bhyve_snapshot.h" 36 37 #include <sys/param.h> 38 #include <sys/lock.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/systm.h> 43 #include <sys/smp.h> 44 45 #include <x86/specialreg.h> 46 #include <x86/apicreg.h> 47 48 #include <machine/clock.h> 49 #include <machine/smp.h> 50 51 #include <machine/vmm.h> 52 #include <machine/vmm_snapshot.h> 53 54 #include "vmm_lapic.h" 55 #include "vmm_ktr.h" 56 #include "vmm_stat.h" 57 58 #include "vlapic.h" 59 #include "vlapic_priv.h" 60 #include "vioapic.h" 61 62 #define PRIO(x) ((x) >> 4) 63 64 #define VLAPIC_VERSION (16) 65 66 #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) 67 68 /* 69 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the 70 * vlapic_callout_handler() and vcpu accesses to: 71 * - timer_freq_bt, timer_period_bt, timer_fire_bt 72 * - timer LVT register 73 */ 74 #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) 75 #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) 76 #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) 77 78 /* 79 * APIC timer frequency: 80 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 81 * - power-of-two to avoid loss of precision when converted to a bintime. 82 */ 83 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 84 85 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 86 static void vlapic_callout_handler(void *arg); 87 88 static __inline uint32_t 89 vlapic_get_id(struct vlapic *vlapic) 90 { 91 92 if (x2apic(vlapic)) 93 return (vlapic->vcpuid); 94 else 95 return (vlapic->vcpuid << 24); 96 } 97 98 static uint32_t 99 x2apic_ldr(struct vlapic *vlapic) 100 { 101 int apicid; 102 uint32_t ldr; 103 104 apicid = vlapic_get_id(vlapic); 105 ldr = 1 << (apicid & 0xf); 106 ldr |= (apicid & 0xffff0) << 12; 107 return (ldr); 108 } 109 110 void 111 vlapic_dfr_write_handler(struct vlapic *vlapic) 112 { 113 struct LAPIC *lapic; 114 115 lapic = vlapic->apic_page; 116 if (x2apic(vlapic)) { 117 VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", 118 lapic->dfr); 119 lapic->dfr = 0; 120 return; 121 } 122 123 lapic->dfr &= APIC_DFR_MODEL_MASK; 124 lapic->dfr |= APIC_DFR_RESERVED; 125 126 if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) 127 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); 128 else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) 129 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); 130 else 131 VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); 132 } 133 134 void 135 vlapic_ldr_write_handler(struct vlapic *vlapic) 136 { 137 struct LAPIC *lapic; 138 139 lapic = vlapic->apic_page; 140 141 /* LDR is read-only in x2apic mode */ 142 if (x2apic(vlapic)) { 143 VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", 144 lapic->ldr); 145 lapic->ldr = x2apic_ldr(vlapic); 146 } else { 147 lapic->ldr &= ~APIC_LDR_RESERVED; 148 VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); 149 } 150 } 151 152 void 153 vlapic_id_write_handler(struct vlapic *vlapic) 154 { 155 struct LAPIC *lapic; 156 157 /* 158 * We don't allow the ID register to be modified so reset it back to 159 * its default value. 160 */ 161 lapic = vlapic->apic_page; 162 lapic->id = vlapic_get_id(vlapic); 163 } 164 165 static int 166 vlapic_timer_divisor(uint32_t dcr) 167 { 168 switch (dcr & 0xB) { 169 case APIC_TDCR_1: 170 return (1); 171 case APIC_TDCR_2: 172 return (2); 173 case APIC_TDCR_4: 174 return (4); 175 case APIC_TDCR_8: 176 return (8); 177 case APIC_TDCR_16: 178 return (16); 179 case APIC_TDCR_32: 180 return (32); 181 case APIC_TDCR_64: 182 return (64); 183 case APIC_TDCR_128: 184 return (128); 185 default: 186 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 187 } 188 } 189 190 #if 0 191 static inline void 192 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) 193 { 194 printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, 195 *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, 196 *lvt & APIC_LVTT_M); 197 } 198 #endif 199 200 static uint32_t 201 vlapic_get_ccr(struct vlapic *vlapic) 202 { 203 struct bintime bt_now, bt_rem; 204 struct LAPIC *lapic; 205 uint32_t ccr; 206 207 ccr = 0; 208 lapic = vlapic->apic_page; 209 210 VLAPIC_TIMER_LOCK(vlapic); 211 if (callout_active(&vlapic->callout)) { 212 /* 213 * If the timer is scheduled to expire in the future then 214 * compute the value of 'ccr' based on the remaining time. 215 */ 216 binuptime(&bt_now); 217 if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { 218 bt_rem = vlapic->timer_fire_bt; 219 bintime_sub(&bt_rem, &bt_now); 220 ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); 221 ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; 222 } 223 } 224 KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " 225 "icr_timer is %#x", ccr, lapic->icr_timer)); 226 VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", 227 ccr, lapic->icr_timer); 228 VLAPIC_TIMER_UNLOCK(vlapic); 229 return (ccr); 230 } 231 232 void 233 vlapic_dcr_write_handler(struct vlapic *vlapic) 234 { 235 struct LAPIC *lapic; 236 int divisor; 237 238 lapic = vlapic->apic_page; 239 VLAPIC_TIMER_LOCK(vlapic); 240 241 divisor = vlapic_timer_divisor(lapic->dcr_timer); 242 VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", 243 lapic->dcr_timer, divisor); 244 245 /* 246 * Update the timer frequency and the timer period. 247 * 248 * XXX changes to the frequency divider will not take effect until 249 * the timer is reloaded. 250 */ 251 FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); 252 vlapic->timer_period_bt = vlapic->timer_freq_bt; 253 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 254 255 VLAPIC_TIMER_UNLOCK(vlapic); 256 } 257 258 void 259 vlapic_esr_write_handler(struct vlapic *vlapic) 260 { 261 struct LAPIC *lapic; 262 263 lapic = vlapic->apic_page; 264 lapic->esr = vlapic->esr_pending; 265 vlapic->esr_pending = 0; 266 } 267 268 int 269 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 270 { 271 struct LAPIC *lapic; 272 uint32_t *irrptr, *tmrptr, mask; 273 int idx; 274 275 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 276 277 lapic = vlapic->apic_page; 278 if (!(lapic->svr & APIC_SVR_ENABLE)) { 279 VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " 280 "interrupt %d", vector); 281 return (0); 282 } 283 284 if (vector < 16) { 285 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 286 false); 287 VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", 288 vector); 289 return (1); 290 } 291 292 if (vlapic->ops.set_intr_ready) 293 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 294 295 idx = (vector / 32) * 4; 296 mask = 1 << (vector % 32); 297 298 irrptr = &lapic->irr0; 299 atomic_set_int(&irrptr[idx], mask); 300 301 /* 302 * Verify that the trigger-mode of the interrupt matches with 303 * the vlapic TMR registers. 304 */ 305 tmrptr = &lapic->tmr0; 306 if ((tmrptr[idx] & mask) != (level ? mask : 0)) { 307 VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but " 308 "interrupt is %s-triggered", idx / 4, tmrptr[idx], 309 level ? "level" : "edge"); 310 } 311 312 VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); 313 return (1); 314 } 315 316 static __inline uint32_t * 317 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 318 { 319 struct LAPIC *lapic = vlapic->apic_page; 320 int i; 321 322 switch (offset) { 323 case APIC_OFFSET_CMCI_LVT: 324 return (&lapic->lvt_cmci); 325 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 326 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 327 return ((&lapic->lvt_timer) + i); 328 default: 329 panic("vlapic_get_lvt: invalid LVT\n"); 330 } 331 } 332 333 static __inline int 334 lvt_off_to_idx(uint32_t offset) 335 { 336 int index; 337 338 switch (offset) { 339 case APIC_OFFSET_CMCI_LVT: 340 index = APIC_LVT_CMCI; 341 break; 342 case APIC_OFFSET_TIMER_LVT: 343 index = APIC_LVT_TIMER; 344 break; 345 case APIC_OFFSET_THERM_LVT: 346 index = APIC_LVT_THERMAL; 347 break; 348 case APIC_OFFSET_PERF_LVT: 349 index = APIC_LVT_PMC; 350 break; 351 case APIC_OFFSET_LINT0_LVT: 352 index = APIC_LVT_LINT0; 353 break; 354 case APIC_OFFSET_LINT1_LVT: 355 index = APIC_LVT_LINT1; 356 break; 357 case APIC_OFFSET_ERROR_LVT: 358 index = APIC_LVT_ERROR; 359 break; 360 default: 361 index = -1; 362 break; 363 } 364 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 365 "invalid lvt index %d for offset %#x", index, offset)); 366 367 return (index); 368 } 369 370 static __inline uint32_t 371 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 372 { 373 int idx; 374 uint32_t val; 375 376 idx = lvt_off_to_idx(offset); 377 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 378 return (val); 379 } 380 381 void 382 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 383 { 384 uint32_t *lvtptr, mask, val; 385 struct LAPIC *lapic; 386 int idx; 387 388 lapic = vlapic->apic_page; 389 lvtptr = vlapic_get_lvtptr(vlapic, offset); 390 val = *lvtptr; 391 idx = lvt_off_to_idx(offset); 392 393 if (!(lapic->svr & APIC_SVR_ENABLE)) 394 val |= APIC_LVT_M; 395 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 396 switch (offset) { 397 case APIC_OFFSET_TIMER_LVT: 398 mask |= APIC_LVTT_TM; 399 break; 400 case APIC_OFFSET_ERROR_LVT: 401 break; 402 case APIC_OFFSET_LINT0_LVT: 403 case APIC_OFFSET_LINT1_LVT: 404 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 405 /* FALLTHROUGH */ 406 default: 407 mask |= APIC_LVT_DM; 408 break; 409 } 410 val &= mask; 411 *lvtptr = val; 412 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 413 } 414 415 static void 416 vlapic_mask_lvts(struct vlapic *vlapic) 417 { 418 struct LAPIC *lapic = vlapic->apic_page; 419 420 lapic->lvt_cmci |= APIC_LVT_M; 421 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 422 423 lapic->lvt_timer |= APIC_LVT_M; 424 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 425 426 lapic->lvt_thermal |= APIC_LVT_M; 427 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 428 429 lapic->lvt_pcint |= APIC_LVT_M; 430 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 431 432 lapic->lvt_lint0 |= APIC_LVT_M; 433 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 434 435 lapic->lvt_lint1 |= APIC_LVT_M; 436 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 437 438 lapic->lvt_error |= APIC_LVT_M; 439 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 440 } 441 442 static int 443 vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt) 444 { 445 uint32_t mode, reg, vec; 446 447 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 448 449 if (reg & APIC_LVT_M) 450 return (0); 451 vec = reg & APIC_LVT_VECTOR; 452 mode = reg & APIC_LVT_DM; 453 454 switch (mode) { 455 case APIC_LVT_DM_FIXED: 456 if (vec < 16) { 457 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 458 lvt == APIC_LVT_ERROR); 459 return (0); 460 } 461 if (vlapic_set_intr_ready(vlapic, vec, false)) 462 vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true); 463 break; 464 case APIC_LVT_DM_NMI: 465 vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 466 break; 467 case APIC_LVT_DM_EXTINT: 468 vm_inject_extint(vlapic->vm, vlapic->vcpuid); 469 break; 470 default: 471 // Other modes ignored 472 return (0); 473 } 474 return (1); 475 } 476 477 #if 1 478 static void 479 dump_isrvec_stk(struct vlapic *vlapic) 480 { 481 int i; 482 uint32_t *isrptr; 483 484 isrptr = &vlapic->apic_page->isr0; 485 for (i = 0; i < 8; i++) 486 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 487 488 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 489 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 490 } 491 #endif 492 493 /* 494 * Algorithm adopted from section "Interrupt, Task and Processor Priority" 495 * in Intel Architecture Manual Vol 3a. 496 */ 497 static void 498 vlapic_update_ppr(struct vlapic *vlapic) 499 { 500 int isrvec, tpr, ppr; 501 502 /* 503 * Note that the value on the stack at index 0 is always 0. 504 * 505 * This is a placeholder for the value of ISRV when none of the 506 * bits is set in the ISRx registers. 507 */ 508 isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; 509 tpr = vlapic->apic_page->tpr; 510 511 #if 1 512 { 513 int i, lastprio, curprio, vector, idx; 514 uint32_t *isrptr; 515 516 if (vlapic->isrvec_stk_top == 0 && isrvec != 0) 517 panic("isrvec_stk is corrupted: %d", isrvec); 518 519 /* 520 * Make sure that the priority of the nested interrupts is 521 * always increasing. 522 */ 523 lastprio = -1; 524 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 525 curprio = PRIO(vlapic->isrvec_stk[i]); 526 if (curprio <= lastprio) { 527 dump_isrvec_stk(vlapic); 528 panic("isrvec_stk does not satisfy invariant"); 529 } 530 lastprio = curprio; 531 } 532 533 /* 534 * Make sure that each bit set in the ISRx registers has a 535 * corresponding entry on the isrvec stack. 536 */ 537 i = 1; 538 isrptr = &vlapic->apic_page->isr0; 539 for (vector = 0; vector < 256; vector++) { 540 idx = (vector / 32) * 4; 541 if (isrptr[idx] & (1 << (vector % 32))) { 542 if (i > vlapic->isrvec_stk_top || 543 vlapic->isrvec_stk[i] != vector) { 544 dump_isrvec_stk(vlapic); 545 panic("ISR and isrvec_stk out of sync"); 546 } 547 i++; 548 } 549 } 550 } 551 #endif 552 553 if (PRIO(tpr) >= PRIO(isrvec)) 554 ppr = tpr; 555 else 556 ppr = isrvec & 0xf0; 557 558 vlapic->apic_page->ppr = ppr; 559 VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); 560 } 561 562 void 563 vlapic_sync_tpr(struct vlapic *vlapic) 564 { 565 vlapic_update_ppr(vlapic); 566 } 567 568 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 569 570 static void 571 vlapic_process_eoi(struct vlapic *vlapic) 572 { 573 struct LAPIC *lapic = vlapic->apic_page; 574 uint32_t *isrptr, *tmrptr; 575 int i, idx, bitpos, vector; 576 577 isrptr = &lapic->isr0; 578 tmrptr = &lapic->tmr0; 579 580 for (i = 7; i >= 0; i--) { 581 idx = i * 4; 582 bitpos = fls(isrptr[idx]); 583 if (bitpos-- != 0) { 584 if (vlapic->isrvec_stk_top <= 0) { 585 panic("invalid vlapic isrvec_stk_top %d", 586 vlapic->isrvec_stk_top); 587 } 588 isrptr[idx] &= ~(1 << bitpos); 589 vector = i * 32 + bitpos; 590 VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", 591 vector); 592 VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); 593 vlapic->isrvec_stk_top--; 594 vlapic_update_ppr(vlapic); 595 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 596 vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, 597 vector); 598 } 599 return; 600 } 601 } 602 VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI"); 603 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); 604 } 605 606 static __inline int 607 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 608 { 609 610 return (lvt & mask); 611 } 612 613 static __inline int 614 vlapic_periodic_timer(struct vlapic *vlapic) 615 { 616 uint32_t lvt; 617 618 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 619 620 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 621 } 622 623 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 624 625 static void 626 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 627 { 628 629 vlapic->esr_pending |= mask; 630 631 /* 632 * Avoid infinite recursion if the error LVT itself is configured with 633 * an illegal vector. 634 */ 635 if (lvt_error) 636 return; 637 638 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 639 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); 640 } 641 } 642 643 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 644 645 static void 646 vlapic_fire_timer(struct vlapic *vlapic) 647 { 648 649 KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); 650 651 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 652 VLAPIC_CTR0(vlapic, "vlapic timer fired"); 653 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); 654 } 655 } 656 657 static VMM_STAT(VLAPIC_INTR_CMC, 658 "corrected machine check interrupts generated by vlapic"); 659 660 void 661 vlapic_fire_cmci(struct vlapic *vlapic) 662 { 663 664 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 665 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); 666 } 667 } 668 669 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 670 "lvts triggered"); 671 672 int 673 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 674 { 675 676 if (vlapic_enabled(vlapic) == false) { 677 /* 678 * When the local APIC is global/hardware disabled, 679 * LINT[1:0] pins are configured as INTR and NMI pins, 680 * respectively. 681 */ 682 switch (vector) { 683 case APIC_LVT_LINT0: 684 vm_inject_extint(vlapic->vm, vlapic->vcpuid); 685 break; 686 case APIC_LVT_LINT1: 687 vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 688 break; 689 default: 690 break; 691 } 692 return (0); 693 } 694 695 switch (vector) { 696 case APIC_LVT_LINT0: 697 case APIC_LVT_LINT1: 698 case APIC_LVT_TIMER: 699 case APIC_LVT_ERROR: 700 case APIC_LVT_PMC: 701 case APIC_LVT_THERMAL: 702 case APIC_LVT_CMCI: 703 if (vlapic_fire_lvt(vlapic, vector)) { 704 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 705 LVTS_TRIGGERRED, vector, 1); 706 } 707 break; 708 default: 709 return (EINVAL); 710 } 711 return (0); 712 } 713 714 static void 715 vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) 716 { 717 callout_reset_sbt_curcpu(&vlapic->callout, t, 0, 718 vlapic_callout_handler, vlapic, 0); 719 } 720 721 static void 722 vlapic_callout_handler(void *arg) 723 { 724 struct vlapic *vlapic; 725 struct bintime bt, btnow; 726 sbintime_t rem_sbt; 727 728 vlapic = arg; 729 730 VLAPIC_TIMER_LOCK(vlapic); 731 if (callout_pending(&vlapic->callout)) /* callout was reset */ 732 goto done; 733 734 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 735 goto done; 736 737 callout_deactivate(&vlapic->callout); 738 739 vlapic_fire_timer(vlapic); 740 741 if (vlapic_periodic_timer(vlapic)) { 742 binuptime(&btnow); 743 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 744 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 745 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 746 vlapic->timer_fire_bt.frac)); 747 748 /* 749 * Compute the delta between when the timer was supposed to 750 * fire and the present time. 751 */ 752 bt = btnow; 753 bintime_sub(&bt, &vlapic->timer_fire_bt); 754 755 rem_sbt = bttosbt(vlapic->timer_period_bt); 756 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 757 /* 758 * Adjust the time until the next countdown downward 759 * to account for the lost time. 760 */ 761 rem_sbt -= bttosbt(bt); 762 } else { 763 /* 764 * If the delta is greater than the timer period then 765 * just reset our time base instead of trying to catch 766 * up. 767 */ 768 vlapic->timer_fire_bt = btnow; 769 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 770 "usecs, period is %lu usecs - resetting time base", 771 bttosbt(bt) / SBT_1US, 772 bttosbt(vlapic->timer_period_bt) / SBT_1US); 773 } 774 775 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 776 vlapic_callout_reset(vlapic, rem_sbt); 777 } 778 done: 779 VLAPIC_TIMER_UNLOCK(vlapic); 780 } 781 782 void 783 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 784 { 785 struct LAPIC *lapic; 786 sbintime_t sbt; 787 uint32_t icr_timer; 788 789 VLAPIC_TIMER_LOCK(vlapic); 790 791 lapic = vlapic->apic_page; 792 icr_timer = lapic->icr_timer; 793 794 vlapic->timer_period_bt = vlapic->timer_freq_bt; 795 bintime_mul(&vlapic->timer_period_bt, icr_timer); 796 797 if (icr_timer != 0) { 798 binuptime(&vlapic->timer_fire_bt); 799 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 800 801 sbt = bttosbt(vlapic->timer_period_bt); 802 vlapic_callout_reset(vlapic, sbt); 803 } else 804 callout_stop(&vlapic->callout); 805 806 VLAPIC_TIMER_UNLOCK(vlapic); 807 } 808 809 /* 810 * This function populates 'dmask' with the set of vcpus that match the 811 * addressing specified by the (dest, phys, lowprio) tuple. 812 * 813 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 814 * or xAPIC (8-bit) destination field. 815 */ 816 static void 817 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 818 bool lowprio, bool x2apic_dest) 819 { 820 struct vlapic *vlapic; 821 uint32_t dfr, ldr, ldest, cluster; 822 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 823 cpuset_t amask; 824 int vcpuid; 825 826 if ((x2apic_dest && dest == 0xffffffff) || 827 (!x2apic_dest && dest == 0xff)) { 828 /* 829 * Broadcast in both logical and physical modes. 830 */ 831 *dmask = vm_active_cpus(vm); 832 return; 833 } 834 835 if (phys) { 836 /* 837 * Physical mode: destination is APIC ID. 838 */ 839 CPU_ZERO(dmask); 840 vcpuid = vm_apicid2vcpuid(vm, dest); 841 amask = vm_active_cpus(vm); 842 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 843 CPU_SET(vcpuid, dmask); 844 } else { 845 /* 846 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 847 * bitmask. This model is only available in the xAPIC mode. 848 */ 849 mda_flat_ldest = dest & 0xff; 850 851 /* 852 * In the "Cluster Model" the MDA is used to identify a 853 * specific cluster and a set of APICs in that cluster. 854 */ 855 if (x2apic_dest) { 856 mda_cluster_id = dest >> 16; 857 mda_cluster_ldest = dest & 0xffff; 858 } else { 859 mda_cluster_id = (dest >> 4) & 0xf; 860 mda_cluster_ldest = dest & 0xf; 861 } 862 863 /* 864 * Logical mode: match each APIC that has a bit set 865 * in its LDR that matches a bit in the ldest. 866 */ 867 CPU_ZERO(dmask); 868 amask = vm_active_cpus(vm); 869 CPU_FOREACH_ISSET(vcpuid, &amask) { 870 vlapic = vm_lapic(vm, vcpuid); 871 dfr = vlapic->apic_page->dfr; 872 ldr = vlapic->apic_page->ldr; 873 874 if ((dfr & APIC_DFR_MODEL_MASK) == 875 APIC_DFR_MODEL_FLAT) { 876 ldest = ldr >> 24; 877 mda_ldest = mda_flat_ldest; 878 } else if ((dfr & APIC_DFR_MODEL_MASK) == 879 APIC_DFR_MODEL_CLUSTER) { 880 if (x2apic(vlapic)) { 881 cluster = ldr >> 16; 882 ldest = ldr & 0xffff; 883 } else { 884 cluster = ldr >> 28; 885 ldest = (ldr >> 24) & 0xf; 886 } 887 if (cluster != mda_cluster_id) 888 continue; 889 mda_ldest = mda_cluster_ldest; 890 } else { 891 /* 892 * Guest has configured a bad logical 893 * model for this vcpu - skip it. 894 */ 895 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 896 "model %x - cannot deliver interrupt", dfr); 897 continue; 898 } 899 900 if ((mda_ldest & ldest) != 0) { 901 CPU_SET(vcpuid, dmask); 902 if (lowprio) 903 break; 904 } 905 } 906 } 907 } 908 909 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); 910 911 static void 912 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 913 { 914 struct LAPIC *lapic = vlapic->apic_page; 915 916 if (lapic->tpr != val) { 917 VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed " 918 "from %#x to %#x", lapic->tpr, val); 919 lapic->tpr = val; 920 vlapic_update_ppr(vlapic); 921 } 922 } 923 924 static uint8_t 925 vlapic_get_tpr(struct vlapic *vlapic) 926 { 927 struct LAPIC *lapic = vlapic->apic_page; 928 929 return (lapic->tpr); 930 } 931 932 void 933 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 934 { 935 uint8_t tpr; 936 937 if (val & ~0xf) { 938 vm_inject_gp(vlapic->vm, vlapic->vcpuid); 939 return; 940 } 941 942 tpr = val << 4; 943 vlapic_set_tpr(vlapic, tpr); 944 } 945 946 uint64_t 947 vlapic_get_cr8(struct vlapic *vlapic) 948 { 949 uint8_t tpr; 950 951 tpr = vlapic_get_tpr(vlapic); 952 return (tpr >> 4); 953 } 954 955 int 956 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 957 { 958 int i; 959 bool phys; 960 cpuset_t dmask; 961 uint64_t icrval; 962 uint32_t dest, vec, mode; 963 struct vlapic *vlapic2; 964 struct vm_exit *vmexit; 965 struct LAPIC *lapic; 966 uint16_t maxcpus; 967 968 lapic = vlapic->apic_page; 969 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 970 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 971 972 if (x2apic(vlapic)) 973 dest = icrval >> 32; 974 else 975 dest = icrval >> (32 + 24); 976 vec = icrval & APIC_VECTOR_MASK; 977 mode = icrval & APIC_DELMODE_MASK; 978 979 if (mode == APIC_DELMODE_FIXED && vec < 16) { 980 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); 981 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 982 return (0); 983 } 984 985 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 986 987 if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { 988 switch (icrval & APIC_DEST_MASK) { 989 case APIC_DEST_DESTFLD: 990 phys = ((icrval & APIC_DESTMODE_LOG) == 0); 991 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, 992 x2apic(vlapic)); 993 break; 994 case APIC_DEST_SELF: 995 CPU_SETOF(vlapic->vcpuid, &dmask); 996 break; 997 case APIC_DEST_ALLISELF: 998 dmask = vm_active_cpus(vlapic->vm); 999 break; 1000 case APIC_DEST_ALLESELF: 1001 dmask = vm_active_cpus(vlapic->vm); 1002 CPU_CLR(vlapic->vcpuid, &dmask); 1003 break; 1004 default: 1005 CPU_ZERO(&dmask); /* satisfy gcc */ 1006 break; 1007 } 1008 1009 CPU_FOREACH_ISSET(i, &dmask) { 1010 if (mode == APIC_DELMODE_FIXED) { 1011 lapic_intr_edge(vlapic->vm, i, vec); 1012 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 1013 IPIS_SENT, i, 1); 1014 VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " 1015 "to vcpuid %d", vec, i); 1016 } else { 1017 vm_inject_nmi(vlapic->vm, i); 1018 VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " 1019 "to vcpuid %d", i); 1020 } 1021 } 1022 1023 return (0); /* handled completely in the kernel */ 1024 } 1025 1026 maxcpus = vm_get_maxcpus(vlapic->vm); 1027 if (mode == APIC_DELMODE_INIT) { 1028 if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) 1029 return (0); 1030 1031 if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { 1032 vlapic2 = vm_lapic(vlapic->vm, dest); 1033 1034 /* move from INIT to waiting-for-SIPI state */ 1035 if (vlapic2->boot_state == BS_INIT) { 1036 vlapic2->boot_state = BS_SIPI; 1037 } 1038 1039 return (0); 1040 } 1041 } 1042 1043 if (mode == APIC_DELMODE_STARTUP) { 1044 if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { 1045 vlapic2 = vm_lapic(vlapic->vm, dest); 1046 1047 /* 1048 * Ignore SIPIs in any state other than wait-for-SIPI 1049 */ 1050 if (vlapic2->boot_state != BS_SIPI) 1051 return (0); 1052 1053 vlapic2->boot_state = BS_RUNNING; 1054 1055 *retu = true; 1056 vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); 1057 vmexit->exitcode = VM_EXITCODE_SPINUP_AP; 1058 vmexit->u.spinup_ap.vcpu = dest; 1059 vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; 1060 1061 return (0); 1062 } 1063 } 1064 1065 /* 1066 * This will cause a return to userland. 1067 */ 1068 return (1); 1069 } 1070 1071 void 1072 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1073 { 1074 int vec; 1075 1076 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1077 1078 vec = val & 0xff; 1079 lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); 1080 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, 1081 vlapic->vcpuid, 1); 1082 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1083 } 1084 1085 int 1086 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1087 { 1088 struct LAPIC *lapic = vlapic->apic_page; 1089 int idx, i, bitpos, vector; 1090 uint32_t *irrptr, val; 1091 1092 vlapic_update_ppr(vlapic); 1093 1094 if (vlapic->ops.pending_intr) 1095 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1096 1097 irrptr = &lapic->irr0; 1098 1099 for (i = 7; i >= 0; i--) { 1100 idx = i * 4; 1101 val = atomic_load_acq_int(&irrptr[idx]); 1102 bitpos = fls(val); 1103 if (bitpos != 0) { 1104 vector = i * 32 + (bitpos - 1); 1105 if (PRIO(vector) > PRIO(lapic->ppr)) { 1106 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1107 if (vecptr != NULL) 1108 *vecptr = vector; 1109 return (1); 1110 } else 1111 break; 1112 } 1113 } 1114 return (0); 1115 } 1116 1117 void 1118 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1119 { 1120 struct LAPIC *lapic = vlapic->apic_page; 1121 uint32_t *irrptr, *isrptr; 1122 int idx, stk_top; 1123 1124 if (vlapic->ops.intr_accepted) 1125 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1126 1127 /* 1128 * clear the ready bit for vector being accepted in irr 1129 * and set the vector as in service in isr. 1130 */ 1131 idx = (vector / 32) * 4; 1132 1133 irrptr = &lapic->irr0; 1134 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1135 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1136 1137 isrptr = &lapic->isr0; 1138 isrptr[idx] |= 1 << (vector % 32); 1139 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1140 1141 /* 1142 * Update the PPR 1143 */ 1144 vlapic->isrvec_stk_top++; 1145 1146 stk_top = vlapic->isrvec_stk_top; 1147 if (stk_top >= ISRVEC_STK_SIZE) 1148 panic("isrvec_stk_top overflow %d", stk_top); 1149 1150 vlapic->isrvec_stk[stk_top] = vector; 1151 } 1152 1153 void 1154 vlapic_svr_write_handler(struct vlapic *vlapic) 1155 { 1156 struct LAPIC *lapic; 1157 uint32_t old, new, changed; 1158 1159 lapic = vlapic->apic_page; 1160 1161 new = lapic->svr; 1162 old = vlapic->svr_last; 1163 vlapic->svr_last = new; 1164 1165 changed = old ^ new; 1166 if ((changed & APIC_SVR_ENABLE) != 0) { 1167 if ((new & APIC_SVR_ENABLE) == 0) { 1168 /* 1169 * The apic is now disabled so stop the apic timer 1170 * and mask all the LVT entries. 1171 */ 1172 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1173 VLAPIC_TIMER_LOCK(vlapic); 1174 callout_stop(&vlapic->callout); 1175 VLAPIC_TIMER_UNLOCK(vlapic); 1176 vlapic_mask_lvts(vlapic); 1177 } else { 1178 /* 1179 * The apic is now enabled so restart the apic timer 1180 * if it is configured in periodic mode. 1181 */ 1182 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1183 if (vlapic_periodic_timer(vlapic)) 1184 vlapic_icrtmr_write_handler(vlapic); 1185 } 1186 } 1187 } 1188 1189 int 1190 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1191 uint64_t *data, bool *retu) 1192 { 1193 struct LAPIC *lapic = vlapic->apic_page; 1194 uint32_t *reg; 1195 int i; 1196 1197 /* Ignore MMIO accesses in x2APIC mode */ 1198 if (x2apic(vlapic) && mmio_access) { 1199 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1200 offset); 1201 *data = 0; 1202 goto done; 1203 } 1204 1205 if (!x2apic(vlapic) && !mmio_access) { 1206 /* 1207 * XXX Generate GP fault for MSR accesses in xAPIC mode 1208 */ 1209 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1210 "xAPIC mode", offset); 1211 *data = 0; 1212 goto done; 1213 } 1214 1215 if (offset > sizeof(*lapic)) { 1216 *data = 0; 1217 goto done; 1218 } 1219 1220 offset &= ~3; 1221 switch(offset) 1222 { 1223 case APIC_OFFSET_ID: 1224 *data = lapic->id; 1225 break; 1226 case APIC_OFFSET_VER: 1227 *data = lapic->version; 1228 break; 1229 case APIC_OFFSET_TPR: 1230 *data = vlapic_get_tpr(vlapic); 1231 break; 1232 case APIC_OFFSET_APR: 1233 *data = lapic->apr; 1234 break; 1235 case APIC_OFFSET_PPR: 1236 *data = lapic->ppr; 1237 break; 1238 case APIC_OFFSET_EOI: 1239 *data = lapic->eoi; 1240 break; 1241 case APIC_OFFSET_LDR: 1242 *data = lapic->ldr; 1243 break; 1244 case APIC_OFFSET_DFR: 1245 *data = lapic->dfr; 1246 break; 1247 case APIC_OFFSET_SVR: 1248 *data = lapic->svr; 1249 break; 1250 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1251 i = (offset - APIC_OFFSET_ISR0) >> 2; 1252 reg = &lapic->isr0; 1253 *data = *(reg + i); 1254 break; 1255 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1256 i = (offset - APIC_OFFSET_TMR0) >> 2; 1257 reg = &lapic->tmr0; 1258 *data = *(reg + i); 1259 break; 1260 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1261 i = (offset - APIC_OFFSET_IRR0) >> 2; 1262 reg = &lapic->irr0; 1263 *data = atomic_load_acq_int(reg + i); 1264 break; 1265 case APIC_OFFSET_ESR: 1266 *data = lapic->esr; 1267 break; 1268 case APIC_OFFSET_ICR_LOW: 1269 *data = lapic->icr_lo; 1270 if (x2apic(vlapic)) 1271 *data |= (uint64_t)lapic->icr_hi << 32; 1272 break; 1273 case APIC_OFFSET_ICR_HI: 1274 *data = lapic->icr_hi; 1275 break; 1276 case APIC_OFFSET_CMCI_LVT: 1277 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1278 *data = vlapic_get_lvt(vlapic, offset); 1279 #ifdef INVARIANTS 1280 reg = vlapic_get_lvtptr(vlapic, offset); 1281 KASSERT(*data == *reg, ("inconsistent lvt value at " 1282 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1283 #endif 1284 break; 1285 case APIC_OFFSET_TIMER_ICR: 1286 *data = lapic->icr_timer; 1287 break; 1288 case APIC_OFFSET_TIMER_CCR: 1289 *data = vlapic_get_ccr(vlapic); 1290 break; 1291 case APIC_OFFSET_TIMER_DCR: 1292 *data = lapic->dcr_timer; 1293 break; 1294 case APIC_OFFSET_SELF_IPI: 1295 /* 1296 * XXX generate a GP fault if vlapic is in x2apic mode 1297 */ 1298 *data = 0; 1299 break; 1300 case APIC_OFFSET_RRR: 1301 default: 1302 *data = 0; 1303 break; 1304 } 1305 done: 1306 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1307 return 0; 1308 } 1309 1310 int 1311 vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1312 uint64_t data, bool *retu) 1313 { 1314 struct LAPIC *lapic = vlapic->apic_page; 1315 uint32_t *regptr; 1316 int retval; 1317 1318 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1319 ("vlapic_write: invalid offset %#lx", offset)); 1320 1321 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1322 offset, data); 1323 1324 if (offset > sizeof(*lapic)) 1325 return (0); 1326 1327 /* Ignore MMIO accesses in x2APIC mode */ 1328 if (x2apic(vlapic) && mmio_access) { 1329 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1330 "in x2APIC mode", data, offset); 1331 return (0); 1332 } 1333 1334 /* 1335 * XXX Generate GP fault for MSR accesses in xAPIC mode 1336 */ 1337 if (!x2apic(vlapic) && !mmio_access) { 1338 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1339 "in xAPIC mode", data, offset); 1340 return (0); 1341 } 1342 1343 retval = 0; 1344 switch(offset) 1345 { 1346 case APIC_OFFSET_ID: 1347 lapic->id = data; 1348 vlapic_id_write_handler(vlapic); 1349 break; 1350 case APIC_OFFSET_TPR: 1351 vlapic_set_tpr(vlapic, data & 0xff); 1352 break; 1353 case APIC_OFFSET_EOI: 1354 vlapic_process_eoi(vlapic); 1355 break; 1356 case APIC_OFFSET_LDR: 1357 lapic->ldr = data; 1358 vlapic_ldr_write_handler(vlapic); 1359 break; 1360 case APIC_OFFSET_DFR: 1361 lapic->dfr = data; 1362 vlapic_dfr_write_handler(vlapic); 1363 break; 1364 case APIC_OFFSET_SVR: 1365 lapic->svr = data; 1366 vlapic_svr_write_handler(vlapic); 1367 break; 1368 case APIC_OFFSET_ICR_LOW: 1369 lapic->icr_lo = data; 1370 if (x2apic(vlapic)) 1371 lapic->icr_hi = data >> 32; 1372 retval = vlapic_icrlo_write_handler(vlapic, retu); 1373 break; 1374 case APIC_OFFSET_ICR_HI: 1375 lapic->icr_hi = data; 1376 break; 1377 case APIC_OFFSET_CMCI_LVT: 1378 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1379 regptr = vlapic_get_lvtptr(vlapic, offset); 1380 *regptr = data; 1381 vlapic_lvt_write_handler(vlapic, offset); 1382 break; 1383 case APIC_OFFSET_TIMER_ICR: 1384 lapic->icr_timer = data; 1385 vlapic_icrtmr_write_handler(vlapic); 1386 break; 1387 1388 case APIC_OFFSET_TIMER_DCR: 1389 lapic->dcr_timer = data; 1390 vlapic_dcr_write_handler(vlapic); 1391 break; 1392 1393 case APIC_OFFSET_ESR: 1394 vlapic_esr_write_handler(vlapic); 1395 break; 1396 1397 case APIC_OFFSET_SELF_IPI: 1398 if (x2apic(vlapic)) 1399 vlapic_self_ipi_handler(vlapic, data); 1400 break; 1401 1402 case APIC_OFFSET_VER: 1403 case APIC_OFFSET_APR: 1404 case APIC_OFFSET_PPR: 1405 case APIC_OFFSET_RRR: 1406 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1407 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1408 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1409 case APIC_OFFSET_TIMER_CCR: 1410 default: 1411 // Read only. 1412 break; 1413 } 1414 1415 return (retval); 1416 } 1417 1418 static void 1419 vlapic_reset(struct vlapic *vlapic) 1420 { 1421 struct LAPIC *lapic; 1422 1423 lapic = vlapic->apic_page; 1424 bzero(lapic, sizeof(struct LAPIC)); 1425 1426 lapic->id = vlapic_get_id(vlapic); 1427 lapic->version = VLAPIC_VERSION; 1428 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1429 lapic->dfr = 0xffffffff; 1430 lapic->svr = APIC_SVR_VECTOR; 1431 vlapic_mask_lvts(vlapic); 1432 vlapic_reset_tmr(vlapic); 1433 1434 lapic->dcr_timer = 0; 1435 vlapic_dcr_write_handler(vlapic); 1436 1437 if (vlapic->vcpuid == 0) 1438 vlapic->boot_state = BS_RUNNING; /* BSP */ 1439 else 1440 vlapic->boot_state = BS_INIT; /* AP */ 1441 1442 vlapic->svr_last = lapic->svr; 1443 } 1444 1445 void 1446 vlapic_init(struct vlapic *vlapic) 1447 { 1448 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1449 KASSERT(vlapic->vcpuid >= 0 && 1450 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1451 ("vlapic_init: vcpuid is not initialized")); 1452 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1453 "initialized")); 1454 1455 /* 1456 * If the vlapic is configured in x2apic mode then it will be 1457 * accessed in the critical section via the MSR emulation code. 1458 * 1459 * Therefore the timer mutex must be a spinlock because blockable 1460 * mutexes cannot be acquired in a critical section. 1461 */ 1462 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1463 callout_init(&vlapic->callout, 1); 1464 1465 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1466 1467 if (vlapic->vcpuid == 0) 1468 vlapic->msr_apicbase |= APICBASE_BSP; 1469 1470 vlapic_reset(vlapic); 1471 } 1472 1473 void 1474 vlapic_cleanup(struct vlapic *vlapic) 1475 { 1476 1477 callout_drain(&vlapic->callout); 1478 } 1479 1480 uint64_t 1481 vlapic_get_apicbase(struct vlapic *vlapic) 1482 { 1483 1484 return (vlapic->msr_apicbase); 1485 } 1486 1487 int 1488 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1489 { 1490 1491 if (vlapic->msr_apicbase != new) { 1492 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1493 "not supported", vlapic->msr_apicbase, new); 1494 return (-1); 1495 } 1496 1497 return (0); 1498 } 1499 1500 void 1501 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1502 { 1503 struct vlapic *vlapic; 1504 struct LAPIC *lapic; 1505 1506 vlapic = vm_lapic(vm, vcpuid); 1507 1508 if (state == X2APIC_DISABLED) 1509 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1510 else 1511 vlapic->msr_apicbase |= APICBASE_X2APIC; 1512 1513 /* 1514 * Reset the local APIC registers whose values are mode-dependent. 1515 * 1516 * XXX this works because the APIC mode can be changed only at vcpu 1517 * initialization time. 1518 */ 1519 lapic = vlapic->apic_page; 1520 lapic->id = vlapic_get_id(vlapic); 1521 if (x2apic(vlapic)) { 1522 lapic->ldr = x2apic_ldr(vlapic); 1523 lapic->dfr = 0; 1524 } else { 1525 lapic->ldr = 0; 1526 lapic->dfr = 0xffffffff; 1527 } 1528 1529 if (state == X2APIC_ENABLED) { 1530 if (vlapic->ops.enable_x2apic_mode) 1531 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1532 } 1533 } 1534 1535 void 1536 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1537 int delmode, int vec) 1538 { 1539 bool lowprio; 1540 int vcpuid; 1541 cpuset_t dmask; 1542 1543 if (delmode != IOART_DELFIXED && 1544 delmode != IOART_DELLOPRI && 1545 delmode != IOART_DELEXINT) { 1546 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1547 return; 1548 } 1549 lowprio = (delmode == IOART_DELLOPRI); 1550 1551 /* 1552 * We don't provide any virtual interrupt redirection hardware so 1553 * all interrupts originating from the ioapic or MSI specify the 1554 * 'dest' in the legacy xAPIC format. 1555 */ 1556 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1557 1558 CPU_FOREACH_ISSET(vcpuid, &dmask) { 1559 if (delmode == IOART_DELEXINT) { 1560 vm_inject_extint(vm, vcpuid); 1561 } else { 1562 lapic_set_intr(vm, vcpuid, vec, level); 1563 } 1564 } 1565 } 1566 1567 void 1568 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1569 { 1570 /* 1571 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1572 * 1573 * This is done by leveraging features like Posted Interrupts (Intel) 1574 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1575 * 1576 * If neither of these features are available then fallback to 1577 * sending an IPI to 'hostcpu'. 1578 */ 1579 if (vlapic->ops.post_intr) 1580 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1581 else 1582 ipi_cpu(hostcpu, ipinum); 1583 } 1584 1585 bool 1586 vlapic_enabled(struct vlapic *vlapic) 1587 { 1588 struct LAPIC *lapic = vlapic->apic_page; 1589 1590 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1591 (lapic->svr & APIC_SVR_ENABLE) != 0) 1592 return (true); 1593 else 1594 return (false); 1595 } 1596 1597 static void 1598 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1599 { 1600 struct LAPIC *lapic; 1601 uint32_t *tmrptr, mask; 1602 int idx; 1603 1604 lapic = vlapic->apic_page; 1605 tmrptr = &lapic->tmr0; 1606 idx = (vector / 32) * 4; 1607 mask = 1 << (vector % 32); 1608 if (level) 1609 tmrptr[idx] |= mask; 1610 else 1611 tmrptr[idx] &= ~mask; 1612 1613 if (vlapic->ops.set_tmr != NULL) 1614 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1615 } 1616 1617 void 1618 vlapic_reset_tmr(struct vlapic *vlapic) 1619 { 1620 int vector; 1621 1622 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1623 1624 for (vector = 0; vector <= 255; vector++) 1625 vlapic_set_tmr(vlapic, vector, false); 1626 } 1627 1628 void 1629 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1630 int delmode, int vector) 1631 { 1632 cpuset_t dmask; 1633 bool lowprio; 1634 1635 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1636 1637 /* 1638 * A level trigger is valid only for fixed and lowprio delivery modes. 1639 */ 1640 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1641 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1642 "delivery-mode %d", delmode); 1643 return; 1644 } 1645 1646 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1647 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1648 1649 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1650 return; 1651 1652 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1653 vlapic_set_tmr(vlapic, vector, true); 1654 } 1655 1656 #ifdef BHYVE_SNAPSHOT 1657 static void 1658 vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr) 1659 { 1660 /* The implementation is similar to the one in the 1661 * `vlapic_icrtmr_write_handler` function 1662 */ 1663 sbintime_t sbt; 1664 struct bintime bt; 1665 1666 VLAPIC_TIMER_LOCK(vlapic); 1667 1668 bt = vlapic->timer_freq_bt; 1669 bintime_mul(&bt, ccr); 1670 1671 if (ccr != 0) { 1672 binuptime(&vlapic->timer_fire_bt); 1673 bintime_add(&vlapic->timer_fire_bt, &bt); 1674 1675 sbt = bttosbt(bt); 1676 vlapic_callout_reset(vlapic, sbt); 1677 } else { 1678 /* even if the CCR was 0, periodic timers should be reset */ 1679 if (vlapic_periodic_timer(vlapic)) { 1680 binuptime(&vlapic->timer_fire_bt); 1681 bintime_add(&vlapic->timer_fire_bt, 1682 &vlapic->timer_period_bt); 1683 sbt = bttosbt(vlapic->timer_period_bt); 1684 1685 callout_stop(&vlapic->callout); 1686 vlapic_callout_reset(vlapic, sbt); 1687 } 1688 } 1689 1690 VLAPIC_TIMER_UNLOCK(vlapic); 1691 } 1692 1693 int 1694 vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) 1695 { 1696 int i, ret; 1697 struct vlapic *vlapic; 1698 struct LAPIC *lapic; 1699 uint32_t ccr; 1700 1701 KASSERT(vm != NULL, ("%s: arg was NULL", __func__)); 1702 1703 ret = 0; 1704 1705 for (i = 0; i < VM_MAXCPU; i++) { 1706 vlapic = vm_lapic(vm, i); 1707 1708 /* snapshot the page first; timer period depends on icr_timer */ 1709 lapic = vlapic->apic_page; 1710 SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done); 1711 1712 SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); 1713 1714 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, 1715 meta, ret, done); 1716 SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, 1717 meta, ret, done); 1718 1719 /* 1720 * Timer period is equal to 'icr_timer' ticks at a frequency of 1721 * 'timer_freq_bt'. 1722 */ 1723 if (meta->op == VM_SNAPSHOT_RESTORE) { 1724 vlapic->timer_period_bt = vlapic->timer_freq_bt; 1725 bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); 1726 } 1727 1728 SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, 1729 sizeof(vlapic->isrvec_stk), 1730 meta, ret, done); 1731 SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); 1732 SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done); 1733 1734 SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, 1735 sizeof(vlapic->lvt_last), 1736 meta, ret, done); 1737 1738 if (meta->op == VM_SNAPSHOT_SAVE) 1739 ccr = vlapic_get_ccr(vlapic); 1740 1741 SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); 1742 1743 if (meta->op == VM_SNAPSHOT_RESTORE && 1744 vlapic_enabled(vlapic) && lapic->icr_timer != 0) { 1745 /* Reset the value of the 'timer_fire_bt' and the vlapic 1746 * callout based on the value of the current count 1747 * register saved when the VM snapshot was created. 1748 * If initial count register is 0, timer is not used. 1749 * Look at "10.5.4 APIC Timer" in Software Developer Manual. 1750 */ 1751 vlapic_reset_callout(vlapic, ccr); 1752 } 1753 } 1754 1755 done: 1756 return (ret); 1757 } 1758 #endif 1759