/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
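
/*
 * Illustrative numbers: at this bus frequency (134217728 Hz) with a
 * divide-by-1 DCR, an initial timer count of 0x8000000 (134217728)
 * corresponds to a one-second timer period.
 */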

static void vlapic_set_error(struct vlapic *, uint32_t, bool);

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
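
/*
 * The x2APIC LDR is derived from the APIC ID: the cluster (id >> 4) lands
 * in bits 31:16 and a one-hot logical ID (1 << (id & 0xf)) in bits 15:0.
 * For example, an APIC ID of 0x26 yields an LDR of 0x00020040 (cluster 2,
 * logical bit 6).
 */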

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}
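
/*
 * The divide value is encoded in bits 0, 1 and 3 of the DCR (bit 2 is
 * reserved), hence the 0xB mask above; e.g. a DCR of 0x0 selects
 * divide-by-2 and a DCR of 0xB selects divide-by-1.
 */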

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
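
/*
 * The CCR is the remaining time scaled by the timer frequency: whole
 * seconds contribute BT2FREQ() ticks each, and the fractional part divides
 * out one tick's worth of bintime fraction. Illustrative numbers: at
 * 67108864 Hz (divide-by-2) with 1.5 seconds remaining,
 * ccr = 67108864 + 33554432 = 100663296.
 */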

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
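
/*
 * The IRR/ISR/TMR registers occupy 16 bytes apiece in the LAPIC page, so
 * when &irr0 is treated as a uint32_t array consecutive registers are 4
 * elements apart; that is why 'idx' above is (vector / 32) * 4. For
 * example, vector 0x45 (69) maps to idx 8 (IRR2) with mask 1 << 5.
 */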

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
	uint32_t mode, reg, vec;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		// Other modes ignored
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
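
/*
 * In other words, the PPR is whichever of TPR and (ISRV & 0xf0) has the
 * higher priority class. For example, with TPR 0x40 and ISRV 0x31 the PPR
 * is 0x40 (class 4 >= class 3); with TPR 0x20 and ISRV 0x55 it is 0x50.
 */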

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
			    vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERRED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
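
/*
 * The period is simply 'icr_timer' ticks at 'timer_freq_bt'. Illustrative
 * numbers: an initial count of 0x2000000 (33554432) with a divide-by-1 DCR
 * (134217728 Hz) arms the callout to fire 0.25 seconds from now.
 */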

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
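
/*
 * For example, in the xAPIC flat model an MDA of 0x03 selects every vcpu
 * whose LDR has bit 24 or bit 25 set; in the xAPIC cluster model an MDA of
 * 0x12 selects cluster 1 (LDR bits 31:28) and logical ID bit 1 within it.
 */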

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
		    "from %#x to %#x", lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}
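
/*
 * CR8 holds only the priority class, i.e. TPR bits 7:4. For example,
 * writing 0x9 to CR8 sets the TPR to 0x90, and reading CR8 back with a TPR
 * of 0x90 returns 0x9; values above 0xf raise #GP.
 */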

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;
	uint16_t maxcpus;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	maxcpus = vm_get_maxcpus(vlapic->vm);
	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			vlapic2->boot_state = BS_RUNNING;

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}
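
/*
 * A SIPI vector designates a 4 KiB-aligned real-mode start page, hence the
 * 'vec << PAGE_SHIFT' above; e.g. SIPI vector 0x9a starts the AP at guest
 * physical address 0x9a000 (real-mode CS:IP 0x9a00:0000).
 */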

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	vlapic_update_ppr(vlapic);

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}
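
/*
 * A pending vector is deliverable only if its priority class exceeds that
 * of the PPR. For example, vector 0x45 is blocked while the PPR is 0x50
 * (class 4 <= 5) but becomes deliverable once the PPR drops to 0x30.
 */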

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
}
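
/*
 * Each accepted vector is pushed on the isrvec stack and popped again by
 * vlapic_process_eoi(). For example, accepting vector 0x30 and then nested
 * vector 0x45 leaves isrvec_stk as {0, 0x30, 0x45} with isrvec_stk_top == 2.
 */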

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#lx, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		// Read only.
		break;
	}

	return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vm, vcpuid);
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}

#ifdef BHYVE_SNAPSHOT
static void
vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
{
	/*
	 * The implementation is similar to the one in the
	 * vlapic_icrtmr_write_handler() function.
	 */
	sbintime_t sbt;
	struct bintime bt;

	VLAPIC_TIMER_LOCK(vlapic);

	bt = vlapic->timer_freq_bt;
	bintime_mul(&bt, ccr);

	if (ccr != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &bt);

		sbt = bttosbt(bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else {
		/* even if the CCR was 0, periodic timers should be reset */
		if (vlapic_periodic_timer(vlapic)) {
			binuptime(&vlapic->timer_fire_bt);
			bintime_add(&vlapic->timer_fire_bt,
			    &vlapic->timer_period_bt);
			sbt = bttosbt(vlapic->timer_period_bt);

			callout_stop(&vlapic->callout);
			callout_reset_sbt(&vlapic->callout, sbt, 0,
			    vlapic_callout_handler, vlapic, 0);
		}
	}

	VLAPIC_TIMER_UNLOCK(vlapic);
}

int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int i, ret;
	struct vlapic *vlapic;
	struct LAPIC *lapic;
	uint32_t ccr;

	KASSERT(vm != NULL, ("%s: arg was NULL", __func__));

	ret = 0;

	for (i = 0; i < VM_MAXCPU; i++) {
		vlapic = vm_lapic(vm, i);

		/* snapshot the page first; timer period depends on icr_timer */
		lapic = vlapic->apic_page;
		SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
		    meta, ret, done);

		/*
		 * Timer period is equal to 'icr_timer' ticks at a frequency of
		 * 'timer_freq_bt'.
		 */
		if (meta->op == VM_SNAPSHOT_RESTORE) {
			vlapic->timer_period_bt = vlapic->timer_freq_bt;
			bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
		}

		SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
		    sizeof(vlapic->isrvec_stk),
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);

		SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
		    sizeof(vlapic->lvt_last),
		    meta, ret, done);

		if (meta->op == VM_SNAPSHOT_SAVE)
			ccr = vlapic_get_ccr(vlapic);

		SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);

		if (meta->op == VM_SNAPSHOT_RESTORE) {
			/*
			 * Reset the value of the 'timer_fire_bt' and the
			 * vlapic callout based on the value of the current
			 * count register saved when the VM snapshot was
			 * created.
			 */
			vlapic_reset_callout(vlapic, ccr);
		}
	}

done:
	return (ret);
}
#endif