/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define PRIO(x)                 ((x) >> 4)

#define VLAPIC_VERSION          (16)

#define x2apic(vlapic)  (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define VLAPIC_TIMER_LOCK(vlapic)       mtx_lock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_UNLOCK(vlapic)     mtx_unlock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCKED(vlapic)     mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
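
/*
 * Illustrative sketch only: a bintime counts whole seconds plus a 64-bit
 * binary fraction, so FREQ2BT() yields a tick period of 2^64 / freq
 * fractional units.  Because VLAPIC_BUS_FREQ is a power of two (2^27 Hz),
 * that division is exact; a non-power-of-two frequency would truncate and
 * slowly drift.  The hypothetical helper below is not part of the driver.
 */
#if 0
static void
vlapic_bus_freq_example(void)
{
        struct bintime bt;

        FREQ2BT(VLAPIC_BUS_FREQ, &bt);  /* bt.sec == 0, bt.frac == 1ULL << 37 */
}
#endif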

static void vlapic_set_error(struct vlapic *, uint32_t, bool);

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

        if (x2apic(vlapic))
                return (vlapic->vcpuid);
        else
                return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
        int apicid;
        uint32_t ldr;

        apicid = vlapic_get_id(vlapic);
        ldr = 1 << (apicid & 0xf);
        ldr |= (apicid & 0xffff0) << 12;
        return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
        struct LAPIC *lapic;

        lapic = vlapic->apic_page;
        if (x2apic(vlapic)) {
                VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
                    lapic->dfr);
                lapic->dfr = 0;
                return;
        }

        lapic->dfr &= APIC_DFR_MODEL_MASK;
        lapic->dfr |= APIC_DFR_RESERVED;

        if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
                VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
        else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
                VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
        else
                VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
        struct LAPIC *lapic;

        lapic = vlapic->apic_page;

        /* LDR is read-only in x2apic mode */
        if (x2apic(vlapic)) {
                VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
                    lapic->ldr);
                lapic->ldr = x2apic_ldr(vlapic);
        } else {
                lapic->ldr &= ~APIC_LDR_RESERVED;
                VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
        }
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
        struct LAPIC *lapic;

        /*
         * We don't allow the ID register to be modified so reset it back to
         * its default value.
         */
        lapic = vlapic->apic_page;
        lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
        switch (dcr & 0xB) {
        case APIC_TDCR_1:
                return (1);
        case APIC_TDCR_2:
                return (2);
        case APIC_TDCR_4:
                return (4);
        case APIC_TDCR_8:
                return (8);
        case APIC_TDCR_16:
                return (16);
        case APIC_TDCR_32:
                return (32);
        case APIC_TDCR_64:
                return (64);
        case APIC_TDCR_128:
                return (128);
        default:
                panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
        }
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
        printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
            *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
            *lvt & APIC_LVTT_M);
}
#endif
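
/*
 * For illustration of the CCR arithmetic below: the remaining time is
 * converted to timer ticks as
 *
 *      ccr = rem.sec * BT2FREQ(&timer_freq_bt) + rem.frac / timer_freq_bt.frac
 *
 * so at the default divide-by-1 frequency (2^27 Hz, tick = 1ULL << 37
 * fractional units), 0.5 s remaining (rem.frac == 1ULL << 63) yields
 * (1ULL << 63) / (1ULL << 37) == 1 << 26 ticks.
 */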

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
        struct bintime bt_now, bt_rem;
        struct LAPIC *lapic;
        uint32_t ccr;

        ccr = 0;
        lapic = vlapic->apic_page;

        VLAPIC_TIMER_LOCK(vlapic);
        if (callout_active(&vlapic->callout)) {
                /*
                 * If the timer is scheduled to expire in the future then
                 * compute the value of 'ccr' based on the remaining time.
                 */
                binuptime(&bt_now);
                if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
                        bt_rem = vlapic->timer_fire_bt;
                        bintime_sub(&bt_rem, &bt_now);
                        ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
                        ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
                }
        }
        KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
            "icr_timer is %#x", ccr, lapic->icr_timer));
        VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
            ccr, lapic->icr_timer);
        VLAPIC_TIMER_UNLOCK(vlapic);
        return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
        struct LAPIC *lapic;
        int divisor;

        lapic = vlapic->apic_page;
        VLAPIC_TIMER_LOCK(vlapic);

        divisor = vlapic_timer_divisor(lapic->dcr_timer);
        VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
            lapic->dcr_timer, divisor);

        /*
         * Update the timer frequency and the timer period.
         *
         * XXX changes to the frequency divider will not take effect until
         * the timer is reloaded.
         */
        FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
        vlapic->timer_period_bt = vlapic->timer_freq_bt;
        bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

        VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
        struct LAPIC *lapic;

        lapic = vlapic->apic_page;
        lapic->esr = vlapic->esr_pending;
        vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
        struct LAPIC *lapic;
        uint32_t *irrptr, *tmrptr, mask;
        int idx;

        KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

        lapic = vlapic->apic_page;
        if (!(lapic->svr & APIC_SVR_ENABLE)) {
                VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
                    "interrupt %d", vector);
                return (0);
        }

        if (vector < 16) {
                vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
                    false);
                VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
                    vector);
                return (1);
        }

        if (vlapic->ops.set_intr_ready)
                return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

        idx = (vector / 32) * 4;
        mask = 1 << (vector % 32);

        irrptr = &lapic->irr0;
        atomic_set_int(&irrptr[idx], mask);

        /*
         * Verify that the trigger-mode of the interrupt matches with
         * the vlapic TMR registers.
         */
        tmrptr = &lapic->tmr0;
        if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
                VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
                    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
                    level ? "level" : "edge");
        }

        VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
        return (1);
}
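
/*
 * The IRR/ISR/TMR registers each spread 256 vectors over eight 32-bit
 * words, and consecutive words sit 16 bytes apart in the APIC page, hence
 * the stride of 4 in uint32_t units used throughout this file.  A minimal
 * sketch of the mapping (hypothetical helper, for illustration only):
 */
#if 0
static __inline void
vec_to_word(int vector, int *idx, uint32_t *mask)
{
        *idx = (vector / 32) * 4;       /* e.g. vector 0x41 -> irr2 at idx 8 */
        *mask = 1u << (vector % 32);    /* e.g. vector 0x41 -> bit 1 */
}
#endif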

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
        struct LAPIC *lapic = vlapic->apic_page;
        int i;

        switch (offset) {
        case APIC_OFFSET_CMCI_LVT:
                return (&lapic->lvt_cmci);
        case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
                i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
                return ((&lapic->lvt_timer) + i);
        default:
                panic("vlapic_get_lvt: invalid LVT\n");
        }
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
        int index;

        switch (offset) {
        case APIC_OFFSET_CMCI_LVT:
                index = APIC_LVT_CMCI;
                break;
        case APIC_OFFSET_TIMER_LVT:
                index = APIC_LVT_TIMER;
                break;
        case APIC_OFFSET_THERM_LVT:
                index = APIC_LVT_THERMAL;
                break;
        case APIC_OFFSET_PERF_LVT:
                index = APIC_LVT_PMC;
                break;
        case APIC_OFFSET_LINT0_LVT:
                index = APIC_LVT_LINT0;
                break;
        case APIC_OFFSET_LINT1_LVT:
                index = APIC_LVT_LINT1;
                break;
        case APIC_OFFSET_ERROR_LVT:
                index = APIC_LVT_ERROR;
                break;
        default:
                index = -1;
                break;
        }
        KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
            "invalid lvt index %d for offset %#x", index, offset));

        return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
        int idx;
        uint32_t val;

        idx = lvt_off_to_idx(offset);
        val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
        return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
        uint32_t *lvtptr, mask, val;
        struct LAPIC *lapic;
        int idx;

        lapic = vlapic->apic_page;
        lvtptr = vlapic_get_lvtptr(vlapic, offset);
        val = *lvtptr;
        idx = lvt_off_to_idx(offset);

        if (!(lapic->svr & APIC_SVR_ENABLE))
                val |= APIC_LVT_M;
        mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
        switch (offset) {
        case APIC_OFFSET_TIMER_LVT:
                mask |= APIC_LVTT_TM;
                break;
        case APIC_OFFSET_ERROR_LVT:
                break;
        case APIC_OFFSET_LINT0_LVT:
        case APIC_OFFSET_LINT1_LVT:
                mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
                /* FALLTHROUGH */
        default:
                mask |= APIC_LVT_DM;
                break;
        }
        val &= mask;
        *lvtptr = val;
        atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
        struct LAPIC *lapic = vlapic->apic_page;

        lapic->lvt_cmci |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

        lapic->lvt_timer |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

        lapic->lvt_thermal |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

        lapic->lvt_pcint |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

        lapic->lvt_lint0 |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

        lapic->lvt_lint1 |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

        lapic->lvt_error |= APIC_LVT_M;
        vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}
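
/*
 * For illustration of the decoding in vlapic_fire_lvt() below: an LVT
 * entry packs the vector in bits 7:0, the delivery mode in bits 10:8 and
 * the mask in bit 16.  A raw value of 0x4fd decodes to delivery mode NMI
 * (APIC_LVT_DM_NMI, 100b) with vector 0xfd, while 0x10030 is a masked
 * fixed interrupt on vector 0x30.
 */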

static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
        uint32_t mode, reg, vec;

        reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

        if (reg & APIC_LVT_M)
                return (0);
        vec = reg & APIC_LVT_VECTOR;
        mode = reg & APIC_LVT_DM;

        switch (mode) {
        case APIC_LVT_DM_FIXED:
                if (vec < 16) {
                        vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
                            lvt == APIC_LVT_ERROR);
                        return (0);
                }
                if (vlapic_set_intr_ready(vlapic, vec, false))
                        vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
                break;
        case APIC_LVT_DM_NMI:
                vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
                break;
        case APIC_LVT_DM_EXTINT:
                vm_inject_extint(vlapic->vm, vlapic->vcpuid);
                break;
        default:
                /* Other modes ignored */
                return (0);
        }
        return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
        int i;
        uint32_t *isrptr;

        isrptr = &vlapic->apic_page->isr0;
        for (i = 0; i < 8; i++)
                printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

        for (i = 0; i <= vlapic->isrvec_stk_top; i++)
                printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
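
/*
 * Worked example of the PPR computation below, for illustration: with
 * TPR = 0x30 and highest in-service vector 0x45, PRIO(0x30) == 3 is less
 * than PRIO(0x45) == 4, so PPR becomes 0x45 & 0xf0 == 0x40; raising TPR
 * to 0x50 would instead make PPR track TPR itself.
 */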

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
        int isrvec, tpr, ppr;

        /*
         * Note that the value on the stack at index 0 is always 0.
         *
         * This is a placeholder for the value of ISRV when none of the
         * bits is set in the ISRx registers.
         */
        isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
        tpr = vlapic->apic_page->tpr;

#if 1
        {
                int i, lastprio, curprio, vector, idx;
                uint32_t *isrptr;

                if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
                        panic("isrvec_stk is corrupted: %d", isrvec);

                /*
                 * Make sure that the priority of the nested interrupts is
                 * always increasing.
                 */
                lastprio = -1;
                for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
                        curprio = PRIO(vlapic->isrvec_stk[i]);
                        if (curprio <= lastprio) {
                                dump_isrvec_stk(vlapic);
                                panic("isrvec_stk does not satisfy invariant");
                        }
                        lastprio = curprio;
                }

                /*
                 * Make sure that each bit set in the ISRx registers has a
                 * corresponding entry on the isrvec stack.
                 */
                i = 1;
                isrptr = &vlapic->apic_page->isr0;
                for (vector = 0; vector < 256; vector++) {
                        idx = (vector / 32) * 4;
                        if (isrptr[idx] & (1 << (vector % 32))) {
                                if (i > vlapic->isrvec_stk_top ||
                                    vlapic->isrvec_stk[i] != vector) {
                                        dump_isrvec_stk(vlapic);
                                        panic("ISR and isrvec_stk out of sync");
                                }
                                i++;
                        }
                }
        }
#endif

        if (PRIO(tpr) >= PRIO(isrvec))
                ppr = tpr;
        else
                ppr = isrvec & 0xf0;

        vlapic->apic_page->ppr = ppr;
        VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
        vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
        struct LAPIC *lapic = vlapic->apic_page;
        uint32_t *isrptr, *tmrptr;
        int i, idx, bitpos, vector;

        isrptr = &lapic->isr0;
        tmrptr = &lapic->tmr0;

        for (i = 7; i >= 0; i--) {
                idx = i * 4;
                bitpos = fls(isrptr[idx]);
                if (bitpos-- != 0) {
                        if (vlapic->isrvec_stk_top <= 0) {
                                panic("invalid vlapic isrvec_stk_top %d",
                                    vlapic->isrvec_stk_top);
                        }
                        isrptr[idx] &= ~(1 << bitpos);
                        vector = i * 32 + bitpos;
                        VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
                            vector);
                        VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
                        vlapic->isrvec_stk_top--;
                        vlapic_update_ppr(vlapic);
                        if ((tmrptr[idx] & (1 << bitpos)) != 0) {
                                vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
                                    vector);
                        }
                        return;
                }
        }
        VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
        vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

        return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
        uint32_t lvt;

        lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

        return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

        vlapic->esr_pending |= mask;

        /*
         * Avoid infinite recursion if the error LVT itself is configured with
         * an illegal vector.
         */
        if (lvt_error)
                return;

        if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
                vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
        }
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{

        KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

        if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
                VLAPIC_CTR0(vlapic, "vlapic timer fired");
                vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
        }
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

        if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
                vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
        }
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{

        if (vlapic_enabled(vlapic) == false) {
                /*
                 * When the local APIC is global/hardware disabled,
                 * LINT[1:0] pins are configured as INTR and NMI pins,
                 * respectively.
                 */
                switch (vector) {
                case APIC_LVT_LINT0:
                        vm_inject_extint(vlapic->vm, vlapic->vcpuid);
                        break;
                case APIC_LVT_LINT1:
                        vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
                        break;
                default:
                        break;
                }
                return (0);
        }

        switch (vector) {
        case APIC_LVT_LINT0:
        case APIC_LVT_LINT1:
        case APIC_LVT_TIMER:
        case APIC_LVT_ERROR:
        case APIC_LVT_PMC:
        case APIC_LVT_THERMAL:
        case APIC_LVT_CMCI:
                if (vlapic_fire_lvt(vlapic, vector)) {
                        vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
                            LVTS_TRIGGERRED, vector, 1);
                }
                break;
        default:
                return (EINVAL);
        }
        return (0);
}
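
/*
 * For a periodic timer the handler below re-arms the callout relative to
 * the *scheduled* firing time rather than the current time, so small
 * scheduling delays do not accumulate.  For illustration: with a 10 ms
 * period and a callout that runs 2 ms late, the next callout is armed
 * 8 ms out; only when the lag exceeds a full period is the time base
 * reset instead.
 */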
740 */ 741 bt = btnow; 742 bintime_sub(&bt, &vlapic->timer_fire_bt); 743 744 rem_sbt = bttosbt(vlapic->timer_period_bt); 745 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 746 /* 747 * Adjust the time until the next countdown downward 748 * to account for the lost time. 749 */ 750 rem_sbt -= bttosbt(bt); 751 } else { 752 /* 753 * If the delta is greater than the timer period then 754 * just reset our time base instead of trying to catch 755 * up. 756 */ 757 vlapic->timer_fire_bt = btnow; 758 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 759 "usecs, period is %lu usecs - resetting time base", 760 bttosbt(bt) / SBT_1US, 761 bttosbt(vlapic->timer_period_bt) / SBT_1US); 762 } 763 764 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 765 callout_reset_sbt(&vlapic->callout, rem_sbt, 0, 766 vlapic_callout_handler, vlapic, 0); 767 } 768 done: 769 VLAPIC_TIMER_UNLOCK(vlapic); 770 } 771 772 void 773 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 774 { 775 struct LAPIC *lapic; 776 sbintime_t sbt; 777 uint32_t icr_timer; 778 779 VLAPIC_TIMER_LOCK(vlapic); 780 781 lapic = vlapic->apic_page; 782 icr_timer = lapic->icr_timer; 783 784 vlapic->timer_period_bt = vlapic->timer_freq_bt; 785 bintime_mul(&vlapic->timer_period_bt, icr_timer); 786 787 if (icr_timer != 0) { 788 binuptime(&vlapic->timer_fire_bt); 789 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 790 791 sbt = bttosbt(vlapic->timer_period_bt); 792 callout_reset_sbt(&vlapic->callout, sbt, 0, 793 vlapic_callout_handler, vlapic, 0); 794 } else 795 callout_stop(&vlapic->callout); 796 797 VLAPIC_TIMER_UNLOCK(vlapic); 798 } 799 800 /* 801 * This function populates 'dmask' with the set of vcpus that match the 802 * addressing specified by the (dest, phys, lowprio) tuple. 803 * 804 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 805 * or xAPIC (8-bit) destination field. 806 */ 807 static void 808 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 809 bool lowprio, bool x2apic_dest) 810 { 811 struct vlapic *vlapic; 812 uint32_t dfr, ldr, ldest, cluster; 813 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 814 cpuset_t amask; 815 int vcpuid; 816 817 if ((x2apic_dest && dest == 0xffffffff) || 818 (!x2apic_dest && dest == 0xff)) { 819 /* 820 * Broadcast in both logical and physical modes. 821 */ 822 *dmask = vm_active_cpus(vm); 823 return; 824 } 825 826 if (phys) { 827 /* 828 * Physical mode: destination is APIC ID. 829 */ 830 CPU_ZERO(dmask); 831 vcpuid = vm_apicid2vcpuid(vm, dest); 832 amask = vm_active_cpus(vm); 833 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 834 CPU_SET(vcpuid, dmask); 835 } else { 836 /* 837 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 838 * bitmask. This model is only available in the xAPIC mode. 839 */ 840 mda_flat_ldest = dest & 0xff; 841 842 /* 843 * In the "Cluster Model" the MDA is used to identify a 844 * specific cluster and a set of APICs in that cluster. 845 */ 846 if (x2apic_dest) { 847 mda_cluster_id = dest >> 16; 848 mda_cluster_ldest = dest & 0xffff; 849 } else { 850 mda_cluster_id = (dest >> 4) & 0xf; 851 mda_cluster_ldest = dest & 0xf; 852 } 853 854 /* 855 * Logical mode: match each APIC that has a bit set 856 * in its LDR that matches a bit in the ldest. 
857 */ 858 CPU_ZERO(dmask); 859 amask = vm_active_cpus(vm); 860 while ((vcpuid = CPU_FFS(&amask)) != 0) { 861 vcpuid--; 862 CPU_CLR(vcpuid, &amask); 863 864 vlapic = vm_lapic(vm, vcpuid); 865 dfr = vlapic->apic_page->dfr; 866 ldr = vlapic->apic_page->ldr; 867 868 if ((dfr & APIC_DFR_MODEL_MASK) == 869 APIC_DFR_MODEL_FLAT) { 870 ldest = ldr >> 24; 871 mda_ldest = mda_flat_ldest; 872 } else if ((dfr & APIC_DFR_MODEL_MASK) == 873 APIC_DFR_MODEL_CLUSTER) { 874 if (x2apic(vlapic)) { 875 cluster = ldr >> 16; 876 ldest = ldr & 0xffff; 877 } else { 878 cluster = ldr >> 28; 879 ldest = (ldr >> 24) & 0xf; 880 } 881 if (cluster != mda_cluster_id) 882 continue; 883 mda_ldest = mda_cluster_ldest; 884 } else { 885 /* 886 * Guest has configured a bad logical 887 * model for this vcpu - skip it. 888 */ 889 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 890 "model %x - cannot deliver interrupt", dfr); 891 continue; 892 } 893 894 if ((mda_ldest & ldest) != 0) { 895 CPU_SET(vcpuid, dmask); 896 if (lowprio) 897 break; 898 } 899 } 900 } 901 } 902 903 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); 904 905 static void 906 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 907 { 908 struct LAPIC *lapic = vlapic->apic_page; 909 910 if (lapic->tpr != val) { 911 VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed " 912 "from %#x to %#x", lapic->tpr, val); 913 lapic->tpr = val; 914 vlapic_update_ppr(vlapic); 915 } 916 } 917 918 static uint8_t 919 vlapic_get_tpr(struct vlapic *vlapic) 920 { 921 struct LAPIC *lapic = vlapic->apic_page; 922 923 return (lapic->tpr); 924 } 925 926 void 927 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 928 { 929 uint8_t tpr; 930 931 if (val & ~0xf) { 932 vm_inject_gp(vlapic->vm, vlapic->vcpuid); 933 return; 934 } 935 936 tpr = val << 4; 937 vlapic_set_tpr(vlapic, tpr); 938 } 939 940 uint64_t 941 vlapic_get_cr8(struct vlapic *vlapic) 942 { 943 uint8_t tpr; 944 945 tpr = vlapic_get_tpr(vlapic); 946 return (tpr >> 4); 947 } 948 949 int 950 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 951 { 952 int i; 953 bool phys; 954 cpuset_t dmask; 955 uint64_t icrval; 956 uint32_t dest, vec, mode; 957 struct vlapic *vlapic2; 958 struct vm_exit *vmexit; 959 struct LAPIC *lapic; 960 uint16_t maxcpus; 961 962 lapic = vlapic->apic_page; 963 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 964 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 965 966 if (x2apic(vlapic)) 967 dest = icrval >> 32; 968 else 969 dest = icrval >> (32 + 24); 970 vec = icrval & APIC_VECTOR_MASK; 971 mode = icrval & APIC_DELMODE_MASK; 972 973 if (mode == APIC_DELMODE_FIXED && vec < 16) { 974 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); 975 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 976 return (0); 977 } 978 979 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 980 981 if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { 982 switch (icrval & APIC_DEST_MASK) { 983 case APIC_DEST_DESTFLD: 984 phys = ((icrval & APIC_DESTMODE_LOG) == 0); 985 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, 986 x2apic(vlapic)); 987 break; 988 case APIC_DEST_SELF: 989 CPU_SETOF(vlapic->vcpuid, &dmask); 990 break; 991 case APIC_DEST_ALLISELF: 992 dmask = vm_active_cpus(vlapic->vm); 993 break; 994 case APIC_DEST_ALLESELF: 995 dmask = vm_active_cpus(vlapic->vm); 996 CPU_CLR(vlapic->vcpuid, &dmask); 997 break; 998 default: 999 CPU_ZERO(&dmask); /* satisfy gcc */ 1000 break; 1001 } 1002 1003 while ((i = CPU_FFS(&dmask)) != 0) { 1004 i--; 1005 CPU_CLR(i, 

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
        struct LAPIC *lapic = vlapic->apic_page;

        if (lapic->tpr != val) {
                VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
                    "from %#x to %#x", lapic->tpr, val);
                lapic->tpr = val;
                vlapic_update_ppr(vlapic);
        }
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
        struct LAPIC *lapic = vlapic->apic_page;

        return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
        uint8_t tpr;

        if (val & ~0xf) {
                vm_inject_gp(vlapic->vm, vlapic->vcpuid);
                return;
        }

        tpr = val << 4;
        vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
        uint8_t tpr;

        tpr = vlapic_get_tpr(vlapic);
        return (tpr >> 4);
}

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
        int i;
        bool phys;
        cpuset_t dmask;
        uint64_t icrval;
        uint32_t dest, vec, mode;
        struct vlapic *vlapic2;
        struct vm_exit *vmexit;
        struct LAPIC *lapic;
        uint16_t maxcpus;

        lapic = vlapic->apic_page;
        lapic->icr_lo &= ~APIC_DELSTAT_PEND;
        icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

        if (x2apic(vlapic))
                dest = icrval >> 32;
        else
                dest = icrval >> (32 + 24);
        vec = icrval & APIC_VECTOR_MASK;
        mode = icrval & APIC_DELMODE_MASK;

        if (mode == APIC_DELMODE_FIXED && vec < 16) {
                vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
                VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
                return (0);
        }

        VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

        if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
                switch (icrval & APIC_DEST_MASK) {
                case APIC_DEST_DESTFLD:
                        phys = ((icrval & APIC_DESTMODE_LOG) == 0);
                        vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
                            x2apic(vlapic));
                        break;
                case APIC_DEST_SELF:
                        CPU_SETOF(vlapic->vcpuid, &dmask);
                        break;
                case APIC_DEST_ALLISELF:
                        dmask = vm_active_cpus(vlapic->vm);
                        break;
                case APIC_DEST_ALLESELF:
                        dmask = vm_active_cpus(vlapic->vm);
                        CPU_CLR(vlapic->vcpuid, &dmask);
                        break;
                default:
                        CPU_ZERO(&dmask);       /* satisfy gcc */
                        break;
                }

                while ((i = CPU_FFS(&dmask)) != 0) {
                        i--;
                        CPU_CLR(i, &dmask);
                        if (mode == APIC_DELMODE_FIXED) {
                                lapic_intr_edge(vlapic->vm, i, vec);
                                vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
                                    IPIS_SENT, i, 1);
                                VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
                                    "to vcpuid %d", vec, i);
                        } else {
                                vm_inject_nmi(vlapic->vm, i);
                                VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
                                    "to vcpuid %d", i);
                        }
                }

                return (0);     /* handled completely in the kernel */
        }

        maxcpus = vm_get_maxcpus(vlapic->vm);
        if (mode == APIC_DELMODE_INIT) {
                if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
                        return (0);

                if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
                        vlapic2 = vm_lapic(vlapic->vm, dest);

                        /* move from INIT to waiting-for-SIPI state */
                        if (vlapic2->boot_state == BS_INIT) {
                                vlapic2->boot_state = BS_SIPI;
                        }

                        return (0);
                }
        }

        if (mode == APIC_DELMODE_STARTUP) {
                if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
                        vlapic2 = vm_lapic(vlapic->vm, dest);

                        /*
                         * Ignore SIPIs in any state other than wait-for-SIPI
                         */
                        if (vlapic2->boot_state != BS_SIPI)
                                return (0);

                        vlapic2->boot_state = BS_RUNNING;

                        *retu = true;
                        vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
                        vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
                        vmexit->u.spinup_ap.vcpu = dest;
                        vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

                        return (0);
                }
        }

        /*
         * This will cause a return to userland.
         */
        return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
        int vec;

        KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

        vec = val & 0xff;
        lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
        vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
            vlapic->vcpuid, 1);
        VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}
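
/*
 * A pending vector is deliverable only if its priority class is strictly
 * above the PPR's.  For illustration: with PPR = 0x40, vector 0x4f is held
 * back (PRIO(0x4f) == PRIO(0x40) == 4) while vector 0x50 is reported,
 * which is exactly the PRIO() comparison in vlapic_pending_intr() below.
 */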
1126 */ 1127 idx = (vector / 32) * 4; 1128 1129 irrptr = &lapic->irr0; 1130 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1131 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1132 1133 isrptr = &lapic->isr0; 1134 isrptr[idx] |= 1 << (vector % 32); 1135 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1136 1137 /* 1138 * Update the PPR 1139 */ 1140 vlapic->isrvec_stk_top++; 1141 1142 stk_top = vlapic->isrvec_stk_top; 1143 if (stk_top >= ISRVEC_STK_SIZE) 1144 panic("isrvec_stk_top overflow %d", stk_top); 1145 1146 vlapic->isrvec_stk[stk_top] = vector; 1147 } 1148 1149 void 1150 vlapic_svr_write_handler(struct vlapic *vlapic) 1151 { 1152 struct LAPIC *lapic; 1153 uint32_t old, new, changed; 1154 1155 lapic = vlapic->apic_page; 1156 1157 new = lapic->svr; 1158 old = vlapic->svr_last; 1159 vlapic->svr_last = new; 1160 1161 changed = old ^ new; 1162 if ((changed & APIC_SVR_ENABLE) != 0) { 1163 if ((new & APIC_SVR_ENABLE) == 0) { 1164 /* 1165 * The apic is now disabled so stop the apic timer 1166 * and mask all the LVT entries. 1167 */ 1168 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1169 VLAPIC_TIMER_LOCK(vlapic); 1170 callout_stop(&vlapic->callout); 1171 VLAPIC_TIMER_UNLOCK(vlapic); 1172 vlapic_mask_lvts(vlapic); 1173 } else { 1174 /* 1175 * The apic is now enabled so restart the apic timer 1176 * if it is configured in periodic mode. 1177 */ 1178 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1179 if (vlapic_periodic_timer(vlapic)) 1180 vlapic_icrtmr_write_handler(vlapic); 1181 } 1182 } 1183 } 1184 1185 int 1186 vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1187 uint64_t *data, bool *retu) 1188 { 1189 struct LAPIC *lapic = vlapic->apic_page; 1190 uint32_t *reg; 1191 int i; 1192 1193 /* Ignore MMIO accesses in x2APIC mode */ 1194 if (x2apic(vlapic) && mmio_access) { 1195 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1196 offset); 1197 *data = 0; 1198 goto done; 1199 } 1200 1201 if (!x2apic(vlapic) && !mmio_access) { 1202 /* 1203 * XXX Generate GP fault for MSR accesses in xAPIC mode 1204 */ 1205 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1206 "xAPIC mode", offset); 1207 *data = 0; 1208 goto done; 1209 } 1210 1211 if (offset > sizeof(*lapic)) { 1212 *data = 0; 1213 goto done; 1214 } 1215 1216 offset &= ~3; 1217 switch(offset) 1218 { 1219 case APIC_OFFSET_ID: 1220 *data = lapic->id; 1221 break; 1222 case APIC_OFFSET_VER: 1223 *data = lapic->version; 1224 break; 1225 case APIC_OFFSET_TPR: 1226 *data = vlapic_get_tpr(vlapic); 1227 break; 1228 case APIC_OFFSET_APR: 1229 *data = lapic->apr; 1230 break; 1231 case APIC_OFFSET_PPR: 1232 *data = lapic->ppr; 1233 break; 1234 case APIC_OFFSET_EOI: 1235 *data = lapic->eoi; 1236 break; 1237 case APIC_OFFSET_LDR: 1238 *data = lapic->ldr; 1239 break; 1240 case APIC_OFFSET_DFR: 1241 *data = lapic->dfr; 1242 break; 1243 case APIC_OFFSET_SVR: 1244 *data = lapic->svr; 1245 break; 1246 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1247 i = (offset - APIC_OFFSET_ISR0) >> 2; 1248 reg = &lapic->isr0; 1249 *data = *(reg + i); 1250 break; 1251 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1252 i = (offset - APIC_OFFSET_TMR0) >> 2; 1253 reg = &lapic->tmr0; 1254 *data = *(reg + i); 1255 break; 1256 case APIC_OFFSET_IRR0 ... 

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
        struct LAPIC *lapic = vlapic->apic_page;
        uint32_t *reg;
        int i;

        /* Ignore MMIO accesses in x2APIC mode */
        if (x2apic(vlapic) && mmio_access) {
                VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
                    offset);
                *data = 0;
                goto done;
        }

        if (!x2apic(vlapic) && !mmio_access) {
                /*
                 * XXX Generate GP fault for MSR accesses in xAPIC mode
                 */
                VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
                    "xAPIC mode", offset);
                *data = 0;
                goto done;
        }

        if (offset > sizeof(*lapic)) {
                *data = 0;
                goto done;
        }

        offset &= ~3;
        switch (offset) {
        case APIC_OFFSET_ID:
                *data = lapic->id;
                break;
        case APIC_OFFSET_VER:
                *data = lapic->version;
                break;
        case APIC_OFFSET_TPR:
                *data = vlapic_get_tpr(vlapic);
                break;
        case APIC_OFFSET_APR:
                *data = lapic->apr;
                break;
        case APIC_OFFSET_PPR:
                *data = lapic->ppr;
                break;
        case APIC_OFFSET_EOI:
                *data = lapic->eoi;
                break;
        case APIC_OFFSET_LDR:
                *data = lapic->ldr;
                break;
        case APIC_OFFSET_DFR:
                *data = lapic->dfr;
                break;
        case APIC_OFFSET_SVR:
                *data = lapic->svr;
                break;
        case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
                i = (offset - APIC_OFFSET_ISR0) >> 2;
                reg = &lapic->isr0;
                *data = *(reg + i);
                break;
        case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
                i = (offset - APIC_OFFSET_TMR0) >> 2;
                reg = &lapic->tmr0;
                *data = *(reg + i);
                break;
        case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
                i = (offset - APIC_OFFSET_IRR0) >> 2;
                reg = &lapic->irr0;
                *data = atomic_load_acq_int(reg + i);
                break;
        case APIC_OFFSET_ESR:
                *data = lapic->esr;
                break;
        case APIC_OFFSET_ICR_LOW:
                *data = lapic->icr_lo;
                if (x2apic(vlapic))
                        *data |= (uint64_t)lapic->icr_hi << 32;
                break;
        case APIC_OFFSET_ICR_HI:
                *data = lapic->icr_hi;
                break;
        case APIC_OFFSET_CMCI_LVT:
        case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
                *data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
                reg = vlapic_get_lvtptr(vlapic, offset);
                KASSERT(*data == *reg, ("inconsistent lvt value at "
                    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
                break;
        case APIC_OFFSET_TIMER_ICR:
                *data = lapic->icr_timer;
                break;
        case APIC_OFFSET_TIMER_CCR:
                *data = vlapic_get_ccr(vlapic);
                break;
        case APIC_OFFSET_TIMER_DCR:
                *data = lapic->dcr_timer;
                break;
        case APIC_OFFSET_SELF_IPI:
                /*
                 * XXX generate a GP fault if vlapic is in x2apic mode
                 */
                *data = 0;
                break;
        case APIC_OFFSET_RRR:
        default:
                *data = 0;
                break;
        }
done:
        VLAPIC_CTR2(vlapic, "vlapic read offset %#lx, data %#lx",
            offset, *data);
        return (0);
}
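
/*
 * In x2APIC mode the ICR is a single 64-bit MSR, so a write to
 * APIC_OFFSET_ICR_LOW below carries the destination in the upper 32 bits
 * of 'data' and is split into icr_lo/icr_hi before the common handler
 * runs.
 */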

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
        struct LAPIC *lapic = vlapic->apic_page;
        uint32_t *regptr;
        int retval;

        KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
            ("vlapic_write: invalid offset %#lx", offset));

        VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
            offset, data);

        if (offset > sizeof(*lapic))
                return (0);

        /* Ignore MMIO accesses in x2APIC mode */
        if (x2apic(vlapic) && mmio_access) {
                VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
                    "in x2APIC mode", data, offset);
                return (0);
        }

        /*
         * XXX Generate GP fault for MSR accesses in xAPIC mode
         */
        if (!x2apic(vlapic) && !mmio_access) {
                VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
                    "in xAPIC mode", data, offset);
                return (0);
        }

        retval = 0;
        switch (offset) {
        case APIC_OFFSET_ID:
                lapic->id = data;
                vlapic_id_write_handler(vlapic);
                break;
        case APIC_OFFSET_TPR:
                vlapic_set_tpr(vlapic, data & 0xff);
                break;
        case APIC_OFFSET_EOI:
                vlapic_process_eoi(vlapic);
                break;
        case APIC_OFFSET_LDR:
                lapic->ldr = data;
                vlapic_ldr_write_handler(vlapic);
                break;
        case APIC_OFFSET_DFR:
                lapic->dfr = data;
                vlapic_dfr_write_handler(vlapic);
                break;
        case APIC_OFFSET_SVR:
                lapic->svr = data;
                vlapic_svr_write_handler(vlapic);
                break;
        case APIC_OFFSET_ICR_LOW:
                lapic->icr_lo = data;
                if (x2apic(vlapic))
                        lapic->icr_hi = data >> 32;
                retval = vlapic_icrlo_write_handler(vlapic, retu);
                break;
        case APIC_OFFSET_ICR_HI:
                lapic->icr_hi = data;
                break;
        case APIC_OFFSET_CMCI_LVT:
        case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
                regptr = vlapic_get_lvtptr(vlapic, offset);
                *regptr = data;
                vlapic_lvt_write_handler(vlapic, offset);
                break;
        case APIC_OFFSET_TIMER_ICR:
                lapic->icr_timer = data;
                vlapic_icrtmr_write_handler(vlapic);
                break;

        case APIC_OFFSET_TIMER_DCR:
                lapic->dcr_timer = data;
                vlapic_dcr_write_handler(vlapic);
                break;

        case APIC_OFFSET_ESR:
                vlapic_esr_write_handler(vlapic);
                break;

        case APIC_OFFSET_SELF_IPI:
                if (x2apic(vlapic))
                        vlapic_self_ipi_handler(vlapic, data);
                break;

        case APIC_OFFSET_VER:
        case APIC_OFFSET_APR:
        case APIC_OFFSET_PPR:
        case APIC_OFFSET_RRR:
        case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
        case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
        case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
        case APIC_OFFSET_TIMER_CCR:
        default:
                /* Read only */
                break;
        }

        return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
        struct LAPIC *lapic;

        lapic = vlapic->apic_page;
        bzero(lapic, sizeof(struct LAPIC));

        lapic->id = vlapic_get_id(vlapic);
        lapic->version = VLAPIC_VERSION;
        lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
        lapic->dfr = 0xffffffff;
        lapic->svr = APIC_SVR_VECTOR;
        vlapic_mask_lvts(vlapic);
        vlapic_reset_tmr(vlapic);

        lapic->dcr_timer = 0;
        vlapic_dcr_write_handler(vlapic);

        if (vlapic->vcpuid == 0)
                vlapic->boot_state = BS_RUNNING;        /* BSP */
        else
                vlapic->boot_state = BS_INIT;           /* AP */

        vlapic->svr_last = lapic->svr;
}
1457 */ 1458 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1459 callout_init(&vlapic->callout, 1); 1460 1461 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1462 1463 if (vlapic->vcpuid == 0) 1464 vlapic->msr_apicbase |= APICBASE_BSP; 1465 1466 vlapic_reset(vlapic); 1467 } 1468 1469 void 1470 vlapic_cleanup(struct vlapic *vlapic) 1471 { 1472 1473 callout_drain(&vlapic->callout); 1474 } 1475 1476 uint64_t 1477 vlapic_get_apicbase(struct vlapic *vlapic) 1478 { 1479 1480 return (vlapic->msr_apicbase); 1481 } 1482 1483 int 1484 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1485 { 1486 1487 if (vlapic->msr_apicbase != new) { 1488 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1489 "not supported", vlapic->msr_apicbase, new); 1490 return (-1); 1491 } 1492 1493 return (0); 1494 } 1495 1496 void 1497 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1498 { 1499 struct vlapic *vlapic; 1500 struct LAPIC *lapic; 1501 1502 vlapic = vm_lapic(vm, vcpuid); 1503 1504 if (state == X2APIC_DISABLED) 1505 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1506 else 1507 vlapic->msr_apicbase |= APICBASE_X2APIC; 1508 1509 /* 1510 * Reset the local APIC registers whose values are mode-dependent. 1511 * 1512 * XXX this works because the APIC mode can be changed only at vcpu 1513 * initialization time. 1514 */ 1515 lapic = vlapic->apic_page; 1516 lapic->id = vlapic_get_id(vlapic); 1517 if (x2apic(vlapic)) { 1518 lapic->ldr = x2apic_ldr(vlapic); 1519 lapic->dfr = 0; 1520 } else { 1521 lapic->ldr = 0; 1522 lapic->dfr = 0xffffffff; 1523 } 1524 1525 if (state == X2APIC_ENABLED) { 1526 if (vlapic->ops.enable_x2apic_mode) 1527 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1528 } 1529 } 1530 1531 void 1532 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1533 int delmode, int vec) 1534 { 1535 bool lowprio; 1536 int vcpuid; 1537 cpuset_t dmask; 1538 1539 if (delmode != IOART_DELFIXED && 1540 delmode != IOART_DELLOPRI && 1541 delmode != IOART_DELEXINT) { 1542 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1543 return; 1544 } 1545 lowprio = (delmode == IOART_DELLOPRI); 1546 1547 /* 1548 * We don't provide any virtual interrupt redirection hardware so 1549 * all interrupts originating from the ioapic or MSI specify the 1550 * 'dest' in the legacy xAPIC format. 1551 */ 1552 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1553 1554 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1555 vcpuid--; 1556 CPU_CLR(vcpuid, &dmask); 1557 if (delmode == IOART_DELEXINT) { 1558 vm_inject_extint(vm, vcpuid); 1559 } else { 1560 lapic_set_intr(vm, vcpuid, vec, level); 1561 } 1562 } 1563 } 1564 1565 void 1566 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1567 { 1568 /* 1569 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1570 * 1571 * This is done by leveraging features like Posted Interrupts (Intel) 1572 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1573 * 1574 * If neither of these features are available then fallback to 1575 * sending an IPI to 'hostcpu'. 
1576 */ 1577 if (vlapic->ops.post_intr) 1578 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1579 else 1580 ipi_cpu(hostcpu, ipinum); 1581 } 1582 1583 bool 1584 vlapic_enabled(struct vlapic *vlapic) 1585 { 1586 struct LAPIC *lapic = vlapic->apic_page; 1587 1588 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1589 (lapic->svr & APIC_SVR_ENABLE) != 0) 1590 return (true); 1591 else 1592 return (false); 1593 } 1594 1595 static void 1596 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1597 { 1598 struct LAPIC *lapic; 1599 uint32_t *tmrptr, mask; 1600 int idx; 1601 1602 lapic = vlapic->apic_page; 1603 tmrptr = &lapic->tmr0; 1604 idx = (vector / 32) * 4; 1605 mask = 1 << (vector % 32); 1606 if (level) 1607 tmrptr[idx] |= mask; 1608 else 1609 tmrptr[idx] &= ~mask; 1610 1611 if (vlapic->ops.set_tmr != NULL) 1612 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1613 } 1614 1615 void 1616 vlapic_reset_tmr(struct vlapic *vlapic) 1617 { 1618 int vector; 1619 1620 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1621 1622 for (vector = 0; vector <= 255; vector++) 1623 vlapic_set_tmr(vlapic, vector, false); 1624 } 1625 1626 void 1627 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1628 int delmode, int vector) 1629 { 1630 cpuset_t dmask; 1631 bool lowprio; 1632 1633 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1634 1635 /* 1636 * A level trigger is valid only for fixed and lowprio delivery modes. 1637 */ 1638 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1639 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1640 "delivery-mode %d", delmode); 1641 return; 1642 } 1643 1644 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1645 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1646 1647 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1648 return; 1649 1650 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1651 vlapic_set_tmr(vlapic, vector, true); 1652 } 1653