/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define	VLAPIC_BUS_FREQ	(128 * 1024 * 1024)
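
/*
 * Worked example of the precision claim above, assuming the standard
 * FREQ2BT() definition from <sys/time.h>: FREQ2BT() computes a per-tick
 * fraction of ((1ULL << 63) / freq) << 1.  With freq = 2^27 this is
 * exactly 2^37 (i.e. 2^64 / 2^27) with no truncation, which is why a
 * power-of-two bus frequency is chosen.
 */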

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
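
/*
 * Worked example (illustrative): with VLAPIC_BUS_FREQ = 2^27 Hz and the
 * guest programming a divide-by-2 DCR, the timer ticks at 2^26 Hz, so an
 * icr_timer value of 0x4000000 (2^26) yields a one-second period and
 * vlapic_get_ccr() below counts that value down to 0 as the deadline
 * approaches.
 */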

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches the
	 * vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
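
/*
 * Indexing note (illustrative): the 256 interrupt vectors are spread
 * across eight 32-bit IRR/ISR/TMR registers, and each register occupies
 * 16 bytes (four uint32_t slots) in the APIC page.  Hence the stride of
 * 4 in 'idx' above: vector 0x45 (69) maps to idx = (69 / 32) * 4 = 8
 * (i.e. irr2) with mask = 1 << (69 % 32) = 1 << 5.
 */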

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}
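
/*
 * Note on 'lvt_last' (descriptive, inferred from the code above):
 * vlapic_lvt_write_handler() publishes the sanitized LVT value with a
 * release store, and vlapic_get_lvt() reads it back with an acquire
 * load, so the firing paths below can sample a consistent LVT snapshot
 * without serializing against writers of the APIC page.
 */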

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
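
/*
 * Worked example of the PPR calculation above (illustrative): with
 * ISRV = 0x91 (priority class 9) and TPR = 0x40 (class 4), the
 * in-service vector wins and PPR = 0x91 & 0xf0 = 0x90.  If the guest
 * then raises TPR to 0xa0, PRIO(tpr) >= PRIO(isrvec) and PPR = TPR = 0xa0.
 */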

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use by
	 * the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERED, vector, 1);
	}
	return (0);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}
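
/*
 * Example of the catch-up logic above (illustrative): with a 1000us
 * period, a handler that runs 100us late schedules the next callout
 * only 900us out, keeping expirations aligned to the original time
 * base; a handler more than a full period late abandons catch-up and
 * restarts the time base from 'now' instead.
 */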

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
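
/*
 * Illustrative example for the destination matching implemented by
 * vlapic_calcdest() below: in the xAPIC "Flat Model" an MDA of 0x05
 * addresses every CPU whose LDR[31:24] has bit 0 or bit 2 set, so vcpus
 * with LDR values of 0x01000000 and 0x04000000 both match and (absent
 * lowprio arbitration) both receive the interrupt.
 */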

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
		    "from %#x to %#x", lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}
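
/*
 * CR8 holds bits 7:4 of the TPR, so the two views above stay in sync:
 * e.g. a guest "mov $0x9, %cr8" yields a TPR of 0x90, and reading CR8
 * back returns 9.  Values above 0xf raise #GP per the architecture.
 */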

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			vlapic2->boot_state = BS_RUNNING;

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use by
	 * the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}
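
/*
 * Acceptance example (illustrative): if vector 0x45 is accepted while
 * vector 0x31 is already in service, vlapic_intr_accepted() below leaves
 * isrvec_stk as { 0, 0x31, 0x45 } with isrvec_stk_top == 2; each EOI then
 * pops one entry and vlapic_update_ppr() recomputes the PPR from the new
 * top of stack.
 */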

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * Clear the ready bit for the vector being accepted in the IRR
	 * and set the vector as in-service in the ISR.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}
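
/*
 * Access-mode handling shared by vlapic_read() and vlapic_write() below:
 * MMIO accesses are ignored once the vlapic is in x2APIC mode, and MSR
 * accesses are ignored while it is in xAPIC mode.  The XXX notes in both
 * functions record that injecting #GP would be the architecturally
 * correct response to the latter.
 */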

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#lx, data %#lx",
	    offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		/* Read only */
		break;
	}

	return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}
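
/*
 * AP boot-state machine (descriptive): vlapic_reset() starts the BSP in
 * BS_RUNNING and the APs in BS_INIT; vlapic_icrlo_write_handler() then
 * moves an AP from BS_INIT to BS_SIPI on an INIT IPI and from BS_SIPI to
 * BS_RUNNING on a STARTUP IPI, at which point the SPINUP_AP exit hands
 * the new vcpu to userland.
 */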

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vm, vcpuid);
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}