/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
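 *   (the dividers returned by vlapic_timer_divisor() below are also powers
 *   of two, so the quotient passed to FREQ2BT() stays a power of two and
 *   the resulting bintime is exact)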
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
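		 *
		 * The remaining bintime is converted back to timer ticks:
		 * whole seconds are scaled by the timer frequency and the
		 * fractional part is divided by the bintime fraction that
		 * corresponds to a single tick.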
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
			    vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

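	/* One-shot vs. periodic mode is selected by the timer-mode bit. */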
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
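		 *
		 * The pins are therefore delivered directly as ExtINT and
		 * NMI below, bypassing the LVT registers entirely.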
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERED, vector, 1);
	}
	return (0);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
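			 *
			 * Timer interrupts that would have fired during the
			 * lag are coalesced into the single interrupt
			 * delivered above.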
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
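		 *
		 * e.g. in the flat model an MDA of 0x3 matches any vcpu
		 * with bit 0 or bit 1 set in its 8-bit logical ID.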
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
		    "from %#x to %#x", lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			vlapic2->boot_state = BS_RUNNING;

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
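	 *
	 * The IRR is updated atomically because other threads may post
	 * interrupts to it concurrently; the ISR is only modified in the
	 * context of the vcpu itself so a plain store is sufficient.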
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		/* Read only */
		break;
	}

	return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
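	 *
	 * (a spin mutex may be acquired with a critical section held,
	 * whereas acquiring a default MTX_DEF mutex there would panic)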
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vm, vcpuid);
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
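	 *
	 * The IPI does not itself carry the vector; it simply forces the
	 * vcpu out of guest mode so that the pending interrupt is picked
	 * up on the next VM entry.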
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}