/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vatpic.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

#define	VLAPIC_BUS_FREQ		tsc_freq
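
/*
 * Note on the ID register layout handled below: in xAPIC mode the 8-bit
 * APIC ID lives in bits 31:24 of the ID register, while in x2APIC mode
 * the register holds the full 32-bit APIC ID. For example, vcpuid 2
 * reads back as 0x02000000 in xAPIC mode and as 0x00000002 in x2APIC
 * mode.
 */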

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
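
/*
 * Illustration of the x2APIC LDR derivation above: the low 4 bits of the
 * APIC ID select a bit in the 16-bit logical ID, and the remaining bits
 * form the cluster ID in bits 31:16. For example, APIC ID 0x13 yields
 * cluster 1 and logical bit 3, i.e. LDR 0x00010008.
 */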

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
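
/*
 * Worked example for the CCR computation above: with a 100 MHz timer
 * frequency, 'timer_freq_bt.frac' is 2^64 / 10^8. If the callout is due
 * to fire 0.5 seconds from now then 'bt_rem' is {0, 2^63} and the CCR
 * reads back as roughly 0.5 * 10^8 = 50,000,000 remaining ticks.
 */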

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
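
/*
 * A note on the 'idx = (vector / 32) * 4' pattern used above and below:
 * the IRR/ISR/TMR banks are eight 32-bit registers, each aligned on a
 * 16-byte boundary in the LAPIC page, so consecutive banks are four
 * uint32_t's apart. For example, vector 0x45 maps to bank 2 (irr2),
 * index 8 from irr0, bit 5.
 */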

static VMM_STAT(VLAPIC_EXTINT_COUNT, "number of ExtINTs received by vlapic");

static void
vlapic_deliver_extint(struct vlapic *vlapic)
{
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_EXTINT_COUNT, 1);
	vlapic->extint_pending = true;
	vcpu_notify_event(vlapic->vm, vlapic->vcpuid, false);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vlapic_deliver_extint(vlapic);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
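
/*
 * Worked example of the PPR rule implemented below: with TPR = 0x30
 * (priority class 3) and in-service vector 0x45 (class 4), the ISRV
 * class is higher so PPR = 0x45 & 0xf0 = 0x40. If TPR were raised to
 * 0x50 then PPR would follow the TPR and read 0x50.
 */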

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}
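
/*
 * Note: the EOI scan above walks the ISR banks from ISR7 down to ISR1
 * and clears the highest set bit it finds, since the highest in-service
 * vector is by definition the one being completed. If the corresponding
 * TMR bit is set, the EOI is also propagated to the virtual I/O APIC so
 * that a level-triggered source can be re-sampled.
 */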

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vlapic_deliver_extint(vlapic);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}
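
/*
 * Periodic-timer catch-up illustration for the callout handler below:
 * with a 10 ms period, if the callout fires 2 ms late the next callout
 * is scheduled 8 ms out so the long-run rate is preserved. If the
 * callout is more than a full period late, the time base is simply
 * reset instead of trying to replay the missed expirations.
 */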

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
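
/*
 * Logical destination example for vlapic_calcdest() below: in the xAPIC
 * "Flat Model" the MDA is an 8-bit bitmask, so a logical IPI with
 * dest = 0x03 matches every vcpu whose LDR has bit 0 or bit 1 set. In
 * the "Cluster Model" the cluster ID must match before the logical ID
 * is compared as a bitmask.
 */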

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
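
/*
 * ICR layout as consumed by vlapic_icrlo_write_handler() below: the
 * vector is in bits 7:0, the delivery mode in bits 10:8 and the
 * destination shorthand in bits 19:18. The destination itself occupies
 * the full upper 32 bits in x2APIC mode but only bits 63:56 in xAPIC
 * mode, which is why 'dest' is extracted with two different shifts.
 */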

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			/*
			 * XXX this assumes that the startup IPI always succeeds
			 */
			vlapic2->boot_state = BS_RUNNING;
			vm_activate_cpu(vlapic2->vm, dest);

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}
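
/*
 * Priority gating example for vlapic_pending_intr() below: a pending
 * vector is reported only if its priority class is strictly greater
 * than the class of the current PPR. With PPR = 0x40, vectors 0x40-0x4f
 * remain blocked while vector 0x51 would be delivered.
 */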

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->extint_pending) {
		if (vecptr == NULL)
			return (1);
		else
			return (vatpic_pending_intr(vlapic->vm, vecptr));
	}

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->extint_pending) {
		vlapic->extint_pending = false;
		vatpic_intr_accepted(vlapic->vm, vector);
		return;
	}

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}
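
/*
 * Note for the SVR handler below: besides the spurious vector itself,
 * the SVR carries the APIC software-enable bit (APIC_SVR_ENABLE, bit 8),
 * so a guest write of e.g. 0x1ff software-enables the APIC while 0x0ff
 * software-disables it.
 */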

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}
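
/*
 * Note on the ICR handling in the read path above and the write path
 * below: in x2APIC mode the ICR is a single 64-bit register, so reads
 * of ICR_LOW fold 'icr_hi' into the upper 32 bits of the result and a
 * 64-bit write to ICR_LOW is split back into 'icr_lo' and 'icr_hi'.
 */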

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		lapic->tpr = data & 0xff;
		vlapic_update_ppr(vlapic);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		/* Read only. */
		break;
	}

	return (retval);
}
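
/*
 * Reset-state note for vlapic_reset() below: DFR comes up as all-ones
 * (flat model), the SVR vector field reads 0xff with the software-enable
 * bit clear, and all LVT entries are masked - matching the power-on
 * defaults described for the local APIC.
 */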

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vlapic_deliver_extint(vm_lapic(vm, vcpuid));
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}