/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define	VLAPIC_BUS_FREQ	(128 * 1024 * 1024)
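
/*
 * Illustration of the power-of-two choice: FREQ2BT() (sys/time.h) encodes
 * one timer tick as a binary fraction of a second, roughly
 * frac = 2^64 / freq.  With the DCR set to divide-by-4 the effective
 * frequency is 32 * 1024 * 1024 Hz (2^25), so a tick is represented
 * exactly as frac = 2^39 with no rounding error.
 */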

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
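
/*
 * The current-count register (CCR) is not stored in the APIC page; it is
 * derived on demand from the time remaining until the callout fires.  The
 * whole-second part of the remainder contributes bt_rem.sec * frequency
 * ticks and the fractional part contributes bt_rem.frac /
 * timer_freq_bt.frac ticks, since timer_freq_bt.frac is the bintime
 * fraction spanned by a single timer tick.
 */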

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches the
	 * vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
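
/*
 * Note on the indexing above: each 32-bit APIC register occupies a
 * 16-byte aligned slot in 'struct LAPIC', so consecutive IRR/ISR/TMR
 * words are 4 uint32_t apart and 'idx = (vector / 32) * 4'.  For example,
 * vector 77 lives in the third word (irrptr[8]) at bit position 13.
 */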

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}
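
/*
 * Deliver the interrupt described by an LVT entry.  Only the fixed, NMI
 * and ExtINT delivery modes are handled here.  The timer and error LVTs
 * have no delivery mode field in hardware, which is why their callers OR
 * in APIC_LVT_DM_FIXED before calling vlapic_fire_lvt().
 */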

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		// Other modes ignored
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
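
/*
 * A worked example of the PPR computation below: with TPR = 0x55 and the
 * highest in-service vector at 0x71, PRIO(0x55) = 5 < PRIO(0x71) = 7, so
 * the PPR becomes 0x70.  Raising the TPR to 0x85 would yield a PPR of
 * 0x85 instead.
 */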

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
			    vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	// The error LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	// The timer LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vm, vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}
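
/*
 * Timer callout handler.  In periodic mode the next expiry is scheduled
 * one period after the intended fire time rather than after "now", so
 * latency in servicing the callout does not accumulate: with a 10ms
 * period, a handler that runs 2ms late re-arms the callout for 8ms.
 */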

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}
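
/*
 * Example of the period computation below: with the divider at 1 the
 * timer runs at 128 * 1024 * 1024 (2^27) ticks per second, so programming
 * icr_timer with 2^27 yields a timer_period_bt of exactly one second.
 */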

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
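
/*
 * TPR accessors.  The TPR is also architecturally visible through %cr8,
 * which maps to TPR[7:4]: writing 0x9 to %cr8 sets the TPR to 0x90, and
 * values above 0xf take a #GP (see vlapic_set_cr8() below).
 */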

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
		    "from %#x to %#x", lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}
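
/*
 * Handle a write to the low word of the ICR.  The 64-bit ICR value holds
 * the vector in bits 7:0 and the delivery mode in bits 10:8; the
 * destination sits in bits 63:56 (xAPIC) or occupies all of bits 63:32
 * (x2APIC), hence the mode-dependent shift used to extract 'dest' below.
 */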

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;
	uint16_t maxcpus;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	maxcpus = vm_get_maxcpus(vlapic->vm);
	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			vlapic2->boot_state = BS_RUNNING;

			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
	    vlapic->vcpuid, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}
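
/*
 * The 'isrvec_stk' records in-service vectors in nesting order so that an
 * EOI can pop the most recently accepted vector and recompute the PPR;
 * entry 0 is a permanent sentinel of 0.  Accepting vector 0x30 and then
 * 0x71 leaves the stack as [0, 0x30, 0x71], and the next EOI retires
 * 0x71.
 */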

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}
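
/*
 * Register reads arrive here either via MMIO (xAPIC mode) or via the MSR
 * emulation path (x2APIC mode), where MSR 0x800 + (offset >> 4) maps to
 * the register at 'offset' - e.g. a RDMSR of 0x808 reads the TPR at
 * offset 0x80.  Accesses through the wrong interface for the current
 * mode are ignored.
 */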

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		// Read only.
		break;
	}

	return (retval);
}
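
/*
 * Reset the APIC page to its power-on state.  The version register packs
 * the highest LVT index into bits 23:16 above the 8-bit version: assuming
 * VLAPIC_MAXLVT_INDEX is 6 (the CMCI LVT), a guest reads 0x60010.
 */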

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}
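
/*
 * The APIC base MSR keeps the 0xfee00000 base address in its upper bits
 * alongside the BSP flag (bit 8), the x2APIC enable (bit 10) and the
 * global enable (bit 11); the BSP therefore starts out with the value
 * 0xfee00900.
 */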

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vm, vcpuid);
		} else {
			lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * or the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}
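
/*
 * Set or clear a vector's bit in the trigger-mode register.  Marking
 * vector 0x45 level-triggered, for example, sets bit 5 of the third TMR
 * word (tmrptr[8]); an EOI for a vector whose TMR bit is set is forwarded
 * to the vioapic (see vlapic_process_eoi()).
 */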

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}