1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

/* Extract the 4-bit priority class from an 8-bit vector/TPR/PPR value. */
#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

/* True when the guest has enabled x2APIC mode in the APIC-base MSR. */
#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/* The virtual APIC timer is clocked at the TSC frequency. */
#define	VLAPIC_BUS_FREQ	tsc_freq

/*
 * Return this vcpu's local APIC ID: the raw vcpuid in x2APIC mode, or
 * the vcpuid shifted into bits 31:24 of the ID register in xAPIC mode.
 */
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

/*
 * Compute the (read-only) logical destination register value used in
 * x2APIC mode: bits 31:16 hold the cluster id derived from the APIC ID
 * and bits 15:0 hold a single bit identifying the APIC in the cluster.
 */
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

/*
 * Sanitize a guest write to the destination format register.  The DFR
 * does not exist in x2APIC mode, so writes are ignored there.
 */
void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	/* Only the model bits are writable; reserved bits read as ones. */
	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

/*
 * Sanitize a guest write to the logical destination register.  In
 * x2APIC mode LDR is read-only and is restored to its fixed value.
 */
void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

/* Undo any guest attempt to change the (immutable) APIC ID register. */
void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

/*
 * Translate the divide configuration register into the timer divisor.
 * Only bits 0, 1 and 3 of the DCR encode the divisor, hence the 0xB mask.
 */
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

/*
 * Compute the current-count register (CCR) from the time remaining
 * until the timer callout is scheduled to fire.  Returns 0 when the
 * callout is inactive or has already expired.
 */
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

/*
 * Recompute the timer frequency and period after a guest write to the
 * divide configuration register.
 */
void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * Handle a write to the error status register: latch the accumulated
 * pending error bits into ESR and clear the pending set.
 */
void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

/*
 * Mark 'vector' pending in the IRR with the given trigger mode.
 * Returns 0 if the interrupt was dropped (vlapic software-disabled)
 * and 1 if the caller should notify the vcpu.
 */
int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	/* Hardware-assisted APIC virtualization may take over this path. */
	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	/* Each IRRx register covers 32 vectors and is 4 dwords apart. */
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	KASSERT((tmrptr[idx] & mask) == (level ? mask : 0),
	    ("vlapic TMR[%d] is 0x%08x but interrupt is %s-triggered",
	    idx / 4, tmrptr[idx], level ? "level" : "edge"));

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

/* Map an LVT register offset to its location in the virtual APIC page. */
static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ...
APIC_OFFSET_ERROR_LVT: 310 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 311 return ((&lapic->lvt_timer) + i);; 312 default: 313 panic("vlapic_get_lvt: invalid LVT\n"); 314 } 315 } 316 317 static __inline int 318 lvt_off_to_idx(uint32_t offset) 319 { 320 int index; 321 322 switch (offset) { 323 case APIC_OFFSET_CMCI_LVT: 324 index = APIC_LVT_CMCI; 325 break; 326 case APIC_OFFSET_TIMER_LVT: 327 index = APIC_LVT_TIMER; 328 break; 329 case APIC_OFFSET_THERM_LVT: 330 index = APIC_LVT_THERMAL; 331 break; 332 case APIC_OFFSET_PERF_LVT: 333 index = APIC_LVT_PMC; 334 break; 335 case APIC_OFFSET_LINT0_LVT: 336 index = APIC_LVT_LINT0; 337 break; 338 case APIC_OFFSET_LINT1_LVT: 339 index = APIC_LVT_LINT1; 340 break; 341 case APIC_OFFSET_ERROR_LVT: 342 index = APIC_LVT_ERROR; 343 break; 344 default: 345 index = -1; 346 break; 347 } 348 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 349 "invalid lvt index %d for offset %#x", index, offset)); 350 351 return (index); 352 } 353 354 static __inline uint32_t 355 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 356 { 357 int idx; 358 uint32_t val; 359 360 idx = lvt_off_to_idx(offset); 361 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 362 return (val); 363 } 364 365 void 366 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 367 { 368 uint32_t *lvtptr, mask, val; 369 struct LAPIC *lapic; 370 int idx; 371 372 lapic = vlapic->apic_page; 373 lvtptr = vlapic_get_lvtptr(vlapic, offset); 374 val = *lvtptr; 375 idx = lvt_off_to_idx(offset); 376 377 if (!(lapic->svr & APIC_SVR_ENABLE)) 378 val |= APIC_LVT_M; 379 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 380 switch (offset) { 381 case APIC_OFFSET_TIMER_LVT: 382 mask |= APIC_LVTT_TM; 383 break; 384 case APIC_OFFSET_ERROR_LVT: 385 break; 386 case APIC_OFFSET_LINT0_LVT: 387 case APIC_OFFSET_LINT1_LVT: 388 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 389 /* FALLTHROUGH */ 390 default: 391 mask |= APIC_LVT_DM; 392 break; 393 } 394 val 
&= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

/*
 * Set the mask bit in every LVT register, funneling each through the
 * write handler so the 'lvt_last' shadow stays consistent.  Used on
 * reset and when the vlapic is software-disabled.
 */
static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

/*
 * Deliver the local interrupt described by LVT value 'lvt' to this
 * vcpu.  Returns 1 if a fixed interrupt or NMI was delivered and 0 if
 * the LVT was masked, had an illegal vector, or used an unsupported
 * delivery mode.
 */
static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		// Other modes ignored
		return (0);
	}
	return (1);
}

#if 1
/* Debug helper: dump the ISR registers and the in-service vector stack. */
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt,
Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	/* Expensive consistency checks on the in-service vector stack. */
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	/* PPR is the higher of the task priority and the in-service class. */
	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

/*
 * Handle an EOI write: clear the highest-priority bit set in the ISR,
 * pop the in-service vector stack, recompute the PPR, and forward the
 * EOI to the vioapic when the vector was level-triggered (TMR set).
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}

/* Return the masked field of an LVT value. */
static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

/* Return non-zero when the timer LVT is configured in periodic mode. */
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

/*
 * Record an APIC error in the pending set and deliver the error LVT
 * interrupt.  The 'esr_firing' flag breaks the recursion that would
 * occur if firing the error interrupt itself raised another error.
 */
void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	// The error LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

/* Deliver the timer LVT interrupt.  Called with the timer lock held. */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	// The timer LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

/* Deliver a corrected machine check interrupt via the CMCI LVT. */
void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

/*
 * Trigger the local interrupt identified by LVT index 'vector'
 * (APIC_LVT_*).  Returns EINVAL for an unknown index, 0 otherwise.
 */
int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}

/*
 * Callout handler for the virtual APIC timer: fire the timer LVT and,
 * for a periodic timer, schedule the next expiration relative to the
 * previous one so the period does not drift.
 */
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * Handle a write to the initial-count register: arm a callout for the
 * new timer period, or stop the timer when the count is zero.
 */
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				/* Lowest-priority: deliver to one vcpu only. */
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

/*
 * Handle a write to the low dword of the interrupt command register.
 * Fixed and NMI IPIs are delivered directly to the destination vcpus;
 * INIT/STARTUP IPIs drive the AP boot-state machine, with STARTUP
 * bouncing out to userspace via a VM_EXITCODE_SPINUP_AP exit.
 * Returns 0 when handled completely in the kernel and 1 when the
 * emulation must be completed in userland.
 */
int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	/* The destination is 32 bits in x2APIC mode, 8 bits in xAPIC mode. */
	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
				    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		/* Only the BSP may INIT an AP. */
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			/*
			 * XXX this assumes that the startup IPI always succeeds
			 */
			vlapic2->boot_state = BS_RUNNING;
			vm_activate_cpu(vlapic2->vm, dest);

			/* Userspace finishes spinning up the AP. */
			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}

/*
 * Return 1 when an interrupt is pending in the IRR and deliverable at
 * the current PPR, storing its vector in '*vecptr' (if not NULL);
 * return 0 otherwise.
 */
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	/* Hardware-assisted APIC virtualization may take over this path. */
	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

/*
 * Accept 'vector' for service: move it from the IRR to the ISR, push
 * it on the in-service vector stack and recompute the PPR.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}

/*
 * Handle a write to the spurious vector register.  Toggling the
 * software-enable bit stops the timer and masks all LVTs (disable),
 * or restarts a periodic timer (enable).
 */
void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

/*
 * Emulate a read of the APIC register at 'offset', storing the result
 * in '*data'.  Reads of reserved or write-only registers return 0.
 * Always returns 0; 'retu' is unused.
 */
int
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/*
	 * NOTE(review): '>' lets offset == sizeof(*lapic) fall through to
	 * the switch, where it hits the default case and reads as 0; '>='
	 * would state the intent more directly - confirm.
	 */
	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch(offset)
	{
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		/* In x2APIC mode the ICR is read as a single 64-bit MSR. */
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return 0;
}

/*
 * Emulate a write of 'data' to the APIC register at 'offset'.  Writes
 * to read-only registers are ignored.  Returns the ICR-low handler's
 * result for ICR writes (which may request a userspace exit through
 * 'retu') and 0 otherwise.
 */
int
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);

	if (offset > sizeof(*lapic)) {
		return 0;
	}

	retval = 0;
	switch(offset)
	{
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		lapic->tpr = data & 0xff;
		vlapic_update_ppr(vlapic);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr =
data; 1235 vlapic_ldr_write_handler(vlapic); 1236 break; 1237 case APIC_OFFSET_DFR: 1238 lapic->dfr = data; 1239 vlapic_dfr_write_handler(vlapic); 1240 break; 1241 case APIC_OFFSET_SVR: 1242 lapic->svr = data; 1243 vlapic_svr_write_handler(vlapic); 1244 break; 1245 case APIC_OFFSET_ICR_LOW: 1246 lapic->icr_lo = data; 1247 if (x2apic(vlapic)) 1248 lapic->icr_hi = data >> 32; 1249 retval = vlapic_icrlo_write_handler(vlapic, retu); 1250 break; 1251 case APIC_OFFSET_ICR_HI: 1252 lapic->icr_hi = data; 1253 break; 1254 case APIC_OFFSET_CMCI_LVT: 1255 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1256 regptr = vlapic_get_lvtptr(vlapic, offset); 1257 *regptr = data; 1258 vlapic_lvt_write_handler(vlapic, offset); 1259 break; 1260 case APIC_OFFSET_TIMER_ICR: 1261 lapic->icr_timer = data; 1262 vlapic_icrtmr_write_handler(vlapic); 1263 break; 1264 1265 case APIC_OFFSET_TIMER_DCR: 1266 lapic->dcr_timer = data; 1267 vlapic_dcr_write_handler(vlapic); 1268 break; 1269 1270 case APIC_OFFSET_ESR: 1271 vlapic_esr_write_handler(vlapic); 1272 break; 1273 case APIC_OFFSET_VER: 1274 case APIC_OFFSET_APR: 1275 case APIC_OFFSET_PPR: 1276 case APIC_OFFSET_RRR: 1277 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1278 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1279 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1280 case APIC_OFFSET_TIMER_CCR: 1281 default: 1282 // Read only. 
1283 break; 1284 } 1285 1286 return (retval); 1287 } 1288 1289 static void 1290 vlapic_reset(struct vlapic *vlapic) 1291 { 1292 struct LAPIC *lapic; 1293 1294 lapic = vlapic->apic_page; 1295 bzero(lapic, sizeof(struct LAPIC)); 1296 1297 lapic->id = vlapic_get_id(vlapic); 1298 lapic->version = VLAPIC_VERSION; 1299 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1300 lapic->dfr = 0xffffffff; 1301 lapic->svr = APIC_SVR_VECTOR; 1302 vlapic_mask_lvts(vlapic); 1303 vlapic_reset_tmr(vlapic); 1304 1305 lapic->dcr_timer = 0; 1306 vlapic_dcr_write_handler(vlapic); 1307 1308 if (vlapic->vcpuid == 0) 1309 vlapic->boot_state = BS_RUNNING; /* BSP */ 1310 else 1311 vlapic->boot_state = BS_INIT; /* AP */ 1312 1313 vlapic->svr_last = lapic->svr; 1314 } 1315 1316 void 1317 vlapic_init(struct vlapic *vlapic) 1318 { 1319 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1320 KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU, 1321 ("vlapic_init: vcpuid is not initialized")); 1322 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1323 "initialized")); 1324 1325 /* 1326 * If the vlapic is configured in x2apic mode then it will be 1327 * accessed in the critical section via the MSR emulation code. 1328 * 1329 * Therefore the timer mutex must be a spinlock because blockable 1330 * mutexes cannot be acquired in a critical section. 
1331 */ 1332 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1333 callout_init(&vlapic->callout, 1); 1334 1335 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1336 1337 if (vlapic->vcpuid == 0) 1338 vlapic->msr_apicbase |= APICBASE_BSP; 1339 1340 vlapic_reset(vlapic); 1341 } 1342 1343 void 1344 vlapic_cleanup(struct vlapic *vlapic) 1345 { 1346 1347 callout_drain(&vlapic->callout); 1348 } 1349 1350 uint64_t 1351 vlapic_get_apicbase(struct vlapic *vlapic) 1352 { 1353 1354 return (vlapic->msr_apicbase); 1355 } 1356 1357 void 1358 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1359 { 1360 struct LAPIC *lapic; 1361 enum x2apic_state state; 1362 uint64_t old; 1363 int err; 1364 1365 err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state); 1366 if (err) 1367 panic("vlapic_set_apicbase: err %d fetching x2apic state", err); 1368 1369 if (state == X2APIC_DISABLED) 1370 new &= ~APICBASE_X2APIC; 1371 1372 old = vlapic->msr_apicbase; 1373 vlapic->msr_apicbase = new; 1374 1375 /* 1376 * If the vlapic is switching between xAPIC and x2APIC modes then 1377 * reset the mode-dependent registers. 
1378 */ 1379 if ((old ^ new) & APICBASE_X2APIC) { 1380 lapic = vlapic->apic_page; 1381 lapic->id = vlapic_get_id(vlapic); 1382 if (x2apic(vlapic)) { 1383 lapic->ldr = x2apic_ldr(vlapic); 1384 lapic->dfr = 0; 1385 } else { 1386 lapic->ldr = 0; 1387 lapic->dfr = 0xffffffff; 1388 } 1389 } 1390 } 1391 1392 void 1393 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1394 { 1395 struct vlapic *vlapic; 1396 1397 vlapic = vm_lapic(vm, vcpuid); 1398 1399 if (state == X2APIC_DISABLED) 1400 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1401 } 1402 1403 void 1404 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1405 int delmode, int vec) 1406 { 1407 bool lowprio; 1408 int vcpuid; 1409 cpuset_t dmask; 1410 1411 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1412 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1413 return; 1414 } 1415 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1416 1417 /* 1418 * We don't provide any virtual interrupt redirection hardware so 1419 * all interrupts originating from the ioapic or MSI specify the 1420 * 'dest' in the legacy xAPIC format. 1421 */ 1422 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1423 1424 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1425 vcpuid--; 1426 CPU_CLR(vcpuid, &dmask); 1427 lapic_set_intr(vm, vcpuid, vec, level); 1428 } 1429 } 1430 1431 void 1432 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1433 { 1434 /* 1435 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1436 * 1437 * This is done by leveraging features like Posted Interrupts (Intel) 1438 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1439 * 1440 * If neither of these features are available then fallback to 1441 * sending an IPI to 'hostcpu'. 
1442 */ 1443 if (vlapic->ops.post_intr) 1444 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1445 else 1446 ipi_cpu(hostcpu, ipinum); 1447 } 1448 1449 bool 1450 vlapic_enabled(struct vlapic *vlapic) 1451 { 1452 struct LAPIC *lapic = vlapic->apic_page; 1453 1454 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1455 (lapic->svr & APIC_SVR_ENABLE) != 0) 1456 return (true); 1457 else 1458 return (false); 1459 } 1460 1461 static void 1462 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1463 { 1464 struct LAPIC *lapic; 1465 uint32_t *tmrptr, mask; 1466 int idx; 1467 1468 lapic = vlapic->apic_page; 1469 tmrptr = &lapic->tmr0; 1470 idx = (vector / 32) * 4; 1471 mask = 1 << (vector % 32); 1472 if (level) 1473 tmrptr[idx] |= mask; 1474 else 1475 tmrptr[idx] &= ~mask; 1476 1477 if (vlapic->ops.set_tmr != NULL) 1478 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1479 } 1480 1481 void 1482 vlapic_reset_tmr(struct vlapic *vlapic) 1483 { 1484 int vector; 1485 1486 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1487 1488 for (vector = 0; vector <= 255; vector++) 1489 vlapic_set_tmr(vlapic, vector, false); 1490 } 1491 1492 void 1493 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1494 int delmode, int vector) 1495 { 1496 cpuset_t dmask; 1497 bool lowprio; 1498 1499 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1500 1501 /* 1502 * A level trigger is valid only for fixed and lowprio delivery modes. 
1503 */ 1504 if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { 1505 VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " 1506 "delivery-mode %d", delmode); 1507 return; 1508 } 1509 1510 lowprio = (delmode == APIC_DELMODE_LOWPRIO); 1511 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); 1512 1513 if (!CPU_ISSET(vlapic->vcpuid, &dmask)) 1514 return; 1515 1516 VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); 1517 vlapic_set_tmr(vlapic, vector, true); 1518 } 1519