1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 /* 32 * This file and its contents are supplied under the terms of the 33 * Common Development and Distribution License ("CDDL"), version 1.0. 34 * You may only use this file in accordance with the terms of version 35 * 1.0 of the CDDL. 36 * 37 * A full copy of the text of the CDDL should have accompanied this 38 * source. A copy of the CDDL is also available via the Internet at 39 * http://www.illumos.org/license/CDDL. 40 * 41 * Copyright 2014 Pluribus Networks Inc. 42 * Copyright 2018 Joyent, Inc. 
43 * Copyright 2020 Oxide Computer Company 44 */ 45 46 #include <sys/cdefs.h> 47 __FBSDID("$FreeBSD$"); 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/kmem.h> 52 #include <sys/mutex.h> 53 #include <sys/systm.h> 54 #include <sys/cpuset.h> 55 56 #include <x86/specialreg.h> 57 #include <x86/apicreg.h> 58 59 #include <machine/clock.h> 60 61 #include <machine/vmm.h> 62 #include <sys/vmm_kernel.h> 63 64 #include "vmm_lapic.h" 65 #include "vmm_stat.h" 66 67 #include "vlapic.h" 68 #include "vlapic_priv.h" 69 #include "vioapic.h" 70 71 72 /* 73 * The 4 high bits of a given interrupt vector represent its priority. The same 74 * is true for the contents of the TPR when it is used to calculate the ultimate 75 * PPR of an APIC - the 4 high bits hold the priority. 76 */ 77 #define PRIO(x) ((x) & 0xf0) 78 79 #define VLAPIC_VERSION (16) 80 81 /* 82 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the 83 * vlapic_callout_handler() and vcpu accesses to: 84 * - timer_freq_bt, timer_period_bt, timer_fire_bt 85 * - timer LVT register 86 */ 87 #define VLAPIC_TIMER_LOCK(vlapic) mutex_enter(&((vlapic)->timer_lock)) 88 #define VLAPIC_TIMER_UNLOCK(vlapic) mutex_exit(&((vlapic)->timer_lock)) 89 #define VLAPIC_TIMER_LOCKED(vlapic) MUTEX_HELD(&((vlapic)->timer_lock)) 90 91 /* 92 * APIC timer frequency: 93 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ	(128 * 1024 * 1024)

/* Mask covering the physical-address portion of the APICBASE MSR */
#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

#ifdef __ISRVEC_DEBUG
static void vlapic_isrstk_accept(struct vlapic *, int);
static void vlapic_isrstk_eoi(struct vlapic *, int);
static void vlapic_isrstk_verify(const struct vlapic *);
#endif /* __ISRVEC_DEBUG */


/* Is the vlapic in x2APIC mode (per the APICBASE MSR)? */
static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

/* Is the vlapic hardware-disabled (enable bit clear in the APICBASE MSR)? */
static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

/* Is the vlapic software-disabled (enable bit clear in the SVR)? */
static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

/* Is the vlapic enabled in both hardware (APICBASE MSR) and software (SVR)? */
static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

/*
 * APIC ID for this vCPU: the bare vcpuid in x2APIC mode, or the vcpuid
 * shifted into the high byte per the xAPIC ID register format.
 */
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

/*
 * Derive the (read-only) x2APIC LDR value from the APIC ID: logical ID bit
 * in the low 16 bits, cluster ID in the high 16 bits.
 */
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

/* Sanitize a guest write to the destination format register (DFR). */
void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to DFR in x2APIC mode */
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

/* Sanitize a guest write to the logical destination register (LDR). */
void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (vlapic_x2mode(vlapic)) {
		/* Ignore write to LDR in x2APIC mode */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

/*
 * Map the divide configuration register (DCR) contents to the resulting
 * timer frequency divisor.  The 0xB mask selects the divisor encoding bits;
 * bit 2 is not part of the encoding (see Intel SDM Vol 3A, APIC chapter).
 */
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

/*
 * Compute the timer current-count register (CCR): the number of timer ticks
 * remaining until the scheduled expiration, or 0 if the timer is not running.
 */
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */

		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
	    "icr_timer is %x", ccr, lapic->icr_timer));
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

/* Handle a write to the timer divide configuration register (DCR). */
void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/* Latch any pending error bits into the ESR and clear the pending set. */
void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

/*
 * Queue 'vector' in the IRR for injection into this vCPU, returning the kind
 * of notification (if any) which should be sent to rouse the vCPU.
 */
vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it will
		 * have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

/* Return a pointer to the LVT register at 'offset' in the APIC page. */
static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

/* Translate an LVT register offset to its index into the lvt_last[] cache. */
static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

/* Fetch the cached (last-written, sanitized) value of the LVT at 'offset'. */
static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

/*
 * Sanitize a guest write to the LVT register at 'offset': mask out bits not
 * valid for that particular LVT, force the mask bit on while the APIC is
 * software-disabled, and refresh the lvt_last[] cache.
 */
void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

/* Mask every LVT entry, as occurs when the APIC is software-disabled. */
static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

/*
 * Fire the interrupt described by the LVT at index 'lvt', if it is not
 * masked.  Returns 1 if an event was delivered, 0 otherwise.
 */
static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

/*
 * Return the highest vector currently in service (set in the ISR), or 0 if
 * no legal vector is in service.
 */
static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to value of
				 * 0, indicating that no active ISR was found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to the
 * priority of that vector, as the vCPU would have been at a lower priority in
 * order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

#ifdef __ISRVEC_DEBUG
	KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
	KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
	KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
	KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
#endif /* __ISRVEC_DEBUG */

	lapic->ppr = ppr;
}

/* Recalculate the PPR after guest TPR state may have changed. */
void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

/*
 * Handle an EOI from the guest: clear the highest-priority in-service bit in
 * the ISR, recompute the PPR, and forward the EOI to the vIOAPIC if the TMR
 * bit for that vector is set (level-triggered).
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
#ifdef __ISRVEC_DEBUG
			vlapic_isrstk_eoi(vlapic, vector);
#endif
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

/* Extract the field selected by 'mask' from an LVT register value. */
static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

/* Is the APIC timer LVT configured for periodic (vs one-shot) mode? */
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

/*
 * Record an APIC error in the pending ESR bits and, unless the error arose
 * from the error LVT itself, deliver an interrupt through the error LVT.
 */
static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

/* Deliver an APIC timer interrupt via the timer LVT (timer lock held). */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

/* Deliver a corrected machine check interrupt via the CMCI LVT. */
void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

/*
 * Trigger the LVT at index 'vector'.  Returns 0 on success or EINVAL for an
 * unrecognized LVT index.
 */
int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm,
			    vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm,
			    vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERRED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/* (Re)arm the timer callout for the absolute time in 'timer_fire_when'. */
static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

/*
 * Callout handler for APIC timer expiration: fire the timer LVT and, for a
 * periodic timer, schedule the next expiration.
 */
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underly these callouts) will never be called
		 * early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * Handle a write to the timer initial-count register (ICR): start the timer
 * for a non-zero count, or stop it for a count of zero.
 */
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

/* Update the TPR, recomputing the PPR if the value changed. */
static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

/*
 * Handle a guest write to %cr8, which maps to the high nibble of the TPR.
 * An attempt to set bits outside the low nibble injects #GP into the vCPU.
 */
void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

/* Return the %cr8 value implied by the current TPR. */
uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

/*
 * Handle a write to the low half of the ICR: decode the destination
 * (shorthand or calculated) and delivery mode, then dispatch the IPI
 * (fixed, NMI, INIT, or SIPI) to each targeted vCPU.
 */
void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}
	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}
	if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
	    !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
		/*
		 * While Intel makes no mention of restrictions for destination
		 * shorthand when sending INIT or SIPI, AMD requires either a
		 * specific destination or all-excluding self. Common use seems
		 * to be restricted to those two cases. Until handling is in
		 * place to halt a guest which makes such a frivolous request,
		 * we will ignore them.
		 */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery notations are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

/* Handle a write to the (x2APIC-only) SELF_IPI register. */
void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
}

/*
 * Report whether an interrupt is pending: an IRR vector whose priority is
 * above the current PPR.  If so, returns 1 and stores the vector in *vecptr
 * (when non-NULL); otherwise returns 0.
 */
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

/*
 * Mark 'vector' as accepted by the vCPU: move it from the IRR to the ISR
 * and raise the PPR accordingly.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx;

	KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);

	/*
	 * The only way a fresh vector could be accepted into ISR is if it was
	 * of a higher priority than the current PPR. With that vector now
	 * in-service, the PPR must be raised.
	 */
	vlapic_raise_ppr(vlapic, vector);

#ifdef __ISRVEC_DEBUG
	vlapic_isrstk_accept(vlapic, vector);
#endif
}

/*
 * Handle a write to the spurious-interrupt vector register (SVR).  On an
 * enable-bit transition: stop the timer and mask all LVTs when disabling,
 * or restart a periodic timer when enabling.
 */
void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

/*
 * Read the 32-bit register at 'offset' in the virtual APIC page into *outp.
 * Returns true on success, or false (with *outp zeroed) for write-only or
 * invalid registers.
 */
static bool
vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	ASSERT3U(offset & 0x3, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);
	ASSERT3P(outp, !=, NULL);

	uint32_t data = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		data = lapic->tpr;
		break;
	case APIC_OFFSET_APR:
		data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		data = lapic->ppr;
		break;
	case APIC_OFFSET_LDR:
		data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		data = lapic->icr_lo;
		break;
	case APIC_OFFSET_ICR_HI:
		data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		ASSERT3U(data, ==, *reg);
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_RRR:
		data = 0;
		break;

	case APIC_OFFSET_SELF_IPI:
	case APIC_OFFSET_EOI:
		/* Write-only register */
		*outp = 0;
		return (false);

	default:
		/* Invalid register */
		*outp = 0;
		return (false);
	}

	*outp = data;
	return (true);
}

/*
 * Write 'data' to the 32-bit register at 'offset' in the virtual APIC page,
 * invoking the register-specific side-effect handler.  Returns true on
 * success, or false for read-only or invalid registers.
 */
static bool
vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;

	ASSERT3U(offset & 0xf, ==, 0);
	ASSERT3U(offset, <, PAGESIZE);

	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		vlapic_icrlo_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (vlapic_x2mode(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
		/* Read-only register */
		return (false);

	default:
		/* Invalid register */
		return (false);
	}

	return (true);
}

/* Reset the vlapic to its power-on state. */
void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr, *irrptr;

	/* Reset any timer-related state first */
	VLAPIC_TIMER_LOCK(vlapic);
	callout_stop(&vlapic->callout);
	lapic->icr_timer = 0;
	lapic->ccr_timer = 0;
	VLAPIC_TIMER_UNLOCK(vlapic);
	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	/*
	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
	 * it is not leftover after the reset. This is performed after the APIC
	 * timer has been stopped, in case it happened to fire just prior to
	 * being deactivated.
	 */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);

	lapic->tpr = 0;
	lapic->apr = 0;
	lapic->ppr = 0;

#ifdef __ISRVEC_DEBUG
	/* With the PPR cleared, the isrvec tracking should be reset too */
	vlapic->isrvec_stk_top = 0;
#endif

	lapic->eoi = 0;
	lapic->ldr = 0;
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic->svr_last = lapic->svr;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;
	for (uint_t i = 0; i < 8; i++) {
		atomic_store_rel_int(&isrptr[i * 4], 0);
		atomic_store_rel_int(&tmrptr[i * 4], 0);
		atomic_store_rel_int(&irrptr[i * 4], 0);
	}

	lapic->esr = 0;
	vlapic->esr_pending = 0;
	lapic->icr_lo = 0;
lapic->icr_hi = 0; 1395 1396 lapic->lvt_cmci = 0; 1397 lapic->lvt_timer = 0; 1398 lapic->lvt_thermal = 0; 1399 lapic->lvt_pcint = 0; 1400 lapic->lvt_lint0 = 0; 1401 lapic->lvt_lint1 = 0; 1402 lapic->lvt_error = 0; 1403 vlapic_mask_lvts(vlapic); 1404 } 1405 1406 void 1407 vlapic_init(struct vlapic *vlapic) 1408 { 1409 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1410 KASSERT(vlapic->vcpuid >= 0 && 1411 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1412 ("vlapic_init: vcpuid is not initialized")); 1413 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1414 "initialized")); 1415 1416 /* 1417 * If the vlapic is configured in x2apic mode then it will be 1418 * accessed in the critical section via the MSR emulation code. 1419 * 1420 * Therefore the timer mutex must be a spinlock because blockable 1421 * mutexes cannot be acquired in a critical section. 1422 */ 1423 mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL); 1424 callout_init(&vlapic->callout, 1); 1425 1426 vlapic_reset(vlapic); 1427 } 1428 1429 void 1430 vlapic_cleanup(struct vlapic *vlapic) 1431 { 1432 callout_drain(&vlapic->callout); 1433 mutex_destroy(&vlapic->timer_lock); 1434 } 1435 1436 int 1437 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp, 1438 uint_t size) 1439 { 1440 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1441 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1442 1443 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1444 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1445 *valp = UINT64_MAX; 1446 return (0); 1447 } 1448 1449 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1450 uint32_t raw = 0; 1451 (void) vlapic_read(vlapic, off & ~0xf, &raw); 1452 1453 /* Shift and mask reads which are small and/or unaligned */ 1454 const uint8_t align = off & 0xf; 1455 if (align < 4) { 1456 *valp = (uint64_t)raw << (align * 8); 1457 } else { 1458 *valp = 0; 1459 } 1460 1461 return (0); 1462 } 1463 1464 int 1465 
vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val,
    uint_t size)
{
	ASSERT3U(gpa, >=, DEFAULT_APIC_BASE);
	ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE);

	/* Ignore MMIO accesses when in x2APIC mode or hardware disabled */
	if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) {
		return (0);
	}

	const uint16_t off = gpa - DEFAULT_APIC_BASE;
	/* Ignore writes which are not 32-bits wide and 16-byte aligned */
	if ((off & 0xf) != 0 || size != 4) {
		return (0);
	}

	(void) vlapic_write(vlapic, off, (uint32_t)val);
	return (0);
}

/* Should attempts to change the APIC base address be rejected with a #GP? */
int vlapic_gp_on_addr_change = 1;

/*
 * Handle a guest write to the APIC base MSR.  Changes to reserved bits, the
 * x2APIC enable bit, the BSP flag, or (by policy) the base address are
 * rejected; otherwise the new value is stored.
 */
static vm_msr_result_t
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
	/* Bits which differ between the current value and the request */
	const uint64_t diff = vlapic->msr_apicbase ^ val;

	/*
	 * Until the LAPIC emulation for switching between xAPIC and x2APIC
	 * modes is more polished, it will remain off-limits from being altered
	 * by the guest.
	 */
	const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC |
	    APICBASE_BSP;
	if ((diff & reserved_bits) != 0) {
		return (VMR_GP);
	}

	/* We do not presently allow the LAPIC access address to be modified. */
	if ((diff & APICBASE_ADDR_MASK) != 0) {
		/*
		 * Explicitly rebuffing such requests with a #GP is the most
		 * straightforward way to handle the situation, but certain
		 * consumers (such as the KVM unit tests) may balk at the
		 * otherwise unexpected exception.
		 */
		if (vlapic_gp_on_addr_change) {
			return (VMR_GP);
		}

		/* If silence is required, just ignore the address change. */
		val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE;
	}

	vlapic->msr_apicbase = val;
	return (VMR_OK);
}

/*
 * Map an x2APIC register MSR (0x800 range) to the equivalent xAPIC page
 * offset: each MSR corresponds to one 16-byte-aligned register.
 */
static __inline uint16_t
vlapic_msr_to_regoff(uint32_t msr)
{
	ASSERT3U(msr, >=, MSR_APIC_000);
	ASSERT3U(msr, <, (MSR_APIC_000 + 0x100));

	return ((msr - MSR_APIC_000) << 4);
}

/*
 * Does this MSR belong to the vlapic?  True for the APIC base MSR and the
 * 0x100-wide x2APIC register MSR range.
 */
bool
vlapic_owned_msr(uint32_t msr)
{
	if (msr == MSR_APICBASE) {
		return (true);
	}
	if (msr >= MSR_APIC_000 &&
	    msr < (MSR_APIC_000 + 0x100)) {
		return (true);
	}
	return (false);
}

/*
 * Emulate RDMSR for a vlapic-owned MSR.  The APIC base is readable in any
 * mode; the x2APIC register MSRs #GP unless the vlapic is in x2APIC mode.
 */
vm_msr_result_t
vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp)
{
	ASSERT(vlapic_owned_msr(msr));
	ASSERT3P(valp, !=, NULL);

	if (msr == MSR_APICBASE) {
		*valp = vlapic->msr_apicbase;
		return (VMR_OK);
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	uint64_t out = 0;
	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Read from ICR register gets entire (64-bit) value */
		uint32_t low = 0, high = 0;
		bool valid;

		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high);
		VERIFY(valid);
		valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low);
		VERIFY(valid);

		*valp = ((uint64_t)high << 32) | low;
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	default:
		break;
	}
	/*
	 * vlapic_read() fills only the low 32 bits of the zero-initialized
	 * 'out' (via the cast), leaving the high bits clear for *valp.
	 */
	if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) {
		return (VMR_GP);
	}
	*valp = out;
	return (VMR_OK);
}

/*
 * Emulate WRMSR for a vlapic-owned MSR.  The APIC base write goes through
 * vlapic_set_apicbase(); x2APIC register MSRs #GP in xAPIC mode, and a few
 * registers (ICR, ESR) get x2APIC-specific treatment.
 */
vm_msr_result_t
vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val)
{
	ASSERT(vlapic_owned_msr(msr));

	if (msr == MSR_APICBASE) {
		return (vlapic_set_apicbase(vlapic, val));
	}

	/* #GP for x2APIC MSR accesses in xAPIC mode */
	if (!vlapic_x2mode(vlapic)) {
		return (VMR_GP);
	}

	const uint16_t reg = vlapic_msr_to_regoff(msr);
	switch (reg) {
	case APIC_OFFSET_ICR_LOW: {
		/* Write to ICR register sets entire (64-bit) value */
		bool valid;

		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32);
		VERIFY(valid);
		valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val);
		VERIFY(valid);
		return (VMR_OK);
	}
	case APIC_OFFSET_ICR_HI:
		/* Already covered by ICR_LOW */
		return (VMR_GP);
	case APIC_OFFSET_ESR:
		/* Only 0 may be written from x2APIC mode */
		if (val != 0) {
			return (VMR_GP);
		}
		break;
	default:
		break;
	}
	if (!vlapic_write(vlapic, reg, val)) {
		return (VMR_GP);
	}
	return (VMR_OK);
}

/*
 * Switch the given vcpu's APIC between xAPIC and x2APIC modes, updating the
 * APIC base MSR and the mode-dependent registers accordingly.
 */
void
vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vm, vcpuid);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (vlapic_x2mode(vlapic)) {
		/* x2APIC: LDR is derived from the APIC ID, DFR is absent */
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		/* Let any acceleration backend react to the mode change */
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

/*
 * Deliver an interrupt (originating from the I/O APIC or MSI) with vector
 * 'vec' to every vcpu matched by 'dest'/'phys' under delivery mode 'delmode'.
 * Only fixed, lowest-priority, and ExtINT delivery modes are supported;
 * anything else is silently dropped.
 */
void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		/* Invalid delivery mode */
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	/* CPU_FFS is 1-based; 0 means the set is empty */
	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
		vcpuid--;
		CPU_CLR(vcpuid, &dmask);
		if (delmode == IOART_DELEXINT) {
			(void) vm_inject_extint(vm, vcpuid);
		} else {
			(void) lapic_set_intr(vm, vcpuid, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features are available then fallback to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		poke_cpu(hostcpu);
}

/* Re-home the timer callout onto the current CPU. */
void
vlapic_localize_resources(struct vlapic *vlapic)
{
	vmm_glue_callout_localize(&vlapic->callout);
}

#ifdef __ISRVEC_DEBUG
/*
 * Pop the top in-service vector from the debug tracking stack on EOI and
 * re-verify the stack invariants.
 */
static void
vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
{
	if (vlapic->isrvec_stk_top <= 0) {
		panic("invalid vlapic isrvec_stk_top %d",
		    vlapic->isrvec_stk_top);
	}
	vlapic->isrvec_stk_top--;
	vlapic_isrstk_verify(vlapic);
}

/*
 * Push a newly-accepted in-service vector onto the debug tracking stack and
 * re-verify the stack invariants.
 */
static void
vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
{
	int stk_top;

	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_isrstk_verify(vlapic);
}

/* Dump the ISR registers and the isrvec tracking stack (panic diagnostics). */
static void
vlapic_isrstk_dump(const struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}

/*
 * Sanity-check the isrvec tracking stack against the ISR registers: nested
 * in-service vectors must have strictly increasing priority, and the stack
 * entries (above the index-0 placeholder) must match the set ISR bits in
 * ascending vector order.  Panics (after dumping state) on any violation.
 */
static void
vlapic_isrstk_verify(const struct vlapic *vlapic)
{
	int i, lastprio, curprio, vector, idx;
	uint32_t *isrptr;

	/*
	 * Note: The value at index 0 in isrvec_stk is always 0.
	 *
	 * It is a placeholder for the value of ISR vector when no bits are set
	 * in the ISRx registers.
	 */
	if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) {
		panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]);
	}

	/*
	 * Make sure that the priority of the nested interrupts is
	 * always increasing.
	 */
	lastprio = -1;
	for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
		curprio = PRIO(vlapic->isrvec_stk[i]);
		if (curprio <= lastprio) {
			vlapic_isrstk_dump(vlapic);
			panic("isrvec_stk does not satisfy invariant");
		}
		lastprio = curprio;
	}

	/*
	 * Make sure that each bit set in the ISRx registers has a
	 * corresponding entry on the isrvec stack.
	 */
	i = 1;
	isrptr = &vlapic->apic_page->isr0;
	for (vector = 0; vector < 256; vector++) {
		/* ISR registers are 4 uint32_t (16 bytes) apart in the page */
		idx = (vector / 32) * 4;
		if (isrptr[idx] & (1 << (vector % 32))) {
			if (i > vlapic->isrvec_stk_top ||
			    vlapic->isrvec_stk[i] != vector) {
				vlapic_isrstk_dump(vlapic);
				panic("ISR and isrvec_stk out of sync");
			}
			i++;
		}
	}
}
#endif