1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 /* 32 * This file and its contents are supplied under the terms of the 33 * Common Development and Distribution License ("CDDL"), version 1.0. 34 * You may only use this file in accordance with the terms of version 35 * 1.0 of the CDDL. 36 * 37 * A full copy of the text of the CDDL should have accompanied this 38 * source. A copy of the CDDL is also available via the Internet at 39 * http://www.illumos.org/license/CDDL. 40 * 41 * Copyright 2014 Pluribus Networks Inc. 42 * Copyright 2018 Joyent, Inc. 
43 * Copyright 2020 Oxide Computer Company 44 */ 45 46 #include <sys/cdefs.h> 47 __FBSDID("$FreeBSD$"); 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/malloc.h> 52 #include <sys/mutex.h> 53 #include <sys/systm.h> 54 #include <sys/cpuset.h> 55 56 #include <x86/specialreg.h> 57 #include <x86/apicreg.h> 58 59 #include <machine/clock.h> 60 61 #include <machine/vmm.h> 62 #include <sys/vmm_kernel.h> 63 64 #include "vmm_lapic.h" 65 #include "vmm_ktr.h" 66 #include "vmm_stat.h" 67 68 #include "vlapic.h" 69 #include "vlapic_priv.h" 70 #include "vioapic.h" 71 72 73 /* 74 * The 4 high bits of a given interrupt vector represent its priority. The same 75 * is true for the contents of the TPR when it is used to calculate the ultimate 76 * PPR of an APIC - the 4 high bits hold the priority. 77 */ 78 #define PRIO(x) ((x) & 0xf0) 79 80 #define VLAPIC_VERSION (16) 81 82 /* 83 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the 84 * vlapic_callout_handler() and vcpu accesses to: 85 * - timer_freq_bt, timer_period_bt, timer_fire_bt 86 * - timer LVT register 87 */ 88 #define VLAPIC_TIMER_LOCK(vlapic) mutex_enter(&((vlapic)->timer_lock)) 89 #define VLAPIC_TIMER_UNLOCK(vlapic) mutex_exit(&((vlapic)->timer_lock)) 90 #define VLAPIC_TIMER_LOCKED(vlapic) MUTEX_HELD(&((vlapic)->timer_lock)) 91 92 /* 93 * APIC timer frequency: 94 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 
 * - power-of-two to avoid loss of precision when calculating times
 */
#define	VLAPIC_BUS_FREQ		(128 * 1024 * 1024)

/* Mask covering the (page-aligned) APIC base address bits in the APICBASE MSR */
#define	APICBASE_ADDR_MASK	0xfffffffffffff000UL

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);

#ifdef __ISRVEC_DEBUG
static void vlapic_isrstk_accept(struct vlapic *, int);
static void vlapic_isrstk_eoi(struct vlapic *, int);
static void vlapic_isrstk_verify(const struct vlapic *);
#endif /* __ISRVEC_DEBUG */


/* Is this vlapic in x2APIC mode (per the guest's APICBASE MSR)? */
static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}

/* Is this vlapic hardware-disabled (APICBASE enable bit clear)? */
static __inline bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
	return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
}

/* Is this vlapic software-disabled (SVR enable bit clear)? */
static __inline bool
vlapic_sw_disabled(const struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return ((lapic->svr & APIC_SVR_ENABLE) == 0);
}

/* Is this vlapic enabled both in hardware (MSR) and software (SVR)? */
static __inline bool
vlapic_enabled(const struct vlapic *vlapic)
{
	return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic));
}

/*
 * Local APIC ID as seen by the guest: the bare vcpuid in x2APIC mode, or the
 * vcpuid shifted into the high byte of the ID register in xAPIC mode.
 */
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (vlapic_x2mode(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

/*
 * Derive the (read-only) x2APIC LDR value from the APIC ID: the logical ID is
 * a single bit (ID mod 16) and the cluster ID (ID / 16) sits in the upper
 * 16 bits.
 */
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (vlapic_x2mode(vlapic)) {
		/* The DFR does not exist in x2APIC mode; zero it out. */
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	/* Only the model bits are writable; reserved bits read as ones. */
	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (vlapic_x2mode(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		/* Restore the fixed value derived from the APIC ID. */
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

/*
 * Translate the divide configuration register (DCR) contents into the timer
 * divisor it selects.  Bits 0, 1 and 3 of the DCR (mask 0xB) encode the
 * divide value.
 */
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

/*
 * Compute the timer current-count register (CCR) from the time remaining
 * until the armed callout is due to fire, scaled by the current timer
 * frequency.  Returns 0 when no timer is pending.
 */
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */

		const hrtime_t now = gethrtime();
		if (vlapic->timer_fire_when > now) {
			ccr += hrt_freq_count(vlapic->timer_fire_when - now,
			    vlapic->timer_cur_freq);
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
	    "icr_timer is %x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * Latch the accumulated pending errors into the visible ESR, clearing the
 * pending set (ESR write semantics).
 */
void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

/*
 * Post an interrupt (vector, trigger mode) to this vlapic's IRR/TMR.
 * Returns the style of notification the caller should deliver to the vCPU,
 * or VCPU_NOTIFY_NONE when no notification is required.
 */
vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask, tmr;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		/* ignore interrupt on software-disabled APIC */
		return (VCPU_NOTIFY_NONE);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);

		/*
		 * If the error LVT is configured to interrupt the vCPU, it will
		 * have delivered a notification through that mechanism.
		 */
		return (VCPU_NOTIFY_NONE);
	}

	/* Defer to a hardware-assisted implementation if one is registered. */
	if (vlapic->ops.set_intr_ready) {
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
	}

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	tmrptr = &lapic->tmr0;
	irrptr = &lapic->irr0;

	/*
	 * Update TMR for requested vector, if necessary.
	 * This must be done prior to asserting the bit in IRR so that the
	 * proper TMR state is always visible before the to-be-queued interrupt
	 * can be injected.
	 */
	tmr = atomic_load_acq_32(&tmrptr[idx]);
	if ((tmr & mask) != (level ? mask : 0)) {
		if (level) {
			atomic_set_int(&tmrptr[idx], mask);
		} else {
			atomic_clear_int(&tmrptr[idx], mask);
		}
	}

	/* Now set the bit in IRR */
	atomic_set_int(&irrptr[idx], mask);

	return (VCPU_NOTIFY_EXIT);
}

/*
 * Map an LVT register offset to its slot in the APIC page.  The non-CMCI LVT
 * registers are contiguous starting at the timer LVT.
 */
static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

/* Map an LVT register offset to its index in the 'lvt_last' cache. */
static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %x", index, offset));

	return (index);
}

/* Fetch the last-written (sanitized) value of the LVT at 'offset'. */
static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

/*
 * Sanitize a guest write to an LVT register: force the mask bit when the APIC
 * is software-disabled, clear bits not defined for that particular LVT, and
 * publish the result to both the APIC page and the 'lvt_last' cache.
 */
void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

/* Set the mask bit in every LVT entry (used when the APIC is disabled). */
static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

/*
 * Attempt to deliver the interrupt configured in the given LVT entry.
 * Returns 1 if an event was delivered to the vCPU, 0 if the entry was masked
 * or carried an unsupported delivery mode.
 */
static int
vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt)
{
	uint32_t mode, reg, vec;
	vcpu_notify_t notify;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		notify = vlapic_set_intr_ready(vlapic, vec, false);
		vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
		break;
	case APIC_LVT_DM_NMI:
		(void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		(void) vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

/*
 * Find the highest-priority vector currently asserted in the ISR.  Returns 0
 * when the ISR is empty (or contains only illegal low vectors).
 */
static uint_t
vlapic_active_isr(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrp;

	isrp = &vlapic->apic_page->isr7;

	for (i = 7; i >= 0; i--, isrp -= 4) {
		uint32_t reg = *isrp;

		if (reg != 0) {
			uint_t vec = (i * 32) + bsrl(reg);

			if (vec < 16) {
				/*
				 * Truncate the illegal low vectors to value of
				 * 0, indicating that no active ISR was found.
				 */
				return (0);
			}
			return (vec);
		}
	}

	return (0);
}

/*
 * After events which might arbitrarily change the value of PPR, such as a TPR
 * write or an EOI, calculate that new PPR value and store it in the APIC page.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	isrvec = vlapic_active_isr(vlapic);
	tpr = vlapic->apic_page->tpr;

	/*
	 * Algorithm adopted from section "Interrupt, Task and Processor
	 * Priority" in Intel Architecture Manual Vol 3a.
	 */
	if (PRIO(tpr) >= PRIO(isrvec)) {
		ppr = tpr;
	} else {
		ppr = PRIO(isrvec);
	}

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

/*
 * When a vector is asserted in ISR as in-service, the PPR must be raised to the
 * priority of that vector, as the vCPU would have been at a lower priority in
 * order for the vector to be accepted.
 */
static void
vlapic_raise_ppr(struct vlapic *vlapic, int vec)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int ppr;

	ppr = PRIO(vec);

#ifdef __ISRVEC_DEBUG
	KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
	KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
	KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
	KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
#endif /* __ISRVEC_DEBUG */

	lapic->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

/* Recalculate PPR after a TPR change (e.g. a hardware-synced TPR). */
void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

/*
 * Retire the highest-priority in-service vector: clear its ISR bit, recompute
 * PPR, and forward the EOI to the vioapic for vectors marked level-triggered
 * in the TMR.  An EOI with nothing in service only bumps a statistic.
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i;
	uint_t idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		if (isrptr[idx] != 0) {
			bitpos = bsrl(isrptr[idx]);
			vector = i * 32 + bitpos;

			isrptr[idx] &= ~(1 << bitpos);
			VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
			    vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
#ifdef __ISRVEC_DEBUG
			vlapic_isrstk_eoi(vlapic, vector);
#endif
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
	VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

/* Is the timer LVT currently configured for periodic mode? */
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

/*
 * Accumulate an APIC error into the pending ESR and, unless the error came
 * from the error LVT itself ('lvt_error'), attempt to fire the error LVT.
 */
static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

/* Deliver a timer interrupt via the timer LVT.  Caller holds timer_lock. */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

/* Deliver a corrected machine check interrupt via the CMCI LVT. */
void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

/*
 * Externally trigger the LVT identified by 'vector' (an APIC_LVT_* index).
 * Returns EINVAL for an unrecognized index, 0 otherwise.
 */
int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	if (!vlapic_enabled(vlapic)) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			(void) vm_inject_extint(vlapic->vm,
			    vlapic->vcpuid);
			break;
		case APIC_LVT_LINT1:
			(void) vm_inject_nmi(vlapic->vm,
			    vlapic->vcpuid);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
			    LVTS_TRIGGERRED, vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/* Arm the timer callout for the absolute deadline in 'timer_fire_when'. */
static void
vlapic_callout_reset(struct vlapic *vlapic)
{
	callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
	    vlapic_callout_handler, vlapic, C_ABSOLUTE);
}

/*
 * Callout handler for APIC timer expiration: fire the timer LVT and, when in
 * periodic mode, compute the next deadline and re-arm the callout.
 */
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.  We can depend on the fact that
		 * cyclics (which underly these callouts) will never be called
		 * early.
		 */
		const hrtime_t now = gethrtime();
		const hrtime_t delta = now - vlapic->timer_fire_when;
		if (delta >= vlapic->timer_period) {
			/*
			 * If we are so behind that we have missed an entire
			 * timer period, reset the time base rather than
			 * attempting to catch up.
			 */
			vlapic->timer_fire_when = now + vlapic->timer_period;
		} else {
			vlapic->timer_fire_when += vlapic->timer_period;
		}
		vlapic_callout_reset(vlapic);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * Guest wrote the timer initial-count register: recompute the period and
 * either (re)arm the callout or, for a zero count, stop the timer.
 */
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
	    lapic->icr_timer);
	if (vlapic->timer_period != 0) {
		vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
		vlapic_callout_reset(vlapic);
	} else {
		vlapic->timer_fire_when = 0;
		callout_stop(&vlapic->callout);
	}
	VLAPIC_TIMER_UNLOCK(vlapic);
}

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (vlapic_x2mode(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

/* Store a new TPR value and recompute the PPR if it changed. */
static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
		    "from %#x to %#x", lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

/*
 * Handle a guest write to CR8, which aliases the high nibble of the TPR.
 * Values with bits above the low 4 set inject #GP.
 */
void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

/* CR8 reads back as the high nibble of the TPR. */
uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	const struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr >> 4);
}

/*
 * Handle a write to the low half of the ICR: decode the destination
 * (shorthand or calculated set) and delivery mode, then inject the IPI into
 * each targeted vCPU.
 */
void
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
	int i;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode, dsh;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	/* The write itself completes delivery; clear the pending status. */
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (vlapic_x2mode(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	dsh = icrval & APIC_DEST_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
		return;
	}
	if (mode == APIC_DELMODE_INIT &&
	    (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
		/* No work required to deassert INIT */
		return;
	}
	if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
	    !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
		/*
		 * While Intel makes no mention of restrictions for destination
		 * shorthand when sending INIT or SIPI, AMD requires either a
		 * specific destination or all-excluding self. Common use seems
		 * to be restricted to those two cases. Until handling is in
		 * place to halt a guest which makes such a frivolous request,
		 * we will ignore them.
		 */
		return;
	}

	switch (dsh) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest,
		    (icrval & APIC_DESTMODE_LOG) == 0, false,
		    vlapic_x2mode(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		/*
		 * All possible delivery notations are covered above.
		 * We should never end up here.
		 */
		panic("unknown delivery shorthand: %x", dsh);
	}

	while ((i = CPU_FFS(&dmask)) != 0) {
		i--;
		CPU_CLR(i, &dmask);
		switch (mode) {
		case APIC_DELMODE_FIXED:
			(void) lapic_intr_edge(vlapic->vm, i, vec);
			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
			    VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vlapic->vm, i,
			    VLAPIC_IPI_RECV, 1);
			break;
		case APIC_DELMODE_NMI:
			(void) vm_inject_nmi(vlapic->vm, i);
			break;
		case APIC_DELMODE_INIT:
			(void) vm_inject_init(vlapic->vm, i);
			break;
		case APIC_DELMODE_STARTUP:
			(void) vm_inject_sipi(vlapic->vm, i, vec);
			break;
		case APIC_DELMODE_LOWPRIO:
		case APIC_DELMODE_SMI:
		default:
			/* Unhandled IPI modes (for now) */
			break;
		}
	}
}

/* Handle a write to the x2APIC SELF_IPI register. */
void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val)
{
	const int vec = val & 0xff;

	/* self-IPI is only exposed via x2APIC */
	ASSERT(vlapic_x2mode(vlapic));

	(void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

/*
 * Is there an IRR vector pending with priority above the current PPR?
 * Returns 1 (storing the vector through 'vecptr' if non-NULL) when an
 * interrupt is deliverable, 0 otherwise.
 */
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	/* Let a hardware-assisted implementation sync its state first. */
	if (vlapic->ops.sync_state) {
		(*vlapic->ops.sync_state)(vlapic);
	}

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void 1105 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1106 { 1107 struct LAPIC *lapic = vlapic->apic_page; 1108 uint32_t *irrptr, *isrptr; 1109 int idx; 1110 1111 KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector)); 1112 1113 if (vlapic->ops.intr_accepted) 1114 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1115 1116 /* 1117 * clear the ready bit for vector being accepted in irr 1118 * and set the vector as in service in isr. 1119 */ 1120 idx = (vector / 32) * 4; 1121 1122 irrptr = &lapic->irr0; 1123 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1124 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1125 1126 isrptr = &lapic->isr0; 1127 isrptr[idx] |= 1 << (vector % 32); 1128 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1129 1130 /* 1131 * The only way a fresh vector could be accepted into ISR is if it was 1132 * of a higher priority than the current PPR. With that vector now 1133 * in-service, the PPR must be raised. 1134 */ 1135 vlapic_raise_ppr(vlapic, vector); 1136 1137 #ifdef __ISRVEC_DEBUG 1138 vlapic_isrstk_accept(vlapic, vector); 1139 #endif 1140 } 1141 1142 void 1143 vlapic_svr_write_handler(struct vlapic *vlapic) 1144 { 1145 struct LAPIC *lapic; 1146 uint32_t old, new, changed; 1147 1148 lapic = vlapic->apic_page; 1149 1150 new = lapic->svr; 1151 old = vlapic->svr_last; 1152 vlapic->svr_last = new; 1153 1154 changed = old ^ new; 1155 if ((changed & APIC_SVR_ENABLE) != 0) { 1156 if ((new & APIC_SVR_ENABLE) == 0) { 1157 /* 1158 * The apic is now disabled so stop the apic timer 1159 * and mask all the LVT entries. 1160 */ 1161 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1162 VLAPIC_TIMER_LOCK(vlapic); 1163 callout_stop(&vlapic->callout); 1164 VLAPIC_TIMER_UNLOCK(vlapic); 1165 vlapic_mask_lvts(vlapic); 1166 } else { 1167 /* 1168 * The apic is now enabled so restart the apic timer 1169 * if it is configured in periodic mode. 
1170 */ 1171 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1172 if (vlapic_periodic_timer(vlapic)) 1173 vlapic_icrtmr_write_handler(vlapic); 1174 } 1175 } 1176 } 1177 1178 static bool 1179 vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp) 1180 { 1181 struct LAPIC *lapic = vlapic->apic_page; 1182 uint32_t *reg; 1183 int i; 1184 1185 ASSERT3U(offset & 0x3, ==, 0); 1186 ASSERT3U(offset, <, PAGESIZE); 1187 ASSERT3P(outp, !=, NULL); 1188 1189 uint32_t data = 0; 1190 switch (offset) { 1191 case APIC_OFFSET_ID: 1192 data = lapic->id; 1193 break; 1194 case APIC_OFFSET_VER: 1195 data = lapic->version; 1196 break; 1197 case APIC_OFFSET_TPR: 1198 data = lapic->tpr; 1199 break; 1200 case APIC_OFFSET_APR: 1201 data = lapic->apr; 1202 break; 1203 case APIC_OFFSET_PPR: 1204 data = lapic->ppr; 1205 break; 1206 case APIC_OFFSET_LDR: 1207 data = lapic->ldr; 1208 break; 1209 case APIC_OFFSET_DFR: 1210 data = lapic->dfr; 1211 break; 1212 case APIC_OFFSET_SVR: 1213 data = lapic->svr; 1214 break; 1215 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1216 i = (offset - APIC_OFFSET_ISR0) >> 2; 1217 reg = &lapic->isr0; 1218 data = *(reg + i); 1219 break; 1220 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1221 i = (offset - APIC_OFFSET_TMR0) >> 2; 1222 reg = &lapic->tmr0; 1223 data = *(reg + i); 1224 break; 1225 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1226 i = (offset - APIC_OFFSET_IRR0) >> 2; 1227 reg = &lapic->irr0; 1228 data = atomic_load_acq_int(reg + i); 1229 break; 1230 case APIC_OFFSET_ESR: 1231 data = lapic->esr; 1232 break; 1233 case APIC_OFFSET_ICR_LOW: 1234 data = lapic->icr_lo; 1235 break; 1236 case APIC_OFFSET_ICR_HI: 1237 data = lapic->icr_hi; 1238 break; 1239 case APIC_OFFSET_CMCI_LVT: 1240 case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: 1241 data = vlapic_get_lvt(vlapic, offset); 1242 #ifdef INVARIANTS 1243 reg = vlapic_get_lvtptr(vlapic, offset); 1244 ASSERT3U(data, ==, *reg); 1245 #endif 1246 break; 1247 case APIC_OFFSET_TIMER_ICR: 1248 data = lapic->icr_timer; 1249 break; 1250 case APIC_OFFSET_TIMER_CCR: 1251 data = vlapic_get_ccr(vlapic); 1252 break; 1253 case APIC_OFFSET_TIMER_DCR: 1254 data = lapic->dcr_timer; 1255 break; 1256 case APIC_OFFSET_RRR: 1257 data = 0; 1258 break; 1259 1260 case APIC_OFFSET_SELF_IPI: 1261 case APIC_OFFSET_EOI: 1262 /* Write-only register */ 1263 *outp = 0; 1264 return (false); 1265 1266 default: 1267 /* Invalid register */ 1268 *outp = 0; 1269 return (false); 1270 } 1271 1272 *outp = data; 1273 return (true); 1274 } 1275 1276 static bool 1277 vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data) 1278 { 1279 struct LAPIC *lapic = vlapic->apic_page; 1280 uint32_t *regptr; 1281 1282 ASSERT3U(offset & 0xf, ==, 0); 1283 ASSERT3U(offset, <, PAGESIZE); 1284 1285 switch (offset) { 1286 case APIC_OFFSET_ID: 1287 lapic->id = data; 1288 vlapic_id_write_handler(vlapic); 1289 break; 1290 case APIC_OFFSET_TPR: 1291 vlapic_set_tpr(vlapic, data & 0xff); 1292 break; 1293 case APIC_OFFSET_EOI: 1294 vlapic_process_eoi(vlapic); 1295 break; 1296 case APIC_OFFSET_LDR: 1297 lapic->ldr = data; 1298 vlapic_ldr_write_handler(vlapic); 1299 break; 1300 case APIC_OFFSET_DFR: 1301 lapic->dfr = data; 1302 vlapic_dfr_write_handler(vlapic); 1303 break; 1304 case APIC_OFFSET_SVR: 1305 lapic->svr = data; 1306 vlapic_svr_write_handler(vlapic); 1307 break; 1308 case APIC_OFFSET_ICR_LOW: 1309 lapic->icr_lo = data; 1310 vlapic_icrlo_write_handler(vlapic); 1311 break; 1312 case APIC_OFFSET_ICR_HI: 1313 lapic->icr_hi = data; 1314 break; 1315 case APIC_OFFSET_CMCI_LVT: 1316 case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: 1317 regptr = vlapic_get_lvtptr(vlapic, offset); 1318 *regptr = data; 1319 vlapic_lvt_write_handler(vlapic, offset); 1320 break; 1321 case APIC_OFFSET_TIMER_ICR: 1322 lapic->icr_timer = data; 1323 vlapic_icrtmr_write_handler(vlapic); 1324 break; 1325 1326 case APIC_OFFSET_TIMER_DCR: 1327 lapic->dcr_timer = data; 1328 vlapic_dcr_write_handler(vlapic); 1329 break; 1330 1331 case APIC_OFFSET_ESR: 1332 vlapic_esr_write_handler(vlapic); 1333 break; 1334 1335 case APIC_OFFSET_SELF_IPI: 1336 if (vlapic_x2mode(vlapic)) 1337 vlapic_self_ipi_handler(vlapic, data); 1338 break; 1339 1340 case APIC_OFFSET_VER: 1341 case APIC_OFFSET_APR: 1342 case APIC_OFFSET_PPR: 1343 case APIC_OFFSET_RRR: 1344 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1345 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1346 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1347 case APIC_OFFSET_TIMER_CCR: 1348 /* Read-only register */ 1349 return (false); 1350 1351 default: 1352 /* Invalid register */ 1353 return (false); 1354 } 1355 1356 return (true); 1357 } 1358 1359 void 1360 vlapic_reset(struct vlapic *vlapic) 1361 { 1362 struct LAPIC *lapic = vlapic->apic_page; 1363 uint32_t *isrptr, *tmrptr, *irrptr; 1364 1365 /* Reset any timer-related state first */ 1366 VLAPIC_TIMER_LOCK(vlapic); 1367 callout_stop(&vlapic->callout); 1368 lapic->icr_timer = 0; 1369 lapic->ccr_timer = 0; 1370 VLAPIC_TIMER_UNLOCK(vlapic); 1371 lapic->dcr_timer = 0; 1372 vlapic_dcr_write_handler(vlapic); 1373 1374 /* 1375 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so 1376 * it is not leftover after the reset. This is performed after the APIC 1377 * timer has been stopped, in case it happened to fire just prior to 1378 * being deactivated. 
1379 */ 1380 if (vlapic->ops.sync_state) { 1381 (*vlapic->ops.sync_state)(vlapic); 1382 } 1383 1384 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1385 if (vlapic->vcpuid == 0) 1386 vlapic->msr_apicbase |= APICBASE_BSP; 1387 1388 lapic->id = vlapic_get_id(vlapic); 1389 lapic->version = VLAPIC_VERSION; 1390 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1391 1392 lapic->tpr = 0; 1393 lapic->apr = 0; 1394 lapic->ppr = 0; 1395 1396 #ifdef __ISRVEC_DEBUG 1397 /* With the PPR cleared, the isrvec tracking should be reset too */ 1398 vlapic->isrvec_stk_top = 0; 1399 #endif 1400 1401 lapic->eoi = 0; 1402 lapic->ldr = 0; 1403 lapic->dfr = 0xffffffff; 1404 lapic->svr = APIC_SVR_VECTOR; 1405 vlapic->svr_last = lapic->svr; 1406 1407 isrptr = &lapic->isr0; 1408 tmrptr = &lapic->tmr0; 1409 irrptr = &lapic->irr0; 1410 for (uint_t i = 0; i < 8; i++) { 1411 atomic_store_rel_int(&isrptr[i * 4], 0); 1412 atomic_store_rel_int(&tmrptr[i * 4], 0); 1413 atomic_store_rel_int(&irrptr[i * 4], 0); 1414 } 1415 1416 lapic->esr = 0; 1417 vlapic->esr_pending = 0; 1418 lapic->icr_lo = 0; 1419 lapic->icr_hi = 0; 1420 1421 lapic->lvt_cmci = 0; 1422 lapic->lvt_timer = 0; 1423 lapic->lvt_thermal = 0; 1424 lapic->lvt_pcint = 0; 1425 lapic->lvt_lint0 = 0; 1426 lapic->lvt_lint1 = 0; 1427 lapic->lvt_error = 0; 1428 vlapic_mask_lvts(vlapic); 1429 } 1430 1431 void 1432 vlapic_init(struct vlapic *vlapic) 1433 { 1434 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1435 KASSERT(vlapic->vcpuid >= 0 && 1436 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1437 ("vlapic_init: vcpuid is not initialized")); 1438 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1439 "initialized")); 1440 1441 /* 1442 * If the vlapic is configured in x2apic mode then it will be 1443 * accessed in the critical section via the MSR emulation code. 
1444 * 1445 * Therefore the timer mutex must be a spinlock because blockable 1446 * mutexes cannot be acquired in a critical section. 1447 */ 1448 mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL); 1449 callout_init(&vlapic->callout, 1); 1450 1451 vlapic_reset(vlapic); 1452 } 1453 1454 void 1455 vlapic_cleanup(struct vlapic *vlapic) 1456 { 1457 callout_drain(&vlapic->callout); 1458 mutex_destroy(&vlapic->timer_lock); 1459 } 1460 1461 int 1462 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp, 1463 uint_t size) 1464 { 1465 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1466 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1467 1468 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1469 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1470 *valp = UINT64_MAX; 1471 return (0); 1472 } 1473 1474 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1475 uint32_t raw = 0; 1476 (void) vlapic_read(vlapic, off & ~0xf, &raw); 1477 1478 /* Shift and mask reads which are small and/or unaligned */ 1479 const uint8_t align = off & 0xf; 1480 if (align < 4) { 1481 *valp = (uint64_t)raw << (align * 8); 1482 } else { 1483 *valp = 0; 1484 } 1485 1486 return (0); 1487 } 1488 1489 int 1490 vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val, 1491 uint_t size) 1492 { 1493 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1494 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1495 1496 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1497 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1498 return (0); 1499 } 1500 1501 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1502 /* Ignore writes which are not 32-bits wide and 16-byte aligned */ 1503 if ((off & 0xf) != 0 || size != 4) { 1504 return (0); 1505 } 1506 1507 (void) vlapic_write(vlapic, off, (uint32_t)val); 1508 return (0); 1509 } 1510 1511 /* Should attempts to change the APIC base address be rejected with a #GP? 
*/ 1512 int vlapic_gp_on_addr_change = 1; 1513 1514 static vm_msr_result_t 1515 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) 1516 { 1517 const uint64_t diff = vlapic->msr_apicbase ^ val; 1518 1519 /* 1520 * Until the LAPIC emulation for switching between xAPIC and x2APIC 1521 * modes is more polished, it will remain off-limits from being altered 1522 * by the guest. 1523 */ 1524 const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC | 1525 APICBASE_BSP; 1526 if ((diff & reserved_bits) != 0) { 1527 return (VMR_GP); 1528 } 1529 1530 /* We do not presently allow the LAPIC access address to be modified. */ 1531 if ((diff & APICBASE_ADDR_MASK) != 0) { 1532 /* 1533 * Explicitly rebuffing such requests with a #GP is the most 1534 * straightforward way to handle the situation, but certain 1535 * consumers (such as the KVM unit tests) may balk at the 1536 * otherwise unexpected exception. 1537 */ 1538 if (vlapic_gp_on_addr_change) { 1539 return (VMR_GP); 1540 } 1541 1542 /* If silence is required, just ignore the address change. 
*/ 1543 val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE; 1544 } 1545 1546 vlapic->msr_apicbase = val; 1547 return (VMR_OK); 1548 } 1549 1550 static __inline uint16_t 1551 vlapic_msr_to_regoff(uint32_t msr) 1552 { 1553 ASSERT3U(msr, >=, MSR_APIC_000); 1554 ASSERT3U(msr, <, (MSR_APIC_000 + 0x100)); 1555 1556 return ((msr - MSR_APIC_000) << 4); 1557 } 1558 1559 bool 1560 vlapic_owned_msr(uint32_t msr) 1561 { 1562 if (msr == MSR_APICBASE) { 1563 return (true); 1564 } 1565 if (msr >= MSR_APIC_000 && 1566 msr < (MSR_APIC_000 + 0x100)) { 1567 return (true); 1568 } 1569 return (false); 1570 } 1571 1572 vm_msr_result_t 1573 vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp) 1574 { 1575 ASSERT(vlapic_owned_msr(msr)); 1576 ASSERT3P(valp, !=, NULL); 1577 1578 if (msr == MSR_APICBASE) { 1579 *valp = vlapic->msr_apicbase; 1580 return (VMR_OK); 1581 } 1582 1583 /* #GP for x2APIC MSR accesses in xAPIC mode */ 1584 if (!vlapic_x2mode(vlapic)) { 1585 return (VMR_GP); 1586 } 1587 1588 uint64_t out = 0; 1589 const uint16_t reg = vlapic_msr_to_regoff(msr); 1590 switch (reg) { 1591 case APIC_OFFSET_ICR_LOW: { 1592 /* Read from ICR register gets entire (64-bit) value */ 1593 uint32_t low = 0, high = 0; 1594 bool valid; 1595 1596 valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high); 1597 VERIFY(valid); 1598 valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low); 1599 VERIFY(valid); 1600 1601 *valp = ((uint64_t)high << 32) | low; 1602 return (VMR_OK); 1603 } 1604 case APIC_OFFSET_ICR_HI: 1605 /* Already covered by ICR_LOW */ 1606 return (VMR_GP); 1607 default: 1608 break; 1609 } 1610 if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) { 1611 return (VMR_GP); 1612 } 1613 *valp = out; 1614 return (VMR_OK); 1615 } 1616 1617 vm_msr_result_t 1618 vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val) 1619 { 1620 ASSERT(vlapic_owned_msr(msr)); 1621 1622 if (msr == MSR_APICBASE) { 1623 return (vlapic_set_apicbase(vlapic, val)); 1624 } 1625 1626 /* #GP for x2APIC MSR 
accesses in xAPIC mode */ 1627 if (!vlapic_x2mode(vlapic)) { 1628 return (VMR_GP); 1629 } 1630 1631 const uint16_t reg = vlapic_msr_to_regoff(msr); 1632 switch (reg) { 1633 case APIC_OFFSET_ICR_LOW: { 1634 /* Write to ICR register sets entire (64-bit) value */ 1635 bool valid; 1636 1637 valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32); 1638 VERIFY(valid); 1639 valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val); 1640 VERIFY(valid); 1641 return (VMR_OK); 1642 } 1643 case APIC_OFFSET_ICR_HI: 1644 /* Already covered by ICR_LOW */ 1645 return (VMR_GP); 1646 case APIC_OFFSET_ESR: 1647 /* Only 0 may be written from x2APIC mode */ 1648 if (val != 0) { 1649 return (VMR_GP); 1650 } 1651 break; 1652 default: 1653 break; 1654 } 1655 if (!vlapic_write(vlapic, reg, val)) { 1656 return (VMR_GP); 1657 } 1658 return (VMR_OK); 1659 } 1660 1661 void 1662 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1663 { 1664 struct vlapic *vlapic; 1665 struct LAPIC *lapic; 1666 1667 vlapic = vm_lapic(vm, vcpuid); 1668 1669 if (state == X2APIC_DISABLED) 1670 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1671 else 1672 vlapic->msr_apicbase |= APICBASE_X2APIC; 1673 1674 /* 1675 * Reset the local APIC registers whose values are mode-dependent. 1676 * 1677 * XXX this works because the APIC mode can be changed only at vcpu 1678 * initialization time. 
1679 */ 1680 lapic = vlapic->apic_page; 1681 lapic->id = vlapic_get_id(vlapic); 1682 if (vlapic_x2mode(vlapic)) { 1683 lapic->ldr = x2apic_ldr(vlapic); 1684 lapic->dfr = 0; 1685 } else { 1686 lapic->ldr = 0; 1687 lapic->dfr = 0xffffffff; 1688 } 1689 1690 if (state == X2APIC_ENABLED) { 1691 if (vlapic->ops.enable_x2apic_mode) 1692 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1693 } 1694 } 1695 1696 void 1697 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1698 int delmode, int vec) 1699 { 1700 bool lowprio; 1701 int vcpuid; 1702 cpuset_t dmask; 1703 1704 if (delmode != IOART_DELFIXED && 1705 delmode != IOART_DELLOPRI && 1706 delmode != IOART_DELEXINT) { 1707 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1708 return; 1709 } 1710 lowprio = (delmode == IOART_DELLOPRI); 1711 1712 /* 1713 * We don't provide any virtual interrupt redirection hardware so 1714 * all interrupts originating from the ioapic or MSI specify the 1715 * 'dest' in the legacy xAPIC format. 1716 */ 1717 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1718 1719 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1720 vcpuid--; 1721 CPU_CLR(vcpuid, &dmask); 1722 if (delmode == IOART_DELEXINT) { 1723 (void) vm_inject_extint(vm, vcpuid); 1724 } else { 1725 (void) lapic_set_intr(vm, vcpuid, vec, level); 1726 } 1727 } 1728 } 1729 1730 void 1731 vlapic_post_intr(struct vlapic *vlapic, int hostcpu) 1732 { 1733 /* 1734 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1735 * 1736 * This is done by leveraging features like Posted Interrupts (Intel) 1737 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1738 * 1739 * If neither of these features are available then fallback to 1740 * sending an IPI to 'hostcpu'. 
1741 */ 1742 if (vlapic->ops.post_intr) 1743 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1744 else 1745 poke_cpu(hostcpu); 1746 } 1747 1748 void 1749 vlapic_localize_resources(struct vlapic *vlapic) 1750 { 1751 vmm_glue_callout_localize(&vlapic->callout); 1752 } 1753 1754 #ifdef __ISRVEC_DEBUG 1755 static void 1756 vlapic_isrstk_eoi(struct vlapic *vlapic, int vector) 1757 { 1758 if (vlapic->isrvec_stk_top <= 0) { 1759 panic("invalid vlapic isrvec_stk_top %d", 1760 vlapic->isrvec_stk_top); 1761 } 1762 vlapic->isrvec_stk_top--; 1763 vlapic_isrstk_verify(vlapic); 1764 } 1765 1766 static void 1767 vlapic_isrstk_accept(struct vlapic *vlapic, int vector) 1768 { 1769 int stk_top; 1770 1771 vlapic->isrvec_stk_top++; 1772 1773 stk_top = vlapic->isrvec_stk_top; 1774 if (stk_top >= ISRVEC_STK_SIZE) 1775 panic("isrvec_stk_top overflow %d", stk_top); 1776 1777 vlapic->isrvec_stk[stk_top] = vector; 1778 vlapic_isrstk_verify(vlapic); 1779 } 1780 1781 static void 1782 vlapic_isrstk_dump(const struct vlapic *vlapic) 1783 { 1784 int i; 1785 uint32_t *isrptr; 1786 1787 isrptr = &vlapic->apic_page->isr0; 1788 for (i = 0; i < 8; i++) 1789 printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); 1790 1791 for (i = 0; i <= vlapic->isrvec_stk_top; i++) 1792 printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); 1793 } 1794 1795 static void 1796 vlapic_isrstk_verify(const struct vlapic *vlapic) 1797 { 1798 int i, lastprio, curprio, vector, idx; 1799 uint32_t *isrptr; 1800 1801 /* 1802 * Note: The value at index 0 in isrvec_stk is always 0. 1803 * 1804 * It is a placeholder for the value of ISR vector when no bits are set 1805 * in the ISRx registers. 1806 */ 1807 if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) { 1808 panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]); 1809 } 1810 1811 /* 1812 * Make sure that the priority of the nested interrupts is 1813 * always increasing. 
1814 */ 1815 lastprio = -1; 1816 for (i = 1; i <= vlapic->isrvec_stk_top; i++) { 1817 curprio = PRIO(vlapic->isrvec_stk[i]); 1818 if (curprio <= lastprio) { 1819 vlapic_isrstk_dump(vlapic); 1820 panic("isrvec_stk does not satisfy invariant"); 1821 } 1822 lastprio = curprio; 1823 } 1824 1825 /* 1826 * Make sure that each bit set in the ISRx registers has a 1827 * corresponding entry on the isrvec stack. 1828 */ 1829 i = 1; 1830 isrptr = &vlapic->apic_page->isr0; 1831 for (vector = 0; vector < 256; vector++) { 1832 idx = (vector / 32) * 4; 1833 if (isrptr[idx] & (1 << (vector % 32))) { 1834 if (i > vlapic->isrvec_stk_top || 1835 vlapic->isrvec_stk[i] != vector) { 1836 vlapic_isrstk_dump(vlapic); 1837 panic("ISR and isrvec_stk out of sync"); 1838 } 1839 i++; 1840 } 1841 } 1842 } 1843 #endif 1844