1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * Copyright (c) 2019 Joyent, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 /* 30 * This file and its contents are supplied under the terms of the 31 * Common Development and Distribution License ("CDDL"), version 1.0. 32 * You may only use this file in accordance with the terms of version 33 * 1.0 of the CDDL. 34 * 35 * A full copy of the text of the CDDL should have accompanied this 36 * source. A copy of the CDDL is also available via the Internet at 37 * http://www.illumos.org/license/CDDL. 38 * 39 * Copyright 2014 Pluribus Networks Inc. 40 * Copyright 2018 Joyent, Inc. 41 * Copyright 2024 Oxide Computer Company 42 */ 43 44 #include <sys/cdefs.h> 45 46 #include <sys/param.h> 47 #include <sys/kernel.h> 48 #include <sys/kmem.h> 49 #include <sys/mutex.h> 50 #include <sys/systm.h> 51 #include <sys/cpuset.h> 52 53 #include <x86/specialreg.h> 54 #include <x86/apicreg.h> 55 56 #include <machine/clock.h> 57 58 #include <machine/vmm.h> 59 #include <sys/vmm_kernel.h> 60 61 #include "vmm_lapic.h" 62 #include "vmm_stat.h" 63 64 #include "vlapic.h" 65 #include "vlapic_priv.h" 66 #include "vioapic.h" 67 68 69 /* 70 * The 4 high bits of a given interrupt vector represent its priority. The same 71 * is true for the contents of the TPR when it is used to calculate the ultimate 72 * PPR of an APIC - the 4 high bits hold the priority. 73 */ 74 #define PRIO(x) ((x) & 0xf0) 75 76 #define VLAPIC_VERSION (0x14) 77 78 /* 79 * The 'vlapic->timer_lock' is used to provide mutual exclusion between the 80 * vlapic_callout_handler() and vcpu accesses to: 81 * - timer_freq_bt, timer_period_bt, timer_fire_bt 82 * - timer LVT register 83 */ 84 #define VLAPIC_TIMER_LOCK(vlapic) mutex_enter(&((vlapic)->timer_lock)) 85 #define VLAPIC_TIMER_UNLOCK(vlapic) mutex_exit(&((vlapic)->timer_lock)) 86 #define VLAPIC_TIMER_LOCKED(vlapic) MUTEX_HELD(&((vlapic)->timer_lock)) 87 88 /* 89 * APIC timer frequency: 90 * - arbitrary but chosen to be in the ballpark of contemporary hardware. 91 * - power-of-two to avoid loss of precision when calculating times 92 */ 93 #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) 94 95 #define APICBASE_ADDR_MASK 0xfffffffffffff000UL 96 97 #define APIC_VALID_MASK_ESR (APIC_ESR_SEND_CS_ERROR | \ 98 APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \ 99 APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \ 100 APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER) 101 102 static void vlapic_set_error(struct vlapic *, uint32_t, bool); 103 static void vlapic_callout_handler(void *arg); 104 105 static __inline bool 106 vlapic_x2mode(const struct vlapic *vlapic) 107 { 108 return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0); 109 } 110 111 static __inline bool 112 vlapic_hw_disabled(const struct vlapic *vlapic) 113 { 114 return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0); 115 } 116 117 static __inline bool 118 vlapic_sw_disabled(const struct vlapic *vlapic) 119 { 120 const struct LAPIC *lapic = vlapic->apic_page; 121 122 return ((lapic->svr & APIC_SVR_ENABLE) == 0); 123 } 124 125 static __inline bool 126 vlapic_enabled(const struct vlapic *vlapic) 127 { 128 return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic)); 129 } 130 131 static __inline uint32_t 132 vlapic_get_id(const struct vlapic *vlapic) 133 { 134 135 if (vlapic_x2mode(vlapic)) 136 return (vlapic->vcpuid); 137 else 138 return (vlapic->vcpuid << 24); 139 } 140 141 static uint32_t 142 x2apic_ldr(const struct vlapic *vlapic) 143 { 144 int apicid; 145 uint32_t ldr; 146 147 apicid = vlapic_get_id(vlapic); 148 ldr = 1 << (apicid & 0xf); 149 ldr |= (apicid & 0xffff0) << 12; 150 return (ldr); 151 } 152 153 void 154 vlapic_dfr_write_handler(struct vlapic *vlapic) 155 { 156 struct LAPIC *lapic; 157 158 lapic = vlapic->apic_page; 159 if (vlapic_x2mode(vlapic)) { 160 /* Ignore write to DFR in x2APIC mode */ 161 lapic->dfr = 0; 162 return; 163 } 164 165 lapic->dfr &= APIC_DFR_MODEL_MASK; 166 lapic->dfr |= APIC_DFR_RESERVED; 167 } 168 169 void 170 vlapic_ldr_write_handler(struct vlapic *vlapic) 171 { 172 struct LAPIC *lapic; 173 174 lapic = vlapic->apic_page; 175 176 /* LDR is read-only in x2apic mode */ 177 if (vlapic_x2mode(vlapic)) { 178 /* Ignore write to LDR in x2APIC mode */ 179 lapic->ldr = x2apic_ldr(vlapic); 180 } else { 181 lapic->ldr &= ~APIC_LDR_RESERVED; 182 } 183 } 184 185 void 186 vlapic_id_write_handler(struct vlapic *vlapic) 187 { 188 struct LAPIC *lapic; 189 190 /* 191 * We don't allow the ID register to be modified so reset it back to 192 * its default value. 193 */ 194 lapic = vlapic->apic_page; 195 lapic->id = vlapic_get_id(vlapic); 196 } 197 198 static int 199 vlapic_timer_divisor(uint32_t dcr) 200 { 201 switch (dcr & 0xB) { 202 case APIC_TDCR_1: 203 return (1); 204 case APIC_TDCR_2: 205 return (2); 206 case APIC_TDCR_4: 207 return (4); 208 case APIC_TDCR_8: 209 return (8); 210 case APIC_TDCR_16: 211 return (16); 212 case APIC_TDCR_32: 213 return (32); 214 case APIC_TDCR_64: 215 return (64); 216 case APIC_TDCR_128: 217 return (128); 218 default: 219 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); 220 } 221 } 222 223 static uint32_t 224 vlapic_get_ccr(struct vlapic *vlapic) 225 { 226 struct LAPIC *lapic; 227 uint32_t ccr; 228 229 ccr = 0; 230 lapic = vlapic->apic_page; 231 232 VLAPIC_TIMER_LOCK(vlapic); 233 if (callout_active(&vlapic->callout)) { 234 /* 235 * If the timer is scheduled to expire in the future then 236 * compute the value of 'ccr' based on the remaining time. 237 */ 238 239 const hrtime_t now = gethrtime(); 240 if (vlapic->timer_fire_when > now) { 241 ccr += hrt_freq_count(vlapic->timer_fire_when - now, 242 vlapic->timer_cur_freq); 243 } 244 } 245 246 /* 247 * Clamp CCR value to that programmed in ICR - its theoretical maximum. 248 * Normal operation should never result in this being necessary. Only 249 * strange circumstances due to state importation as part of instance 250 * save/restore or live-migration require such wariness. 251 */ 252 if (ccr > lapic->icr_timer) { 253 ccr = lapic->icr_timer; 254 vlapic->stats.vs_clamp_ccr++; 255 } 256 VLAPIC_TIMER_UNLOCK(vlapic); 257 return (ccr); 258 } 259 260 static void 261 vlapic_update_divider(struct vlapic *vlapic) 262 { 263 struct LAPIC *lapic = vlapic->apic_page; 264 265 ASSERT(VLAPIC_TIMER_LOCKED(vlapic)); 266 267 vlapic->timer_cur_freq = 268 VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer); 269 vlapic->timer_period = 270 hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer); 271 } 272 273 void 274 vlapic_dcr_write_handler(struct vlapic *vlapic) 275 { 276 /* 277 * Update the timer frequency and the timer period. 278 * 279 * XXX changes to the frequency divider will not take effect until 280 * the timer is reloaded. 281 */ 282 VLAPIC_TIMER_LOCK(vlapic); 283 vlapic_update_divider(vlapic); 284 VLAPIC_TIMER_UNLOCK(vlapic); 285 } 286 287 void 288 vlapic_esr_write_handler(struct vlapic *vlapic) 289 { 290 struct LAPIC *lapic; 291 292 lapic = vlapic->apic_page; 293 lapic->esr = vlapic->esr_pending; 294 vlapic->esr_pending = 0; 295 } 296 297 vcpu_notify_t 298 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) 299 { 300 struct LAPIC *lapic; 301 uint32_t *irrptr, *tmrptr, mask, tmr; 302 int idx; 303 304 KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); 305 306 lapic = vlapic->apic_page; 307 if (!(lapic->svr & APIC_SVR_ENABLE)) { 308 /* ignore interrupt on software-disabled APIC */ 309 return (VCPU_NOTIFY_NONE); 310 } 311 312 if (vector < 16) { 313 vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR, 314 false); 315 316 /* 317 * If the error LVT is configured to interrupt the vCPU, it will 318 * have delivered a notification through that mechanism. 319 */ 320 return (VCPU_NOTIFY_NONE); 321 } 322 323 if (vlapic->ops.set_intr_ready) { 324 return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); 325 } 326 327 idx = (vector / 32) * 4; 328 mask = 1 << (vector % 32); 329 tmrptr = &lapic->tmr0; 330 irrptr = &lapic->irr0; 331 332 /* 333 * Update TMR for requested vector, if necessary. 334 * This must be done prior to asserting the bit in IRR so that the 335 * proper TMR state is always visible before the to-be-queued interrupt 336 * can be injected. 337 */ 338 tmr = atomic_load_acq_32(&tmrptr[idx]); 339 if ((tmr & mask) != (level ? mask : 0)) { 340 if (level) { 341 atomic_set_int(&tmrptr[idx], mask); 342 } else { 343 atomic_clear_int(&tmrptr[idx], mask); 344 } 345 } 346 347 /* Now set the bit in IRR */ 348 atomic_set_int(&irrptr[idx], mask); 349 350 return (VCPU_NOTIFY_EXIT); 351 } 352 353 static __inline uint32_t * 354 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) 355 { 356 struct LAPIC *lapic = vlapic->apic_page; 357 int i; 358 359 switch (offset) { 360 case APIC_OFFSET_CMCI_LVT: 361 return (&lapic->lvt_cmci); 362 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 363 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; 364 return ((&lapic->lvt_timer) + i); 365 default: 366 panic("vlapic_get_lvt: invalid LVT\n"); 367 } 368 } 369 370 static __inline int 371 lvt_off_to_idx(uint32_t offset) 372 { 373 int index; 374 375 switch (offset) { 376 case APIC_OFFSET_CMCI_LVT: 377 index = APIC_LVT_CMCI; 378 break; 379 case APIC_OFFSET_TIMER_LVT: 380 index = APIC_LVT_TIMER; 381 break; 382 case APIC_OFFSET_THERM_LVT: 383 index = APIC_LVT_THERMAL; 384 break; 385 case APIC_OFFSET_PERF_LVT: 386 index = APIC_LVT_PMC; 387 break; 388 case APIC_OFFSET_LINT0_LVT: 389 index = APIC_LVT_LINT0; 390 break; 391 case APIC_OFFSET_LINT1_LVT: 392 index = APIC_LVT_LINT1; 393 break; 394 case APIC_OFFSET_ERROR_LVT: 395 index = APIC_LVT_ERROR; 396 break; 397 default: 398 index = -1; 399 break; 400 } 401 KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " 402 "invalid lvt index %d for offset %x", index, offset)); 403 404 return (index); 405 } 406 407 static __inline uint32_t 408 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) 409 { 410 int idx; 411 uint32_t val; 412 413 idx = lvt_off_to_idx(offset); 414 val = atomic_load_acq_32(&vlapic->lvt_last[idx]); 415 return (val); 416 } 417 418 void 419 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) 420 { 421 uint32_t *lvtptr, mask, val; 422 struct LAPIC *lapic; 423 int idx; 424 425 lapic = vlapic->apic_page; 426 lvtptr = vlapic_get_lvtptr(vlapic, offset); 427 val = *lvtptr; 428 idx = lvt_off_to_idx(offset); 429 430 if (!(lapic->svr & APIC_SVR_ENABLE)) 431 val |= APIC_LVT_M; 432 mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; 433 switch (offset) { 434 case APIC_OFFSET_TIMER_LVT: 435 mask |= APIC_LVTT_TM; 436 break; 437 case APIC_OFFSET_ERROR_LVT: 438 break; 439 case APIC_OFFSET_LINT0_LVT: 440 case APIC_OFFSET_LINT1_LVT: 441 mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; 442 /* FALLTHROUGH */ 443 default: 444 mask |= APIC_LVT_DM; 445 break; 446 } 447 val &= mask; 448 *lvtptr = val; 449 atomic_store_rel_32(&vlapic->lvt_last[idx], val); 450 } 451 452 static void 453 vlapic_refresh_lvts(struct vlapic *vlapic) 454 { 455 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); 456 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); 457 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); 458 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); 459 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); 460 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); 461 vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); 462 } 463 464 static void 465 vlapic_mask_lvts(struct vlapic *vlapic) 466 { 467 struct LAPIC *lapic = vlapic->apic_page; 468 469 lapic->lvt_cmci |= APIC_LVT_M; 470 lapic->lvt_timer |= APIC_LVT_M; 471 lapic->lvt_thermal |= APIC_LVT_M; 472 lapic->lvt_pcint |= APIC_LVT_M; 473 lapic->lvt_lint0 |= APIC_LVT_M; 474 lapic->lvt_lint1 |= APIC_LVT_M; 475 lapic->lvt_error |= APIC_LVT_M; 476 vlapic_refresh_lvts(vlapic); 477 } 478 479 static int 480 vlapic_fire_lvt(struct vlapic *vlapic, uint_t lvt) 481 { 482 uint32_t mode, reg, vec; 483 vcpu_notify_t notify; 484 485 reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]); 486 487 if (reg & APIC_LVT_M) 488 return (0); 489 vec = reg & APIC_LVT_VECTOR; 490 mode = reg & APIC_LVT_DM; 491 492 switch (mode) { 493 case APIC_LVT_DM_FIXED: 494 if (vec < 16) { 495 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, 496 lvt == APIC_LVT_ERROR); 497 return (0); 498 } 499 notify = vlapic_set_intr_ready(vlapic, vec, false); 500 vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify); 501 break; 502 case APIC_LVT_DM_NMI: 503 (void) vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 504 break; 505 case APIC_LVT_DM_EXTINT: 506 (void) vm_inject_extint(vlapic->vm, vlapic->vcpuid); 507 break; 508 default: 509 // Other modes ignored 510 return (0); 511 } 512 return (1); 513 } 514 515 static uint_t 516 vlapic_active_isr(struct vlapic *vlapic) 517 { 518 int i; 519 uint32_t *isrp; 520 521 isrp = &vlapic->apic_page->isr7; 522 523 for (i = 7; i >= 0; i--, isrp -= 4) { 524 uint32_t reg = *isrp; 525 526 if (reg != 0) { 527 uint_t vec = (i * 32) + bsrl(reg); 528 529 if (vec < 16) { 530 /* 531 * Truncate the illegal low vectors to value of 532 * 0, indicating that no active ISR was found. 533 */ 534 return (0); 535 } 536 return (vec); 537 } 538 } 539 540 return (0); 541 } 542 543 /* 544 * After events which might arbitrarily change the value of PPR, such as a TPR 545 * write or an EOI, calculate that new PPR value and store it in the APIC page. 546 */ 547 static void 548 vlapic_update_ppr(struct vlapic *vlapic) 549 { 550 int isrvec, tpr, ppr; 551 552 isrvec = vlapic_active_isr(vlapic); 553 tpr = vlapic->apic_page->tpr; 554 555 /* 556 * Algorithm adopted from section "Interrupt, Task and Processor 557 * Priority" in Intel Architecture Manual Vol 3a. 558 */ 559 if (PRIO(tpr) >= PRIO(isrvec)) { 560 ppr = tpr; 561 } else { 562 ppr = PRIO(isrvec); 563 } 564 565 vlapic->apic_page->ppr = ppr; 566 } 567 568 /* 569 * When a vector is asserted in ISR as in-service, the PPR must be raised to the 570 * priority of that vector, as the vCPU would have been at a lower priority in 571 * order for the vector to be accepted. 572 */ 573 static void 574 vlapic_raise_ppr(struct vlapic *vlapic, int vec) 575 { 576 struct LAPIC *lapic = vlapic->apic_page; 577 int ppr; 578 579 ppr = PRIO(vec); 580 581 lapic->ppr = ppr; 582 } 583 584 void 585 vlapic_sync_tpr(struct vlapic *vlapic) 586 { 587 vlapic_update_ppr(vlapic); 588 } 589 590 static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); 591 592 static void 593 vlapic_process_eoi(struct vlapic *vlapic) 594 { 595 struct LAPIC *lapic = vlapic->apic_page; 596 uint32_t *isrptr, *tmrptr; 597 int i; 598 uint_t idx, bitpos, vector; 599 600 isrptr = &lapic->isr0; 601 tmrptr = &lapic->tmr0; 602 603 for (i = 7; i >= 0; i--) { 604 idx = i * 4; 605 if (isrptr[idx] != 0) { 606 bitpos = bsrl(isrptr[idx]); 607 vector = i * 32 + bitpos; 608 609 isrptr[idx] &= ~(1 << bitpos); 610 vlapic_update_ppr(vlapic); 611 if ((tmrptr[idx] & (1 << bitpos)) != 0) { 612 vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, 613 vector); 614 } 615 return; 616 } 617 } 618 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1); 619 } 620 621 static __inline int 622 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) 623 { 624 625 return (lvt & mask); 626 } 627 628 static __inline int 629 vlapic_periodic_timer(struct vlapic *vlapic) 630 { 631 uint32_t lvt; 632 633 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 634 635 return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); 636 } 637 638 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); 639 640 static void 641 vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error) 642 { 643 644 vlapic->esr_pending |= mask; 645 646 /* 647 * Avoid infinite recursion if the error LVT itself is configured with 648 * an illegal vector. 649 */ 650 if (lvt_error) 651 return; 652 653 if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) { 654 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); 655 } 656 } 657 658 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); 659 660 static void 661 vlapic_fire_timer(struct vlapic *vlapic) 662 { 663 ASSERT(VLAPIC_TIMER_LOCKED(vlapic)); 664 665 if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) { 666 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); 667 } 668 } 669 670 static VMM_STAT(VLAPIC_INTR_CMC, 671 "corrected machine check interrupts generated by vlapic"); 672 673 void 674 vlapic_fire_cmci(struct vlapic *vlapic) 675 { 676 677 if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) { 678 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); 679 } 680 } 681 682 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, 683 "lvts triggered"); 684 685 int 686 vlapic_trigger_lvt(struct vlapic *vlapic, int vector) 687 { 688 if (!vlapic_enabled(vlapic)) { 689 /* 690 * When the local APIC is global/hardware disabled, 691 * LINT[1:0] pins are configured as INTR and NMI pins, 692 * respectively. 693 */ 694 switch (vector) { 695 case APIC_LVT_LINT0: 696 (void) vm_inject_extint(vlapic->vm, 697 vlapic->vcpuid); 698 break; 699 case APIC_LVT_LINT1: 700 (void) vm_inject_nmi(vlapic->vm, 701 vlapic->vcpuid); 702 break; 703 default: 704 break; 705 } 706 return (0); 707 } 708 709 switch (vector) { 710 case APIC_LVT_LINT0: 711 case APIC_LVT_LINT1: 712 case APIC_LVT_TIMER: 713 case APIC_LVT_ERROR: 714 case APIC_LVT_PMC: 715 case APIC_LVT_THERMAL: 716 case APIC_LVT_CMCI: 717 if (vlapic_fire_lvt(vlapic, vector)) { 718 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 719 LVTS_TRIGGERRED, vector, 1); 720 } 721 break; 722 default: 723 return (EINVAL); 724 } 725 return (0); 726 } 727 728 static void 729 vlapic_callout_reset(struct vlapic *vlapic) 730 { 731 callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when, 732 vlapic_callout_handler, vlapic, C_ABSOLUTE); 733 } 734 735 static void 736 vlapic_callout_handler(void *arg) 737 { 738 struct vlapic *vlapic = arg; 739 740 VLAPIC_TIMER_LOCK(vlapic); 741 if (callout_pending(&vlapic->callout)) /* callout was reset */ 742 goto done; 743 744 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 745 goto done; 746 747 callout_deactivate(&vlapic->callout); 748 749 vlapic_fire_timer(vlapic); 750 751 /* 752 * We should not end up here with timer_period == 0, but to prevent a 753 * runaway periodic timer, it is checked anyways. 754 */ 755 if (vlapic_periodic_timer(vlapic) && vlapic->timer_period != 0) { 756 /* 757 * Compute the delta between when the timer was supposed to 758 * fire and the present time. We can depend on the fact that 759 * cyclics (which underly these callouts) will never be called 760 * early. 761 */ 762 const hrtime_t now = gethrtime(); 763 const hrtime_t delta = now - vlapic->timer_fire_when; 764 if (delta >= vlapic->timer_period) { 765 /* 766 * If we are so behind that we have missed an entire 767 * timer period, reset the time base rather than 768 * attempting to catch up. 769 */ 770 vlapic->timer_fire_when = now + vlapic->timer_period; 771 } else { 772 vlapic->timer_fire_when += vlapic->timer_period; 773 } 774 vlapic_callout_reset(vlapic); 775 } else { 776 /* 777 * Clear the target time so that logic can distinguish from a 778 * timer which has fired (where the value is zero) from one 779 * which is held pending due to the instance being paused (where 780 * the value is non-zero, but the callout is not pending). 781 */ 782 vlapic->timer_fire_when = 0; 783 } 784 done: 785 VLAPIC_TIMER_UNLOCK(vlapic); 786 } 787 788 void 789 vlapic_icrtmr_write_handler(struct vlapic *vlapic) 790 { 791 struct LAPIC *lapic = vlapic->apic_page; 792 793 VLAPIC_TIMER_LOCK(vlapic); 794 vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq, 795 lapic->icr_timer); 796 if (vlapic->timer_period != 0) { 797 vlapic->timer_fire_when = gethrtime() + vlapic->timer_period; 798 vlapic_callout_reset(vlapic); 799 } else { 800 vlapic->timer_fire_when = 0; 801 callout_stop(&vlapic->callout); 802 } 803 VLAPIC_TIMER_UNLOCK(vlapic); 804 } 805 806 /* 807 * This function populates 'dmask' with the set of vcpus that match the 808 * addressing specified by the (dest, phys, lowprio) tuple. 809 * 810 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 811 * or xAPIC (8-bit) destination field. 812 */ 813 void 814 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 815 bool lowprio, bool x2apic_dest) 816 { 817 struct vlapic *vlapic; 818 uint32_t dfr, ldr, ldest, cluster; 819 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 820 cpuset_t amask; 821 int vcpuid; 822 823 if ((x2apic_dest && dest == 0xffffffff) || 824 (!x2apic_dest && dest == 0xff)) { 825 /* 826 * Broadcast in both logical and physical modes. 827 */ 828 *dmask = vm_active_cpus(vm); 829 return; 830 } 831 832 if (phys) { 833 /* 834 * Physical mode: destination is APIC ID. 835 */ 836 CPU_ZERO(dmask); 837 vcpuid = vm_apicid2vcpuid(vm, dest); 838 amask = vm_active_cpus(vm); 839 if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) 840 CPU_SET(vcpuid, dmask); 841 } else { 842 /* 843 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 844 * bitmask. This model is only available in the xAPIC mode. 845 */ 846 mda_flat_ldest = dest & 0xff; 847 848 /* 849 * In the "Cluster Model" the MDA is used to identify a 850 * specific cluster and a set of APICs in that cluster. 851 */ 852 if (x2apic_dest) { 853 mda_cluster_id = dest >> 16; 854 mda_cluster_ldest = dest & 0xffff; 855 } else { 856 mda_cluster_id = (dest >> 4) & 0xf; 857 mda_cluster_ldest = dest & 0xf; 858 } 859 860 /* 861 * Logical mode: match each APIC that has a bit set 862 * in its LDR that matches a bit in the ldest. 863 */ 864 CPU_ZERO(dmask); 865 amask = vm_active_cpus(vm); 866 while ((vcpuid = CPU_FFS(&amask)) != 0) { 867 vcpuid--; 868 CPU_CLR(vcpuid, &amask); 869 870 vlapic = vm_lapic(vm, vcpuid); 871 dfr = vlapic->apic_page->dfr; 872 ldr = vlapic->apic_page->ldr; 873 874 if ((dfr & APIC_DFR_MODEL_MASK) == 875 APIC_DFR_MODEL_FLAT) { 876 ldest = ldr >> 24; 877 mda_ldest = mda_flat_ldest; 878 } else if ((dfr & APIC_DFR_MODEL_MASK) == 879 APIC_DFR_MODEL_CLUSTER) { 880 if (vlapic_x2mode(vlapic)) { 881 cluster = ldr >> 16; 882 ldest = ldr & 0xffff; 883 } else { 884 cluster = ldr >> 28; 885 ldest = (ldr >> 24) & 0xf; 886 } 887 if (cluster != mda_cluster_id) 888 continue; 889 mda_ldest = mda_cluster_ldest; 890 } else { 891 /* 892 * Guest has configured a bad logical 893 * model for this vcpu - skip it. 894 */ 895 continue; 896 } 897 898 if ((mda_ldest & ldest) != 0) { 899 CPU_SET(vcpuid, dmask); 900 if (lowprio) 901 break; 902 } 903 } 904 } 905 } 906 907 static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu"); 908 static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu"); 909 910 static void 911 vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) 912 { 913 struct LAPIC *lapic = vlapic->apic_page; 914 915 if (lapic->tpr != val) { 916 lapic->tpr = val; 917 vlapic_update_ppr(vlapic); 918 } 919 } 920 921 void 922 vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) 923 { 924 uint8_t tpr; 925 926 if (val & ~0xf) { 927 vm_inject_gp(vlapic->vm, vlapic->vcpuid); 928 return; 929 } 930 931 tpr = val << 4; 932 vlapic_set_tpr(vlapic, tpr); 933 } 934 935 uint64_t 936 vlapic_get_cr8(const struct vlapic *vlapic) 937 { 938 const struct LAPIC *lapic = vlapic->apic_page; 939 940 return (lapic->tpr >> 4); 941 } 942 943 static bool 944 vlapic_is_icr_valid(uint64_t icrval) 945 { 946 uint32_t mode = icrval & APIC_DELMODE_MASK; 947 uint32_t level = icrval & APIC_LEVEL_MASK; 948 uint32_t trigger = icrval & APIC_TRIGMOD_MASK; 949 uint32_t shorthand = icrval & APIC_DEST_MASK; 950 951 switch (mode) { 952 case APIC_DELMODE_FIXED: 953 if (trigger == APIC_TRIGMOD_EDGE) 954 return (true); 955 /* 956 * AMD allows a level assert IPI and Intel converts a level 957 * assert IPI into an edge IPI. 958 */ 959 if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT) 960 return (true); 961 break; 962 case APIC_DELMODE_LOWPRIO: 963 case APIC_DELMODE_SMI: 964 case APIC_DELMODE_NMI: 965 case APIC_DELMODE_INIT: 966 if (trigger == APIC_TRIGMOD_EDGE && 967 (shorthand == APIC_DEST_DESTFLD || 968 shorthand == APIC_DEST_ALLESELF)) { 969 return (true); 970 } 971 /* 972 * AMD allows a level assert IPI and Intel converts a level 973 * assert IPI into an edge IPI. 974 */ 975 if (trigger == APIC_TRIGMOD_LEVEL && 976 level == APIC_LEVEL_ASSERT && 977 (shorthand == APIC_DEST_DESTFLD || 978 shorthand == APIC_DEST_ALLESELF)) { 979 return (true); 980 } 981 /* 982 * An level triggered deassert INIT is defined in the Intel 983 * Multiprocessor Specification and the Intel Software Developer 984 * Manual. Due to the MPS it's required to send a level assert 985 * INIT to a cpu and then a level deassert INIT. Some operating 986 * systems e.g. FreeBSD or Linux use that algorithm. According 987 * to the SDM a level deassert INIT is only supported by Pentium 988 * and P6 processors. It's always send to all cpus regardless of 989 * the destination or shorthand field. It resets the arbitration 990 * id register. This register is not software accessible and 991 * only required for the APIC bus arbitration. So, the level 992 * deassert INIT doesn't need any emulation and we should ignore 993 * it. The SDM also defines that newer processors don't support 994 * the level deassert INIT and it's not valid any more. As it's 995 * defined for older systems, it can't be invalid per se. 996 * Otherwise, backward compatibility would be broken. However, 997 * when returning false here, it'll be ignored which is the 998 * desired behaviour. 999 */ 1000 if (mode == APIC_DELMODE_INIT && 1001 trigger == APIC_TRIGMOD_LEVEL && 1002 level == APIC_LEVEL_DEASSERT) { 1003 return (false); 1004 } 1005 break; 1006 case APIC_DELMODE_STARTUP: 1007 if (shorthand == APIC_DEST_DESTFLD || 1008 shorthand == APIC_DEST_ALLESELF) { 1009 return (true); 1010 } 1011 break; 1012 case APIC_DELMODE_RR: 1013 /* Only available on AMD! */ 1014 if (trigger == APIC_TRIGMOD_EDGE && 1015 shorthand == APIC_DEST_DESTFLD) { 1016 return (true); 1017 } 1018 break; 1019 case APIC_DELMODE_RESV: 1020 return (false); 1021 default: 1022 panic("vlapic_is_icr_valid: invalid mode 0x%08x", mode); 1023 } 1024 1025 return (false); 1026 } 1027 1028 void 1029 vlapic_icrlo_write_handler(struct vlapic *vlapic) 1030 { 1031 int i; 1032 cpuset_t dmask; 1033 uint64_t icrval; 1034 uint32_t dest, vec, mode, dsh; 1035 struct LAPIC *lapic; 1036 1037 lapic = vlapic->apic_page; 1038 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 1039 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1040 1041 /* 1042 * Ignore invalid combinations of the icr. 1043 */ 1044 if (!vlapic_is_icr_valid(icrval)) 1045 return; 1046 1047 if (vlapic_x2mode(vlapic)) 1048 dest = icrval >> 32; 1049 else 1050 dest = icrval >> (32 + 24); 1051 vec = icrval & APIC_VECTOR_MASK; 1052 mode = icrval & APIC_DELMODE_MASK; 1053 dsh = icrval & APIC_DEST_MASK; 1054 1055 if (mode == APIC_DELMODE_FIXED && vec < 16) { 1056 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false); 1057 return; 1058 } 1059 1060 if (mode == APIC_DELMODE_INIT && 1061 (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { 1062 /* No work required to deassert INIT */ 1063 return; 1064 } 1065 1066 switch (dsh) { 1067 case APIC_DEST_DESTFLD: 1068 vlapic_calcdest(vlapic->vm, &dmask, dest, 1069 (icrval & APIC_DESTMODE_LOG) == 0, false, 1070 vlapic_x2mode(vlapic)); 1071 break; 1072 case APIC_DEST_SELF: 1073 CPU_SETOF(vlapic->vcpuid, &dmask); 1074 break; 1075 case APIC_DEST_ALLISELF: 1076 dmask = vm_active_cpus(vlapic->vm); 1077 break; 1078 case APIC_DEST_ALLESELF: 1079 dmask = vm_active_cpus(vlapic->vm); 1080 CPU_CLR(vlapic->vcpuid, &dmask); 1081 break; 1082 default: 1083 /* 1084 * All possible delivery notations are covered above. 1085 * We should never end up here. 1086 */ 1087 panic("unknown delivery shorthand: %x", dsh); 1088 } 1089 1090 while ((i = CPU_FFS(&dmask)) != 0) { 1091 i--; 1092 CPU_CLR(i, &dmask); 1093 switch (mode) { 1094 case APIC_DELMODE_FIXED: 1095 (void) lapic_intr_edge(vlapic->vm, i, vec); 1096 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, 1097 VLAPIC_IPI_SEND, 1); 1098 vmm_stat_incr(vlapic->vm, i, 1099 VLAPIC_IPI_RECV, 1); 1100 break; 1101 case APIC_DELMODE_NMI: 1102 (void) vm_inject_nmi(vlapic->vm, i); 1103 break; 1104 case APIC_DELMODE_INIT: 1105 (void) vm_inject_init(vlapic->vm, i); 1106 break; 1107 case APIC_DELMODE_STARTUP: 1108 (void) vm_inject_sipi(vlapic->vm, i, vec); 1109 break; 1110 case APIC_DELMODE_LOWPRIO: 1111 case APIC_DELMODE_SMI: 1112 default: 1113 /* Unhandled IPI modes (for now) */ 1114 break; 1115 } 1116 } 1117 } 1118 1119 void 1120 vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val) 1121 { 1122 const int vec = val & 0xff; 1123 1124 /* self-IPI is only exposed via x2APIC */ 1125 ASSERT(vlapic_x2mode(vlapic)); 1126 1127 (void) lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); 1128 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1); 1129 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1); 1130 } 1131 1132 int 1133 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1134 { 1135 struct LAPIC *lapic = vlapic->apic_page; 1136 int idx, i, bitpos, vector; 1137 uint32_t *irrptr, val; 1138 1139 if (vlapic->ops.sync_state) { 1140 (*vlapic->ops.sync_state)(vlapic); 1141 } 1142 1143 irrptr = &lapic->irr0; 1144 1145 for (i = 7; i >= 0; i--) { 1146 idx = i * 4; 1147 val = atomic_load_acq_int(&irrptr[idx]); 1148 bitpos = fls(val); 1149 if (bitpos != 0) { 1150 vector = i * 32 + (bitpos - 1); 1151 if (PRIO(vector) > PRIO(lapic->ppr)) { 1152 if (vecptr != NULL) 1153 *vecptr = vector; 1154 return (1); 1155 } else 1156 break; 1157 } 1158 } 1159 return (0); 1160 } 1161 1162 void 1163 vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1164 { 1165 struct LAPIC *lapic = vlapic->apic_page; 1166 uint32_t *irrptr, *isrptr; 1167 int idx; 1168 1169 KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector)); 1170 1171 if (vlapic->ops.intr_accepted) 1172 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1173 1174 /* 1175 * clear the ready bit for vector being accepted in irr 1176 * and set the vector as in service in isr. 1177 */ 1178 idx = (vector / 32) * 4; 1179 1180 irrptr = &lapic->irr0; 1181 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1182 1183 isrptr = &lapic->isr0; 1184 isrptr[idx] |= 1 << (vector % 32); 1185 1186 /* 1187 * The only way a fresh vector could be accepted into ISR is if it was 1188 * of a higher priority than the current PPR. With that vector now 1189 * in-service, the PPR must be raised. 1190 */ 1191 vlapic_raise_ppr(vlapic, vector); 1192 } 1193 1194 void 1195 vlapic_svr_write_handler(struct vlapic *vlapic) 1196 { 1197 struct LAPIC *lapic; 1198 uint32_t old, new, changed; 1199 1200 lapic = vlapic->apic_page; 1201 1202 new = lapic->svr; 1203 old = vlapic->svr_last; 1204 vlapic->svr_last = new; 1205 1206 changed = old ^ new; 1207 if ((changed & APIC_SVR_ENABLE) != 0) { 1208 if ((new & APIC_SVR_ENABLE) == 0) { 1209 /* 1210 * The apic is now disabled so stop the apic timer 1211 * and mask all the LVT entries. 1212 */ 1213 VLAPIC_TIMER_LOCK(vlapic); 1214 callout_stop(&vlapic->callout); 1215 VLAPIC_TIMER_UNLOCK(vlapic); 1216 vlapic_mask_lvts(vlapic); 1217 } else { 1218 /* 1219 * The apic is now enabled so restart the apic timer 1220 * if it is configured in periodic mode. 1221 */ 1222 if (vlapic_periodic_timer(vlapic)) 1223 vlapic_icrtmr_write_handler(vlapic); 1224 } 1225 } 1226 } 1227 1228 static bool 1229 vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp) 1230 { 1231 struct LAPIC *lapic = vlapic->apic_page; 1232 uint32_t *reg; 1233 int i; 1234 1235 ASSERT3U(offset & 0x3, ==, 0); 1236 ASSERT3U(offset, <, PAGESIZE); 1237 ASSERT3P(outp, !=, NULL); 1238 1239 uint32_t data = 0; 1240 switch (offset) { 1241 case APIC_OFFSET_ID: 1242 data = lapic->id; 1243 break; 1244 case APIC_OFFSET_VER: 1245 data = lapic->version; 1246 break; 1247 case APIC_OFFSET_TPR: 1248 data = lapic->tpr; 1249 break; 1250 case APIC_OFFSET_APR: 1251 data = lapic->apr; 1252 break; 1253 case APIC_OFFSET_PPR: 1254 data = lapic->ppr; 1255 break; 1256 case APIC_OFFSET_LDR: 1257 data = lapic->ldr; 1258 break; 1259 case APIC_OFFSET_DFR: 1260 data = lapic->dfr; 1261 break; 1262 case APIC_OFFSET_SVR: 1263 data = lapic->svr; 1264 break; 1265 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1266 i = (offset - APIC_OFFSET_ISR0) >> 2; 1267 reg = &lapic->isr0; 1268 data = *(reg + i); 1269 break; 1270 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1271 i = (offset - APIC_OFFSET_TMR0) >> 2; 1272 reg = &lapic->tmr0; 1273 data = *(reg + i); 1274 break; 1275 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1276 i = (offset - APIC_OFFSET_IRR0) >> 2; 1277 reg = &lapic->irr0; 1278 data = atomic_load_acq_int(reg + i); 1279 break; 1280 case APIC_OFFSET_ESR: 1281 data = lapic->esr; 1282 break; 1283 case APIC_OFFSET_ICR_LOW: 1284 data = lapic->icr_lo; 1285 break; 1286 case APIC_OFFSET_ICR_HI: 1287 data = lapic->icr_hi; 1288 break; 1289 case APIC_OFFSET_CMCI_LVT: 1290 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1291 data = vlapic_get_lvt(vlapic, offset); 1292 #ifdef INVARIANTS 1293 reg = vlapic_get_lvtptr(vlapic, offset); 1294 ASSERT3U(data, ==, *reg); 1295 #endif 1296 break; 1297 case APIC_OFFSET_TIMER_ICR: 1298 data = lapic->icr_timer; 1299 break; 1300 case APIC_OFFSET_TIMER_CCR: 1301 data = vlapic_get_ccr(vlapic); 1302 break; 1303 case APIC_OFFSET_TIMER_DCR: 1304 data = lapic->dcr_timer; 1305 break; 1306 case APIC_OFFSET_RRR: 1307 data = 0; 1308 break; 1309 1310 case APIC_OFFSET_SELF_IPI: 1311 case APIC_OFFSET_EOI: 1312 /* Write-only register */ 1313 *outp = 0; 1314 return (false); 1315 1316 default: 1317 /* Invalid register */ 1318 *outp = 0; 1319 return (false); 1320 } 1321 1322 *outp = data; 1323 return (true); 1324 } 1325 1326 static bool 1327 vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data) 1328 { 1329 struct LAPIC *lapic = vlapic->apic_page; 1330 uint32_t *regptr; 1331 1332 ASSERT3U(offset & 0xf, ==, 0); 1333 ASSERT3U(offset, <, PAGESIZE); 1334 1335 switch (offset) { 1336 case APIC_OFFSET_ID: 1337 lapic->id = data; 1338 vlapic_id_write_handler(vlapic); 1339 break; 1340 case APIC_OFFSET_TPR: 1341 vlapic_set_tpr(vlapic, data & 0xff); 1342 break; 1343 case APIC_OFFSET_EOI: 1344 vlapic_process_eoi(vlapic); 1345 break; 1346 case APIC_OFFSET_LDR: 1347 lapic->ldr = data; 1348 vlapic_ldr_write_handler(vlapic); 1349 break; 1350 case APIC_OFFSET_DFR: 1351 lapic->dfr = data; 1352 vlapic_dfr_write_handler(vlapic); 1353 break; 1354 case APIC_OFFSET_SVR: 1355 lapic->svr = data; 1356 vlapic_svr_write_handler(vlapic); 1357 break; 1358 case APIC_OFFSET_ICR_LOW: 1359 lapic->icr_lo = data; 1360 vlapic_icrlo_write_handler(vlapic); 1361 break; 1362 case APIC_OFFSET_ICR_HI: 1363 lapic->icr_hi = data; 1364 break; 1365 case APIC_OFFSET_CMCI_LVT: 1366 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1367 regptr = vlapic_get_lvtptr(vlapic, offset); 1368 *regptr = data; 1369 vlapic_lvt_write_handler(vlapic, offset); 1370 break; 1371 case APIC_OFFSET_TIMER_ICR: 1372 lapic->icr_timer = data; 1373 vlapic_icrtmr_write_handler(vlapic); 1374 break; 1375 1376 case APIC_OFFSET_TIMER_DCR: 1377 lapic->dcr_timer = data; 1378 vlapic_dcr_write_handler(vlapic); 1379 break; 1380 1381 case APIC_OFFSET_ESR: 1382 vlapic_esr_write_handler(vlapic); 1383 break; 1384 1385 case APIC_OFFSET_SELF_IPI: 1386 if (vlapic_x2mode(vlapic)) 1387 vlapic_self_ipi_handler(vlapic, data); 1388 break; 1389 1390 case APIC_OFFSET_VER: 1391 case APIC_OFFSET_APR: 1392 case APIC_OFFSET_PPR: 1393 case APIC_OFFSET_RRR: 1394 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1395 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1396 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1397 case APIC_OFFSET_TIMER_CCR: 1398 /* Read-only register */ 1399 return (false); 1400 1401 default: 1402 /* Invalid register */ 1403 return (false); 1404 } 1405 1406 return (true); 1407 } 1408 1409 void 1410 vlapic_reset(struct vlapic *vlapic) 1411 { 1412 struct LAPIC *lapic = vlapic->apic_page; 1413 uint32_t *isrptr, *tmrptr, *irrptr; 1414 1415 /* Reset any timer-related state first */ 1416 VLAPIC_TIMER_LOCK(vlapic); 1417 callout_stop(&vlapic->callout); 1418 vlapic->timer_fire_when = 0; 1419 lapic->icr_timer = 0; 1420 lapic->ccr_timer = 0; 1421 lapic->dcr_timer = 0; 1422 vlapic_update_divider(vlapic); 1423 VLAPIC_TIMER_UNLOCK(vlapic); 1424 1425 /* 1426 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so 1427 * it is not leftover after the reset. This is performed after the APIC 1428 * timer has been stopped, in case it happened to fire just prior to 1429 * being deactivated. 1430 */ 1431 if (vlapic->ops.sync_state) { 1432 (*vlapic->ops.sync_state)(vlapic); 1433 } 1434 1435 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1436 if (vlapic->vcpuid == 0) 1437 vlapic->msr_apicbase |= APICBASE_BSP; 1438 1439 lapic->id = vlapic_get_id(vlapic); 1440 lapic->version = VLAPIC_VERSION; 1441 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1442 1443 lapic->tpr = 0; 1444 lapic->apr = 0; 1445 lapic->ppr = 0; 1446 1447 lapic->eoi = 0; 1448 lapic->ldr = 0; 1449 lapic->dfr = 0xffffffff; 1450 lapic->svr = APIC_SVR_VECTOR; 1451 vlapic->svr_last = lapic->svr; 1452 1453 isrptr = &lapic->isr0; 1454 tmrptr = &lapic->tmr0; 1455 irrptr = &lapic->irr0; 1456 for (uint_t i = 0; i < 8; i++) { 1457 atomic_store_rel_int(&isrptr[i * 4], 0); 1458 atomic_store_rel_int(&tmrptr[i * 4], 0); 1459 atomic_store_rel_int(&irrptr[i * 4], 0); 1460 } 1461 1462 lapic->esr = 0; 1463 vlapic->esr_pending = 0; 1464 lapic->icr_lo = 0; 1465 lapic->icr_hi = 0; 1466 1467 lapic->lvt_cmci = 0; 1468 lapic->lvt_timer = 0; 1469 lapic->lvt_thermal = 0; 1470 lapic->lvt_pcint = 0; 1471 lapic->lvt_lint0 = 0; 1472 lapic->lvt_lint1 = 0; 1473 lapic->lvt_error = 0; 1474 vlapic_mask_lvts(vlapic); 1475 } 1476 1477 void 1478 vlapic_init(struct vlapic *vlapic) 1479 { 1480 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1481 KASSERT(vlapic->vcpuid >= 0 && 1482 vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), 1483 ("vlapic_init: vcpuid is not initialized")); 1484 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1485 "initialized")); 1486 1487 /* 1488 * If the vlapic is configured in x2apic mode then it will be 1489 * accessed in the critical section via the MSR emulation code. 1490 * 1491 * Therefore the timer mutex must be a spinlock because blockable 1492 * mutexes cannot be acquired in a critical section. 1493 */ 1494 mutex_init(&vlapic->timer_lock, NULL, MUTEX_ADAPTIVE, NULL); 1495 callout_init(&vlapic->callout, 1); 1496 1497 vlapic_reset(vlapic); 1498 } 1499 1500 void 1501 vlapic_cleanup(struct vlapic *vlapic) 1502 { 1503 callout_drain(&vlapic->callout); 1504 mutex_destroy(&vlapic->timer_lock); 1505 } 1506 1507 int 1508 vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp, 1509 uint_t size) 1510 { 1511 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1512 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1513 1514 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1515 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1516 *valp = UINT64_MAX; 1517 return (0); 1518 } 1519 1520 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1521 uint32_t raw = 0; 1522 (void) vlapic_read(vlapic, off & ~0xf, &raw); 1523 1524 /* Shift and mask reads which are small and/or unaligned */ 1525 const uint8_t align = off & 0xf; 1526 if (align < 4) { 1527 *valp = (uint64_t)raw << (align * 8); 1528 } else { 1529 *valp = 0; 1530 } 1531 1532 return (0); 1533 } 1534 1535 int 1536 vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val, 1537 uint_t size) 1538 { 1539 ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); 1540 ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); 1541 1542 /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ 1543 if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { 1544 return (0); 1545 } 1546 1547 const uint16_t off = gpa - DEFAULT_APIC_BASE; 1548 /* Ignore writes which are not 32-bits wide and 16-byte aligned */ 1549 if ((off & 0xf) != 0 || size != 4) { 1550 return (0); 1551 } 1552 1553 (void) vlapic_write(vlapic, off, (uint32_t)val); 1554 return (0); 1555 } 1556 1557 /* Should attempts to change the APIC base address be rejected with a #GP? */ 1558 int vlapic_gp_on_addr_change = 1; 1559 1560 static vm_msr_result_t 1561 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) 1562 { 1563 const uint64_t diff = vlapic->msr_apicbase ^ val; 1564 1565 /* 1566 * Until the LAPIC emulation for switching between xAPIC and x2APIC 1567 * modes is more polished, it will remain off-limits from being altered 1568 * by the guest. 1569 */ 1570 const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC | 1571 APICBASE_BSP; 1572 if ((diff & reserved_bits) != 0) { 1573 return (VMR_GP); 1574 } 1575 1576 /* We do not presently allow the LAPIC access address to be modified. */ 1577 if ((diff & APICBASE_ADDR_MASK) != 0) { 1578 /* 1579 * Explicitly rebuffing such requests with a #GP is the most 1580 * straightforward way to handle the situation, but certain 1581 * consumers (such as the KVM unit tests) may balk at the 1582 * otherwise unexpected exception. 1583 */ 1584 if (vlapic_gp_on_addr_change) { 1585 return (VMR_GP); 1586 } 1587 1588 /* If silence is required, just ignore the address change. */ 1589 val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE; 1590 } 1591 1592 vlapic->msr_apicbase = val; 1593 return (VMR_OK); 1594 } 1595 1596 static __inline uint16_t 1597 vlapic_msr_to_regoff(uint32_t msr) 1598 { 1599 ASSERT3U(msr, >=, MSR_APIC_000); 1600 ASSERT3U(msr, <, (MSR_APIC_000 + 0x100)); 1601 1602 return ((msr - MSR_APIC_000) << 4); 1603 } 1604 1605 bool 1606 vlapic_owned_msr(uint32_t msr) 1607 { 1608 if (msr == MSR_APICBASE) { 1609 return (true); 1610 } 1611 if (msr >= MSR_APIC_000 && 1612 msr < (MSR_APIC_000 + 0x100)) { 1613 return (true); 1614 } 1615 return (false); 1616 } 1617 1618 vm_msr_result_t 1619 vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp) 1620 { 1621 ASSERT(vlapic_owned_msr(msr)); 1622 ASSERT3P(valp, !=, NULL); 1623 1624 if (msr == MSR_APICBASE) { 1625 *valp = vlapic->msr_apicbase; 1626 return (VMR_OK); 1627 } 1628 1629 /* #GP for x2APIC MSR accesses in xAPIC mode */ 1630 if (!vlapic_x2mode(vlapic)) { 1631 return (VMR_GP); 1632 } 1633 1634 uint64_t out = 0; 1635 const uint16_t reg = vlapic_msr_to_regoff(msr); 1636 switch (reg) { 1637 case APIC_OFFSET_ICR_LOW: { 1638 /* Read from ICR register gets entire (64-bit) value */ 1639 uint32_t low = 0, high = 0; 1640 bool valid; 1641 1642 valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high); 1643 VERIFY(valid); 1644 valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low); 1645 VERIFY(valid); 1646 1647 *valp = ((uint64_t)high << 32) | low; 1648 return (VMR_OK); 1649 } 1650 case APIC_OFFSET_ICR_HI: 1651 /* Already covered by ICR_LOW */ 1652 return (VMR_GP); 1653 default: 1654 break; 1655 } 1656 if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) { 1657 return (VMR_GP); 1658 } 1659 *valp = out; 1660 return (VMR_OK); 1661 } 1662 1663 vm_msr_result_t 1664 vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val) 1665 { 1666 ASSERT(vlapic_owned_msr(msr)); 1667 1668 if (msr == MSR_APICBASE) { 1669 return (vlapic_set_apicbase(vlapic, val)); 1670 } 1671 1672 /* #GP for x2APIC MSR accesses in xAPIC mode */ 1673 if (!vlapic_x2mode(vlapic)) { 1674 return (VMR_GP); 1675 } 1676 1677 const uint16_t reg = vlapic_msr_to_regoff(msr); 1678 switch (reg) { 1679 case APIC_OFFSET_ICR_LOW: { 1680 /* Write to ICR register sets entire (64-bit) value */ 1681 bool valid; 1682 1683 valid = vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32); 1684 VERIFY(valid); 1685 valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val); 1686 VERIFY(valid); 1687 return (VMR_OK); 1688 } 1689 case APIC_OFFSET_ICR_HI: 1690 /* Already covered by ICR_LOW */ 1691 return (VMR_GP); 1692 case APIC_OFFSET_ESR: 1693 /* Only 0 may be written from x2APIC mode */ 1694 if (val != 0) { 1695 return (VMR_GP); 1696 } 1697 break; 1698 default: 1699 break; 1700 } 1701 if (!vlapic_write(vlapic, reg, val)) { 1702 return (VMR_GP); 1703 } 1704 return (VMR_OK); 1705 } 1706 1707 void 1708 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1709 { 1710 struct vlapic *vlapic; 1711 struct LAPIC *lapic; 1712 1713 vlapic = vm_lapic(vm, vcpuid); 1714 1715 if (state == X2APIC_DISABLED) 1716 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1717 else 1718 vlapic->msr_apicbase |= APICBASE_X2APIC; 1719 1720 /* 1721 * Reset the local APIC registers whose values are mode-dependent. 1722 * 1723 * XXX this works because the APIC mode can be changed only at vcpu 1724 * initialization time. 1725 */ 1726 lapic = vlapic->apic_page; 1727 lapic->id = vlapic_get_id(vlapic); 1728 if (vlapic_x2mode(vlapic)) { 1729 lapic->ldr = x2apic_ldr(vlapic); 1730 lapic->dfr = 0; 1731 } else { 1732 lapic->ldr = 0; 1733 lapic->dfr = 0xffffffff; 1734 } 1735 1736 if (state == X2APIC_ENABLED) { 1737 if (vlapic->ops.enable_x2apic_mode) 1738 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1739 } 1740 } 1741 1742 void 1743 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1744 int delmode, int vec) 1745 { 1746 bool lowprio; 1747 int vcpuid; 1748 cpuset_t dmask; 1749 1750 if (delmode != IOART_DELFIXED && 1751 delmode != IOART_DELLOPRI && 1752 delmode != IOART_DELEXINT) { 1753 /* Invalid delivery mode */ 1754 return; 1755 } 1756 lowprio = (delmode == IOART_DELLOPRI); 1757 1758 /* 1759 * We don't provide any virtual interrupt redirection hardware so 1760 * all interrupts originating from the ioapic or MSI specify the 1761 * 'dest' in the legacy xAPIC format. 1762 */ 1763 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1764 1765 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1766 vcpuid--; 1767 CPU_CLR(vcpuid, &dmask); 1768 if (delmode == IOART_DELEXINT) { 1769 (void) vm_inject_extint(vm, vcpuid); 1770 } else { 1771 (void) lapic_set_intr(vm, vcpuid, vec, level); 1772 } 1773 } 1774 } 1775 1776 void 1777 vlapic_post_intr(struct vlapic *vlapic, int hostcpu) 1778 { 1779 /* 1780 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1781 * 1782 * This is done by leveraging features like Posted Interrupts (Intel) 1783 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1784 * 1785 * If neither of these features are available then fallback to 1786 * sending an IPI to 'hostcpu'. 1787 */ 1788 if (vlapic->ops.post_intr) 1789 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1790 else 1791 poke_cpu(hostcpu); 1792 } 1793 1794 void 1795 vlapic_localize_resources(struct vlapic *vlapic) 1796 { 1797 vmm_glue_callout_localize(&vlapic->callout); 1798 } 1799 1800 void 1801 vlapic_pause(struct vlapic *vlapic) 1802 { 1803 VLAPIC_TIMER_LOCK(vlapic); 1804 callout_stop(&vlapic->callout); 1805 VLAPIC_TIMER_UNLOCK(vlapic); 1806 1807 } 1808 1809 void 1810 vlapic_resume(struct vlapic *vlapic) 1811 { 1812 VLAPIC_TIMER_LOCK(vlapic); 1813 if (vlapic->timer_fire_when != 0) { 1814 vlapic_callout_reset(vlapic); 1815 } 1816 VLAPIC_TIMER_UNLOCK(vlapic); 1817 } 1818 1819 static int 1820 vlapic_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req) 1821 { 1822 VERIFY3U(req->vdr_class, ==, VDC_LAPIC); 1823 VERIFY3U(req->vdr_version, ==, 1); 1824 VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1)); 1825 1826 struct vlapic *vlapic = vm_lapic(vm, vcpuid); 1827 struct vdi_lapic_v1 *out = req->vdr_data; 1828 1829 VLAPIC_TIMER_LOCK(vlapic); 1830 1831 if (vlapic->ops.sync_state) { 1832 (*vlapic->ops.sync_state)(vlapic); 1833 } 1834 1835 out->vl_msr_apicbase = vlapic->msr_apicbase; 1836 out->vl_esr_pending = vlapic->esr_pending; 1837 if (vlapic->timer_fire_when != 0) { 1838 out->vl_timer_target = 1839 vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when); 1840 } else { 1841 out->vl_timer_target = 0; 1842 } 1843 1844 const struct LAPIC *lapic = vlapic->apic_page; 1845 struct vdi_lapic_page_v1 *out_page = &out->vl_lapic; 1846 1847 /* 1848 * While this might appear, at first glance, to be missing some fields, 1849 * they are intentionally omitted: 1850 * - PPR: its contents are always generated at runtime 1851 * - EOI: write-only, and contents are ignored after handling 1852 * - RRD: (aka RRR) read-only and always 0 1853 * - CCR: calculated from underlying timer data 1854 */ 1855 out_page->vlp_id = lapic->id; 1856 out_page->vlp_version = lapic->version; 1857 out_page->vlp_tpr = lapic->tpr; 1858 out_page->vlp_apr = lapic->apr; 1859 out_page->vlp_ldr = lapic->ldr; 1860 out_page->vlp_dfr = lapic->dfr; 1861 out_page->vlp_svr = lapic->svr; 1862 out_page->vlp_esr = lapic->esr; 1863 out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 1864 out_page->vlp_icr_timer = lapic->icr_timer; 1865 out_page->vlp_dcr_timer = lapic->dcr_timer; 1866 1867 out_page->vlp_lvt_cmci = lapic->lvt_cmci; 1868 out_page->vlp_lvt_timer = lapic->lvt_timer; 1869 out_page->vlp_lvt_thermal = lapic->lvt_thermal; 1870 out_page->vlp_lvt_pcint = lapic->lvt_pcint; 1871 out_page->vlp_lvt_lint0 = lapic->lvt_lint0; 1872 out_page->vlp_lvt_lint1 = lapic->lvt_lint1; 1873 out_page->vlp_lvt_error = lapic->lvt_error; 1874 1875 const uint32_t *isrptr = &lapic->isr0; 1876 const uint32_t *tmrptr = &lapic->tmr0; 1877 const uint32_t *irrptr = &lapic->irr0; 1878 for (uint_t i = 0; i < 8; i++) { 1879 out_page->vlp_isr[i] = isrptr[i * 4]; 1880 out_page->vlp_tmr[i] = tmrptr[i * 4]; 1881 out_page->vlp_irr[i] = irrptr[i * 4]; 1882 } 1883 VLAPIC_TIMER_UNLOCK(vlapic); 1884 1885 return (0); 1886 } 1887 1888 static uint8_t 1889 popc8(uint8_t val) 1890 { 1891 uint8_t cnt; 1892 1893 for (cnt = 0; val != 0; val &= (val - 1)) { 1894 cnt++; 1895 } 1896 return (cnt); 1897 } 1898 1899 /* 1900 * Descriptions for the various failures which can occur when validating 1901 * to-be-written vlapic state. 1902 */ 1903 enum vlapic_validation_error { 1904 VVE_OK, 1905 VVE_BAD_ID, 1906 VVE_BAD_VERSION, 1907 VVE_BAD_MSR_BASE, 1908 VVE_BAD_ESR, 1909 VVE_BAD_TPR, 1910 VVE_LOW_VECTOR, 1911 VVE_ISR_PRIORITY, 1912 VVE_TIMER_MISMATCH, 1913 }; 1914 1915 static enum vlapic_validation_error 1916 vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req) 1917 { 1918 ASSERT(req->vdr_version == 1 && 1919 req->vdr_len >= sizeof (struct vdi_lapic_v1)); 1920 const struct vdi_lapic_v1 *src = req->vdr_data; 1921 1922 if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 || 1923 (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) { 1924 return (VVE_BAD_ESR); 1925 } 1926 1927 /* Use the same restrictions as the wrmsr accessor for now */ 1928 const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC | 1929 APICBASE_BSP; 1930 const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase; 1931 if ((diff & apicbase_reserved) != 0) { 1932 return (VVE_BAD_MSR_BASE); 1933 } 1934 1935 const struct vdi_lapic_page_v1 *page = &src->vl_lapic; 1936 /* 1937 * Demand that ID match for now. This can be further updated when some 1938 * of the x2apic handling is improved. 1939 */ 1940 if (page->vlp_id != vlapic_get_id(vlapic)) { 1941 return (VVE_BAD_ID); 1942 } 1943 1944 if (page->vlp_version != vlapic->apic_page->version) { 1945 return (VVE_BAD_VERSION); 1946 } 1947 1948 if (page->vlp_tpr > 0xff) { 1949 return (VVE_BAD_TPR); 1950 } 1951 1952 /* Vectors 0-15 are not expected to be handled by the lapic */ 1953 if ((page->vlp_isr[0] & 0xffff) != 0 || 1954 (page->vlp_irr[0] & 0xffff) != 0 || 1955 (page->vlp_tmr[0] & 0xffff) != 0) { 1956 return (VVE_LOW_VECTOR); 1957 } 1958 1959 /* Only one interrupt should be in-service for each priority level */ 1960 for (uint_t i = 0; i < 8; i++) { 1961 if (popc8((uint8_t)page->vlp_isr[i]) > 1 || 1962 popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 || 1963 popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 || 1964 popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) { 1965 return (VVE_ISR_PRIORITY); 1966 } 1967 } 1968 1969 /* If icr_timer is zero, then a scheduled timer does not make sense */ 1970 if (page->vlp_icr_timer == 0 && src->vl_timer_target != 0) { 1971 return (VVE_TIMER_MISMATCH); 1972 } 1973 1974 return (VVE_OK); 1975 } 1976 1977 static int 1978 vlapic_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req) 1979 { 1980 VERIFY3U(req->vdr_class, ==, VDC_LAPIC); 1981 VERIFY3U(req->vdr_version, ==, 1); 1982 VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_lapic_v1)); 1983 1984 struct vlapic *vlapic = vm_lapic(vm, vcpuid); 1985 if (vlapic_data_validate(vlapic, req) != VVE_OK) { 1986 return (EINVAL); 1987 } 1988 const struct vdi_lapic_v1 *src = req->vdr_data; 1989 const struct vdi_lapic_page_v1 *page = &src->vl_lapic; 1990 struct LAPIC *lapic = vlapic->apic_page; 1991 1992 VLAPIC_TIMER_LOCK(vlapic); 1993 1994 /* Already ensured by vlapic_data_validate() */ 1995 VERIFY3U(page->vlp_version, ==, lapic->version); 1996 1997 vlapic->msr_apicbase = src->vl_msr_apicbase; 1998 vlapic->esr_pending = src->vl_esr_pending; 1999 2000 lapic->tpr = page->vlp_tpr; 2001 lapic->apr = page->vlp_apr; 2002 lapic->ldr = page->vlp_ldr; 2003 lapic->dfr = page->vlp_dfr; 2004 lapic->svr = page->vlp_svr; 2005 lapic->esr = page->vlp_esr; 2006 lapic->icr_lo = (uint32_t)page->vlp_icr; 2007 lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32); 2008 2009 lapic->icr_timer = page->vlp_icr_timer; 2010 lapic->dcr_timer = page->vlp_dcr_timer; 2011 vlapic_update_divider(vlapic); 2012 2013 /* cleanse LDR/DFR */ 2014 vlapic_ldr_write_handler(vlapic); 2015 vlapic_dfr_write_handler(vlapic); 2016 2017 lapic->lvt_cmci = page->vlp_lvt_cmci; 2018 lapic->lvt_timer = page->vlp_lvt_timer; 2019 lapic->lvt_thermal = page->vlp_lvt_thermal; 2020 lapic->lvt_pcint = page->vlp_lvt_pcint; 2021 lapic->lvt_lint0 = page->vlp_lvt_lint0; 2022 lapic->lvt_lint1 = page->vlp_lvt_lint1; 2023 lapic->lvt_error = page->vlp_lvt_error; 2024 /* cleanse LVTs */ 2025 vlapic_refresh_lvts(vlapic); 2026 2027 uint32_t *isrptr = &lapic->isr0; 2028 uint32_t *tmrptr = &lapic->tmr0; 2029 uint32_t *irrptr = &lapic->irr0; 2030 for (uint_t i = 0; i < 8; i++) { 2031 isrptr[i * 4] = page->vlp_isr[i]; 2032 tmrptr[i * 4] = page->vlp_tmr[i]; 2033 irrptr[i * 4] = page->vlp_irr[i]; 2034 } 2035 2036 if (src->vl_timer_target != 0) { 2037 vlapic->timer_fire_when = 2038 vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target); 2039 2040 /* 2041 * Check to see if timer expiration would result computed CCR 2042 * values in excess of what is configured in ICR/DCR. 2043 */ 2044 const hrtime_t now = gethrtime(); 2045 if (vlapic->timer_fire_when > now) { 2046 const uint32_t ccr = hrt_freq_count( 2047 vlapic->timer_fire_when - now, 2048 vlapic->timer_cur_freq); 2049 2050 /* 2051 * Until we have a richer event/logging system 2052 * available, just note such an overage as a stat. 2053 */ 2054 if (ccr > lapic->icr_timer) { 2055 vlapic->stats.vs_import_timer_overage++; 2056 } 2057 } 2058 2059 if (!vm_is_paused(vlapic->vm)) { 2060 vlapic_callout_reset(vlapic); 2061 } 2062 } else { 2063 vlapic->timer_fire_when = 0; 2064 } 2065 2066 if (vlapic->ops.sync_state) { 2067 (*vlapic->ops.sync_state)(vlapic); 2068 } 2069 VLAPIC_TIMER_UNLOCK(vlapic); 2070 2071 return (0); 2072 } 2073 2074 static const vmm_data_version_entry_t lapic_v1 = { 2075 .vdve_class = VDC_LAPIC, 2076 .vdve_version = 1, 2077 .vdve_len_expect = sizeof (struct vdi_lapic_v1), 2078 .vdve_vcpu_readf = vlapic_data_read, 2079 .vdve_vcpu_writef = vlapic_data_write, 2080 }; 2081 VMM_DATA_VERSION(lapic_v1); 2082