1 // SPDX-License-Identifier: GPL-2.0-only 2 3 /* 4 * Local APIC virtualization 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2007 Novell 8 * Copyright (C) 2007 Intel 9 * Copyright 2009 Red Hat, Inc. and/or its affiliates. 10 * 11 * Authors: 12 * Dor Laor <dor.laor@qumranet.com> 13 * Gregory Haskins <ghaskins@novell.com> 14 * Yaozu (Eddie) Dong <eddie.dong@intel.com> 15 * 16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. 17 */ 18 19 #include <linux/kvm_host.h> 20 #include <linux/kvm.h> 21 #include <linux/mm.h> 22 #include <linux/highmem.h> 23 #include <linux/smp.h> 24 #include <linux/hrtimer.h> 25 #include <linux/io.h> 26 #include <linux/export.h> 27 #include <linux/math64.h> 28 #include <linux/slab.h> 29 #include <asm/processor.h> 30 #include <asm/msr.h> 31 #include <asm/page.h> 32 #include <asm/current.h> 33 #include <asm/apicdef.h> 34 #include <asm/delay.h> 35 #include <linux/atomic.h> 36 #include <linux/jump_label.h> 37 #include "kvm_cache_regs.h" 38 #include "irq.h" 39 #include "trace.h" 40 #include "x86.h" 41 #include "cpuid.h" 42 #include "hyperv.h" 43 44 #ifndef CONFIG_X86_64 45 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 46 #else 47 #define mod_64(x, y) ((x) % (y)) 48 #endif 49 50 #define PRId64 "d" 51 #define PRIx64 "llx" 52 #define PRIu64 "u" 53 #define PRIo64 "o" 54 55 /* 14 is the version for Xeon and Pentium 8.4.8*/ 56 #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) 57 #define LAPIC_MMIO_LENGTH (1 << 12) 58 /* followed define is not in apicdef.h */ 59 #define MAX_APIC_VECTOR 256 60 #define APIC_VECTORS_PER_REG 32 61 62 #define APIC_BROADCAST 0xFF 63 #define X2APIC_BROADCAST 0xFFFFFFFFul 64 65 static bool lapic_timer_advance_dynamic __read_mostly; 66 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ 67 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ 68 #define LAPIC_TIMER_ADVANCE_NS_INIT 1000 69 #define LAPIC_TIMER_ADVANCE_NS_MAX 5000 70 /* step-by-step approximation to mitigate fluctuation */ 71 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 72 73 static inline int apic_test_vector(int vec, void *bitmap) 74 { 75 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 76 } 77 78 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) 79 { 80 struct kvm_lapic *apic = vcpu->arch.apic; 81 82 return apic_test_vector(vector, apic->regs + APIC_ISR) || 83 apic_test_vector(vector, apic->regs + APIC_IRR); 84 } 85 86 static inline int __apic_test_and_set_vector(int vec, void *bitmap) 87 { 88 return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 89 } 90 91 static inline int __apic_test_and_clear_vector(int vec, void *bitmap) 92 { 93 return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 94 } 95 96 struct static_key_deferred apic_hw_disabled __read_mostly; 97 struct static_key_deferred apic_sw_disabled __read_mostly; 98 99 static inline int apic_enabled(struct kvm_lapic *apic) 100 { 101 return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); 102 } 103 104 #define LVT_MASK \ 105 (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) 106 107 #define LINT_MASK \ 108 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 109 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 110 111 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) 112 { 113 return apic->vcpu->vcpu_id; 114 } 115 116 bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) 117 { 118 return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); 119 } 120 EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); 121 122 static bool 
kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) 123 { 124 return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; 125 } 126 127 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, 128 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { 129 switch (map->mode) { 130 case KVM_APIC_MODE_X2APIC: { 131 u32 offset = (dest_id >> 16) * 16; 132 u32 max_apic_id = map->max_apic_id; 133 134 if (offset <= max_apic_id) { 135 u8 cluster_size = min(max_apic_id - offset + 1, 16U); 136 137 offset = array_index_nospec(offset, map->max_apic_id + 1); 138 *cluster = &map->phys_map[offset]; 139 *mask = dest_id & (0xffff >> (16 - cluster_size)); 140 } else { 141 *mask = 0; 142 } 143 144 return true; 145 } 146 case KVM_APIC_MODE_XAPIC_FLAT: 147 *cluster = map->xapic_flat_map; 148 *mask = dest_id & 0xff; 149 return true; 150 case KVM_APIC_MODE_XAPIC_CLUSTER: 151 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf]; 152 *mask = dest_id & 0xf; 153 return true; 154 default: 155 /* Not optimized. */ 156 return false; 157 } 158 } 159 160 static void kvm_apic_map_free(struct rcu_head *rcu) 161 { 162 struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu); 163 164 kvfree(map); 165 } 166 167 void kvm_recalculate_apic_map(struct kvm *kvm) 168 { 169 struct kvm_apic_map *new, *old = NULL; 170 struct kvm_vcpu *vcpu; 171 int i; 172 u32 max_id = 255; /* enough space for any xAPIC ID */ 173 174 if (!kvm->arch.apic_map_dirty) { 175 /* 176 * Read kvm->arch.apic_map_dirty before 177 * kvm->arch.apic_map 178 */ 179 smp_rmb(); 180 return; 181 } 182 183 mutex_lock(&kvm->arch.apic_map_lock); 184 if (!kvm->arch.apic_map_dirty) { 185 /* Someone else has updated the map. */ 186 mutex_unlock(&kvm->arch.apic_map_lock); 187 return; 188 } 189 190 kvm_for_each_vcpu(i, vcpu, kvm) 191 if (kvm_apic_present(vcpu)) 192 max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); 193 194 new = kvzalloc(sizeof(struct kvm_apic_map) + 195 sizeof(struct kvm_lapic *) * ((u64)max_id + 1), 196 GFP_KERNEL_ACCOUNT); 197 198 if (!new) 199 goto out; 200 201 new->max_apic_id = max_id; 202 203 kvm_for_each_vcpu(i, vcpu, kvm) { 204 struct kvm_lapic *apic = vcpu->arch.apic; 205 struct kvm_lapic **cluster; 206 u16 mask; 207 u32 ldr; 208 u8 xapic_id; 209 u32 x2apic_id; 210 211 if (!kvm_apic_present(vcpu)) 212 continue; 213 214 xapic_id = kvm_xapic_id(apic); 215 x2apic_id = kvm_x2apic_id(apic); 216 217 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */ 218 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && 219 x2apic_id <= new->max_apic_id) 220 new->phys_map[x2apic_id] = apic; 221 /* 222 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around, 223 * prevent them from masking VCPUs with APIC ID <= 0xff. 
224 */ 225 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) 226 new->phys_map[xapic_id] = apic; 227 228 if (!kvm_apic_sw_enabled(apic)) 229 continue; 230 231 ldr = kvm_lapic_get_reg(apic, APIC_LDR); 232 233 if (apic_x2apic_mode(apic)) { 234 new->mode |= KVM_APIC_MODE_X2APIC; 235 } else if (ldr) { 236 ldr = GET_APIC_LOGICAL_ID(ldr); 237 if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) 238 new->mode |= KVM_APIC_MODE_XAPIC_FLAT; 239 else 240 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; 241 } 242 243 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) 244 continue; 245 246 if (mask) 247 cluster[ffs(mask) - 1] = apic; 248 } 249 out: 250 old = rcu_dereference_protected(kvm->arch.apic_map, 251 lockdep_is_held(&kvm->arch.apic_map_lock)); 252 rcu_assign_pointer(kvm->arch.apic_map, new); 253 /* 254 * Write kvm->arch.apic_map before 255 * clearing apic->apic_map_dirty 256 */ 257 smp_wmb(); 258 kvm->arch.apic_map_dirty = false; 259 mutex_unlock(&kvm->arch.apic_map_lock); 260 261 if (old) 262 call_rcu(&old->rcu, kvm_apic_map_free); 263 264 kvm_make_scan_ioapic_request(kvm); 265 } 266 267 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 268 { 269 bool enabled = val & APIC_SPIV_APIC_ENABLED; 270 271 kvm_lapic_set_reg(apic, APIC_SPIV, val); 272 273 if (enabled != apic->sw_enabled) { 274 apic->sw_enabled = enabled; 275 if (enabled) 276 static_key_slow_dec_deferred(&apic_sw_disabled); 277 else 278 static_key_slow_inc(&apic_sw_disabled.key); 279 280 apic->vcpu->kvm->arch.apic_map_dirty = true; 281 } 282 } 283 284 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) 285 { 286 kvm_lapic_set_reg(apic, APIC_ID, id << 24); 287 apic->vcpu->kvm->arch.apic_map_dirty = true; 288 } 289 290 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) 291 { 292 kvm_lapic_set_reg(apic, APIC_LDR, id); 293 apic->vcpu->kvm->arch.apic_map_dirty = true; 294 } 295 296 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) 297 { 298 return ((id >> 4) << 16) | (1 << (id & 0xf)); 299 } 300 301 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 302 { 303 u32 ldr = kvm_apic_calc_x2apic_ldr(id); 304 305 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 306 307 kvm_lapic_set_reg(apic, APIC_ID, id); 308 kvm_lapic_set_reg(apic, APIC_LDR, ldr); 309 apic->vcpu->kvm->arch.apic_map_dirty = true; 310 } 311 312 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 313 { 314 return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 315 } 316 317 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 318 { 319 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; 320 } 321 322 static inline int apic_lvtt_period(struct kvm_lapic *apic) 323 { 324 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; 325 } 326 327 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) 328 { 329 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 330 } 331 332 static inline int apic_lvt_nmi_mode(u32 lvt_val) 333 { 334 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 335 } 336 337 void kvm_apic_set_version(struct kvm_vcpu *vcpu) 338 { 339 struct kvm_lapic *apic = vcpu->arch.apic; 340 struct kvm_cpuid_entry2 *feat; 341 u32 v = APIC_VERSION; 342 343 if (!lapic_in_kernel(vcpu)) 344 return; 345 346 /* 347 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) 348 * which doesn't have EOI register; Some buggy OSes (e.g. 
Windows with 349 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC 350 * version first and level-triggered interrupts never get EOIed in 351 * IOAPIC. 352 */ 353 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 354 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && 355 !ioapic_in_kernel(vcpu->kvm)) 356 v |= APIC_LVR_DIRECTED_EOI; 357 kvm_lapic_set_reg(apic, APIC_LVR, v); 358 } 359 360 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = { 361 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ 362 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 363 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 364 LINT_MASK, LINT_MASK, /* LVT0-1 */ 365 LVT_MASK /* LVTERR */ 366 }; 367 368 static int find_highest_vector(void *bitmap) 369 { 370 int vec; 371 u32 *reg; 372 373 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; 374 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 375 reg = bitmap + REG_POS(vec); 376 if (*reg) 377 return __fls(*reg) + vec; 378 } 379 380 return -1; 381 } 382 383 static u8 count_vectors(void *bitmap) 384 { 385 int vec; 386 u32 *reg; 387 u8 count = 0; 388 389 for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { 390 reg = bitmap + REG_POS(vec); 391 count += hweight32(*reg); 392 } 393 394 return count; 395 } 396 397 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) 398 { 399 u32 i, vec; 400 u32 pir_val, irr_val, prev_irr_val; 401 int max_updated_irr; 402 403 max_updated_irr = -1; 404 *max_irr = -1; 405 406 for (i = vec = 0; i <= 7; i++, vec += 32) { 407 pir_val = READ_ONCE(pir[i]); 408 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); 409 if (pir_val) { 410 prev_irr_val = irr_val; 411 irr_val |= xchg(&pir[i], 0); 412 *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; 413 if (prev_irr_val != irr_val) { 414 max_updated_irr = 415 __fls(irr_val ^ prev_irr_val) + vec; 416 } 417 } 418 if (irr_val) 419 *max_irr = __fls(irr_val) + vec; 420 } 421 422 return ((max_updated_irr != -1) && 423 (max_updated_irr == *max_irr)); 424 } 425 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); 426 427 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) 428 { 429 struct kvm_lapic *apic = vcpu->arch.apic; 430 431 return __kvm_apic_update_irr(pir, apic->regs, max_irr); 432 } 433 EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 434 435 static inline int apic_search_irr(struct kvm_lapic *apic) 436 { 437 return find_highest_vector(apic->regs + APIC_IRR); 438 } 439 440 static inline int apic_find_highest_irr(struct kvm_lapic *apic) 441 { 442 int result; 443 444 /* 445 * Note that irr_pending is just a hint. It will be always 446 * true with virtual interrupt delivery enabled. 
447 */ 448 if (!apic->irr_pending) 449 return -1; 450 451 result = apic_search_irr(apic); 452 ASSERT(result == -1 || result >= 16); 453 454 return result; 455 } 456 457 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 458 { 459 struct kvm_vcpu *vcpu; 460 461 vcpu = apic->vcpu; 462 463 if (unlikely(vcpu->arch.apicv_active)) { 464 /* need to update RVI */ 465 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 466 kvm_x86_ops.hwapic_irr_update(vcpu, 467 apic_find_highest_irr(apic)); 468 } else { 469 apic->irr_pending = false; 470 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 471 if (apic_search_irr(apic) != -1) 472 apic->irr_pending = true; 473 } 474 } 475 476 static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 477 { 478 struct kvm_vcpu *vcpu; 479 480 if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 481 return; 482 483 vcpu = apic->vcpu; 484 485 /* 486 * With APIC virtualization enabled, all caching is disabled 487 * because the processor can modify ISR under the hood. Instead 488 * just set SVI. 489 */ 490 if (unlikely(vcpu->arch.apicv_active)) 491 kvm_x86_ops.hwapic_isr_update(vcpu, vec); 492 else { 493 ++apic->isr_count; 494 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 495 /* 496 * ISR (in service register) bit is set when injecting an interrupt. 497 * The highest vector is injected. Thus the latest bit set matches 498 * the highest bit in ISR. 499 */ 500 apic->highest_isr_cache = vec; 501 } 502 } 503 504 static inline int apic_find_highest_isr(struct kvm_lapic *apic) 505 { 506 int result; 507 508 /* 509 * Note that isr_count is always 1, and highest_isr_cache 510 * is always -1, with APIC virtualization enabled. 511 */ 512 if (!apic->isr_count) 513 return -1; 514 if (likely(apic->highest_isr_cache != -1)) 515 return apic->highest_isr_cache; 516 517 result = find_highest_vector(apic->regs + APIC_ISR); 518 ASSERT(result == -1 || result >= 16); 519 520 return result; 521 } 522 523 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) 524 { 525 struct kvm_vcpu *vcpu; 526 if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) 527 return; 528 529 vcpu = apic->vcpu; 530 531 /* 532 * We do get here for APIC virtualization enabled if the guest 533 * uses the Hyper-V APIC enlightenment. In this case we may need 534 * to trigger a new interrupt delivery by writing the SVI field; 535 * on the other hand isr_count and highest_isr_cache are unused 536 * and must be left alone. 537 */ 538 if (unlikely(vcpu->arch.apicv_active)) 539 kvm_x86_ops.hwapic_isr_update(vcpu, 540 apic_find_highest_isr(apic)); 541 else { 542 --apic->isr_count; 543 BUG_ON(apic->isr_count < 0); 544 apic->highest_isr_cache = -1; 545 } 546 } 547 548 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 549 { 550 /* This may race with setting of irr in __apic_accept_irq() and 551 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 552 * will cause vmexit immediately and the value will be recalculated 553 * on the next vmentry. 
554 */ 555 return apic_find_highest_irr(vcpu->arch.apic); 556 } 557 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); 558 559 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 560 int vector, int level, int trig_mode, 561 struct dest_map *dest_map); 562 563 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 564 struct dest_map *dest_map) 565 { 566 struct kvm_lapic *apic = vcpu->arch.apic; 567 568 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 569 irq->level, irq->trig_mode, dest_map); 570 } 571 572 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, 573 struct kvm_lapic_irq *irq, u32 min) 574 { 575 int i, count = 0; 576 struct kvm_vcpu *vcpu; 577 578 if (min > map->max_apic_id) 579 return 0; 580 581 for_each_set_bit(i, ipi_bitmap, 582 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 583 if (map->phys_map[min + i]) { 584 vcpu = map->phys_map[min + i]->vcpu; 585 count += kvm_apic_set_irq(vcpu, irq, NULL); 586 } 587 } 588 589 return count; 590 } 591 592 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 593 unsigned long ipi_bitmap_high, u32 min, 594 unsigned long icr, int op_64_bit) 595 { 596 struct kvm_apic_map *map; 597 struct kvm_lapic_irq irq = {0}; 598 int cluster_size = op_64_bit ? 64 : 32; 599 int count; 600 601 if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) 602 return -KVM_EINVAL; 603 604 irq.vector = icr & APIC_VECTOR_MASK; 605 irq.delivery_mode = icr & APIC_MODE_MASK; 606 irq.level = (icr & APIC_INT_ASSERT) != 0; 607 irq.trig_mode = icr & APIC_INT_LEVELTRIG; 608 609 rcu_read_lock(); 610 map = rcu_dereference(kvm->arch.apic_map); 611 612 count = -EOPNOTSUPP; 613 if (likely(map)) { 614 count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); 615 min += cluster_size; 616 count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); 617 } 618 619 rcu_read_unlock(); 620 return count; 621 } 622 623 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 624 { 625 626 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, 627 sizeof(val)); 628 } 629 630 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) 631 { 632 633 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, 634 sizeof(*val)); 635 } 636 637 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) 638 { 639 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 640 } 641 642 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) 643 { 644 u8 val; 645 if (pv_eoi_get_user(vcpu, &val) < 0) { 646 printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", 647 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 648 return false; 649 } 650 return val & 0x1; 651 } 652 653 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) 654 { 655 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 656 printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n", 657 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 658 return; 659 } 660 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 661 } 662 663 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) 664 { 665 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 666 printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n", 667 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 668 return; 669 } 670 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 671 } 672 673 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) 674 { 675 int highest_irr; 676 if (apic->vcpu->arch.apicv_active) 677 highest_irr = 
kvm_x86_ops.sync_pir_to_irr(apic->vcpu); 678 else 679 highest_irr = apic_find_highest_irr(apic); 680 if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) 681 return -1; 682 return highest_irr; 683 } 684 685 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) 686 { 687 u32 tpr, isrv, ppr, old_ppr; 688 int isr; 689 690 old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI); 691 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI); 692 isr = apic_find_highest_isr(apic); 693 isrv = (isr != -1) ? isr : 0; 694 695 if ((tpr & 0xf0) >= (isrv & 0xf0)) 696 ppr = tpr & 0xff; 697 else 698 ppr = isrv & 0xf0; 699 700 *new_ppr = ppr; 701 if (old_ppr != ppr) 702 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); 703 704 return ppr < old_ppr; 705 } 706 707 static void apic_update_ppr(struct kvm_lapic *apic) 708 { 709 u32 ppr; 710 711 if (__apic_update_ppr(apic, &ppr) && 712 apic_has_interrupt_for_ppr(apic, ppr) != -1) 713 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 714 } 715 716 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) 717 { 718 apic_update_ppr(vcpu->arch.apic); 719 } 720 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); 721 722 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 723 { 724 kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); 725 apic_update_ppr(apic); 726 } 727 728 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) 729 { 730 return mda == (apic_x2apic_mode(apic) ? 731 X2APIC_BROADCAST : APIC_BROADCAST); 732 } 733 734 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) 735 { 736 if (kvm_apic_broadcast(apic, mda)) 737 return true; 738 739 if (apic_x2apic_mode(apic)) 740 return mda == kvm_x2apic_id(apic); 741 742 /* 743 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if 744 * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and 745 * this allows unique addressing of VCPUs with APIC ID over 0xff. 746 * The 0xff condition is needed because writeable xAPIC ID. 747 */ 748 if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) 749 return true; 750 751 return mda == kvm_xapic_id(apic); 752 } 753 754 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) 755 { 756 u32 logical_id; 757 758 if (kvm_apic_broadcast(apic, mda)) 759 return true; 760 761 logical_id = kvm_lapic_get_reg(apic, APIC_LDR); 762 763 if (apic_x2apic_mode(apic)) 764 return ((logical_id >> 16) == (mda >> 16)) 765 && (logical_id & mda & 0xffff) != 0; 766 767 logical_id = GET_APIC_LOGICAL_ID(logical_id); 768 769 switch (kvm_lapic_get_reg(apic, APIC_DFR)) { 770 case APIC_DFR_FLAT: 771 return (logical_id & mda) != 0; 772 case APIC_DFR_CLUSTER: 773 return ((logical_id >> 4) == (mda >> 4)) 774 && (logical_id & mda & 0xf) != 0; 775 default: 776 return false; 777 } 778 } 779 780 /* The KVM local APIC implementation has two quirks: 781 * 782 * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs 783 * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. 784 * KVM doesn't do that aliasing. 785 * 786 * - in-kernel IOAPIC messages have to be delivered directly to 787 * x2APIC, because the kernel does not support interrupt remapping. 788 * In order to support broadcast without interrupt remapping, x2APIC 789 * rewrites the destination of non-IPI messages from APIC_BROADCAST 790 * to X2APIC_BROADCAST. 791 * 792 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is 793 * important when userspace wants to use x2APIC-format MSIs, because 794 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7". 
795 */ 796 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, 797 struct kvm_lapic *source, struct kvm_lapic *target) 798 { 799 bool ipi = source != NULL; 800 801 if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && 802 !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) 803 return X2APIC_BROADCAST; 804 805 return dest_id; 806 } 807 808 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 809 int shorthand, unsigned int dest, int dest_mode) 810 { 811 struct kvm_lapic *target = vcpu->arch.apic; 812 u32 mda = kvm_apic_mda(vcpu, dest, source, target); 813 814 ASSERT(target); 815 switch (shorthand) { 816 case APIC_DEST_NOSHORT: 817 if (dest_mode == APIC_DEST_PHYSICAL) 818 return kvm_apic_match_physical_addr(target, mda); 819 else 820 return kvm_apic_match_logical_addr(target, mda); 821 case APIC_DEST_SELF: 822 return target == source; 823 case APIC_DEST_ALLINC: 824 return true; 825 case APIC_DEST_ALLBUT: 826 return target != source; 827 default: 828 return false; 829 } 830 } 831 EXPORT_SYMBOL_GPL(kvm_apic_match_dest); 832 833 int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 834 const unsigned long *bitmap, u32 bitmap_size) 835 { 836 u32 mod; 837 int i, idx = -1; 838 839 mod = vector % dest_vcpus; 840 841 for (i = 0; i <= mod; i++) { 842 idx = find_next_bit(bitmap, bitmap_size, idx + 1); 843 BUG_ON(idx == bitmap_size); 844 } 845 846 return idx; 847 } 848 849 static void kvm_apic_disabled_lapic_found(struct kvm *kvm) 850 { 851 if (!kvm->arch.disabled_lapic_found) { 852 kvm->arch.disabled_lapic_found = true; 853 printk(KERN_INFO 854 "Disabled LAPIC found during irq injection\n"); 855 } 856 } 857 858 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, 859 struct kvm_lapic_irq *irq, struct kvm_apic_map *map) 860 { 861 if (kvm->arch.x2apic_broadcast_quirk_disabled) { 862 if ((irq->dest_id == APIC_BROADCAST && 863 map->mode != KVM_APIC_MODE_X2APIC)) 864 return true; 865 if (irq->dest_id == X2APIC_BROADCAST) 866 return true; 867 } else { 868 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); 869 if (irq->dest_id == (x2apic_ipi ? 870 X2APIC_BROADCAST : APIC_BROADCAST)) 871 return true; 872 } 873 874 return false; 875 } 876 877 /* Return true if the interrupt can be handled by using *bitmap as index mask 878 * for valid destinations in *dst array. 879 * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 880 * Note: we may have zero kvm_lapic destinations when we return true, which 881 * means that the interrupt should be dropped. In this case, *bitmap would be 882 * zero and *dst undefined. 
883 */ 884 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, 885 struct kvm_lapic **src, struct kvm_lapic_irq *irq, 886 struct kvm_apic_map *map, struct kvm_lapic ***dst, 887 unsigned long *bitmap) 888 { 889 int i, lowest; 890 891 if (irq->shorthand == APIC_DEST_SELF && src) { 892 *dst = src; 893 *bitmap = 1; 894 return true; 895 } else if (irq->shorthand) 896 return false; 897 898 if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) 899 return false; 900 901 if (irq->dest_mode == APIC_DEST_PHYSICAL) { 902 if (irq->dest_id > map->max_apic_id) { 903 *bitmap = 0; 904 } else { 905 u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); 906 *dst = &map->phys_map[dest_id]; 907 *bitmap = 1; 908 } 909 return true; 910 } 911 912 *bitmap = 0; 913 if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, 914 (u16 *)bitmap)) 915 return false; 916 917 if (!kvm_lowest_prio_delivery(irq)) 918 return true; 919 920 if (!kvm_vector_hashing_enabled()) { 921 lowest = -1; 922 for_each_set_bit(i, bitmap, 16) { 923 if (!(*dst)[i]) 924 continue; 925 if (lowest < 0) 926 lowest = i; 927 else if (kvm_apic_compare_prio((*dst)[i]->vcpu, 928 (*dst)[lowest]->vcpu) < 0) 929 lowest = i; 930 } 931 } else { 932 if (!*bitmap) 933 return true; 934 935 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), 936 bitmap, 16); 937 938 if (!(*dst)[lowest]) { 939 kvm_apic_disabled_lapic_found(kvm); 940 *bitmap = 0; 941 return true; 942 } 943 } 944 945 *bitmap = (lowest >= 0) ? 1 << lowest : 0; 946 947 return true; 948 } 949 950 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 951 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) 952 { 953 struct kvm_apic_map *map; 954 unsigned long bitmap; 955 struct kvm_lapic **dst = NULL; 956 int i; 957 bool ret; 958 959 *r = -1; 960 961 if (irq->shorthand == APIC_DEST_SELF) { 962 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 963 return true; 964 } 965 966 rcu_read_lock(); 967 map = rcu_dereference(kvm->arch.apic_map); 968 969 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); 970 if (ret) { 971 *r = 0; 972 for_each_set_bit(i, &bitmap, 16) { 973 if (!dst[i]) 974 continue; 975 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 976 } 977 } 978 979 rcu_read_unlock(); 980 return ret; 981 } 982 983 /* 984 * This routine tries to handle interrupts in posted mode, here is how 985 * it deals with different cases: 986 * - For single-destination interrupts, handle it in posted mode 987 * - Else if vector hashing is enabled and it is a lowest-priority 988 * interrupt, handle it in posted mode and use the following mechanism 989 * to find the destination vCPU. 990 * 1. For lowest-priority interrupts, store all the possible 991 * destination vCPUs in an array. 992 * 2. Use "guest vector % max number of destination vCPUs" to find 993 * the right destination vCPU in the array for the lowest-priority 994 * interrupt. 995 * - Otherwise, use remapped mode to inject the interrupt. 
996 */ 997 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 998 struct kvm_vcpu **dest_vcpu) 999 { 1000 struct kvm_apic_map *map; 1001 unsigned long bitmap; 1002 struct kvm_lapic **dst = NULL; 1003 bool ret = false; 1004 1005 if (irq->shorthand) 1006 return false; 1007 1008 rcu_read_lock(); 1009 map = rcu_dereference(kvm->arch.apic_map); 1010 1011 if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && 1012 hweight16(bitmap) == 1) { 1013 unsigned long i = find_first_bit(&bitmap, 16); 1014 1015 if (dst[i]) { 1016 *dest_vcpu = dst[i]->vcpu; 1017 ret = true; 1018 } 1019 } 1020 1021 rcu_read_unlock(); 1022 return ret; 1023 } 1024 1025 /* 1026 * Add a pending IRQ into lapic. 1027 * Return 1 if successfully added and 0 if discarded. 1028 */ 1029 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 1030 int vector, int level, int trig_mode, 1031 struct dest_map *dest_map) 1032 { 1033 int result = 0; 1034 struct kvm_vcpu *vcpu = apic->vcpu; 1035 1036 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 1037 trig_mode, vector); 1038 switch (delivery_mode) { 1039 case APIC_DM_LOWEST: 1040 vcpu->arch.apic_arb_prio++; 1041 /* fall through */ 1042 case APIC_DM_FIXED: 1043 if (unlikely(trig_mode && !level)) 1044 break; 1045 1046 /* FIXME add logic for vcpu on reset */ 1047 if (unlikely(!apic_enabled(apic))) 1048 break; 1049 1050 result = 1; 1051 1052 if (dest_map) { 1053 __set_bit(vcpu->vcpu_id, dest_map->map); 1054 dest_map->vectors[vcpu->vcpu_id] = vector; 1055 } 1056 1057 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 1058 if (trig_mode) 1059 kvm_lapic_set_vector(vector, 1060 apic->regs + APIC_TMR); 1061 else 1062 kvm_lapic_clear_vector(vector, 1063 apic->regs + APIC_TMR); 1064 } 1065 1066 if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) { 1067 kvm_lapic_set_irr(vector, apic); 1068 kvm_make_request(KVM_REQ_EVENT, vcpu); 1069 kvm_vcpu_kick(vcpu); 1070 } 1071 break; 1072 1073 case APIC_DM_REMRD: 1074 result = 1; 1075 vcpu->arch.pv.pv_unhalted = 1; 1076 kvm_make_request(KVM_REQ_EVENT, vcpu); 1077 kvm_vcpu_kick(vcpu); 1078 break; 1079 1080 case APIC_DM_SMI: 1081 result = 1; 1082 kvm_make_request(KVM_REQ_SMI, vcpu); 1083 kvm_vcpu_kick(vcpu); 1084 break; 1085 1086 case APIC_DM_NMI: 1087 result = 1; 1088 kvm_inject_nmi(vcpu); 1089 kvm_vcpu_kick(vcpu); 1090 break; 1091 1092 case APIC_DM_INIT: 1093 if (!trig_mode || level) { 1094 result = 1; 1095 /* assumes that there are only KVM_APIC_INIT/SIPI */ 1096 apic->pending_events = (1UL << KVM_APIC_INIT); 1097 kvm_make_request(KVM_REQ_EVENT, vcpu); 1098 kvm_vcpu_kick(vcpu); 1099 } 1100 break; 1101 1102 case APIC_DM_STARTUP: 1103 result = 1; 1104 apic->sipi_vector = vector; 1105 /* make sure sipi_vector is visible for the receiver */ 1106 smp_wmb(); 1107 set_bit(KVM_APIC_SIPI, &apic->pending_events); 1108 kvm_make_request(KVM_REQ_EVENT, vcpu); 1109 kvm_vcpu_kick(vcpu); 1110 break; 1111 1112 case APIC_DM_EXTINT: 1113 /* 1114 * Should only be called by kvm_apic_local_deliver() with LVT0, 1115 * before NMI watchdog was enabled. Already handled by 1116 * kvm_apic_accept_pic_intr(). 1117 */ 1118 break; 1119 1120 default: 1121 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 1122 delivery_mode); 1123 break; 1124 } 1125 return result; 1126 } 1127 1128 /* 1129 * This routine identifies the destination vcpus mask meant to receive the 1130 * IOAPIC interrupts. 
It either uses kvm_apic_map_get_dest_lapic() to find 1131 * out the destination vcpus array and set the bitmap or it traverses to 1132 * each available vcpu to identify the same. 1133 */ 1134 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, 1135 unsigned long *vcpu_bitmap) 1136 { 1137 struct kvm_lapic **dest_vcpu = NULL; 1138 struct kvm_lapic *src = NULL; 1139 struct kvm_apic_map *map; 1140 struct kvm_vcpu *vcpu; 1141 unsigned long bitmap; 1142 int i, vcpu_idx; 1143 bool ret; 1144 1145 rcu_read_lock(); 1146 map = rcu_dereference(kvm->arch.apic_map); 1147 1148 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, 1149 &bitmap); 1150 if (ret) { 1151 for_each_set_bit(i, &bitmap, 16) { 1152 if (!dest_vcpu[i]) 1153 continue; 1154 vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; 1155 __set_bit(vcpu_idx, vcpu_bitmap); 1156 } 1157 } else { 1158 kvm_for_each_vcpu(i, vcpu, kvm) { 1159 if (!kvm_apic_present(vcpu)) 1160 continue; 1161 if (!kvm_apic_match_dest(vcpu, NULL, 1162 irq->shorthand, 1163 irq->dest_id, 1164 irq->dest_mode)) 1165 continue; 1166 __set_bit(i, vcpu_bitmap); 1167 } 1168 } 1169 rcu_read_unlock(); 1170 } 1171 1172 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1173 { 1174 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1175 } 1176 1177 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 1178 { 1179 return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); 1180 } 1181 1182 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 1183 { 1184 int trigger_mode; 1185 1186 /* Eoi the ioapic only if the ioapic doesn't own the vector. */ 1187 if (!kvm_ioapic_handles_vector(apic, vector)) 1188 return; 1189 1190 /* Request a KVM exit to inform the userspace IOAPIC. */ 1191 if (irqchip_split(apic->vcpu->kvm)) { 1192 apic->vcpu->arch.pending_ioapic_eoi = vector; 1193 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); 1194 return; 1195 } 1196 1197 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 1198 trigger_mode = IOAPIC_LEVEL_TRIG; 1199 else 1200 trigger_mode = IOAPIC_EDGE_TRIG; 1201 1202 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); 1203 } 1204 1205 static int apic_set_eoi(struct kvm_lapic *apic) 1206 { 1207 int vector = apic_find_highest_isr(apic); 1208 1209 trace_kvm_eoi(apic, vector); 1210 1211 /* 1212 * Not every write EOI will has corresponding ISR, 1213 * one example is when Kernel check timer on setup_IO_APIC 1214 */ 1215 if (vector == -1) 1216 return vector; 1217 1218 apic_clear_isr(vector, apic); 1219 apic_update_ppr(apic); 1220 1221 if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) 1222 kvm_hv_synic_send_eoi(apic->vcpu, vector); 1223 1224 kvm_ioapic_send_eoi(apic, vector); 1225 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1226 return vector; 1227 } 1228 1229 /* 1230 * this interface assumes a trap-like exit, which has already finished 1231 * desired side effect including vISR and vPPR update. 
1232 */ 1233 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 1234 { 1235 struct kvm_lapic *apic = vcpu->arch.apic; 1236 1237 trace_kvm_eoi(apic, vector); 1238 1239 kvm_ioapic_send_eoi(apic, vector); 1240 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1241 } 1242 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1243 1244 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) 1245 { 1246 struct kvm_lapic_irq irq; 1247 1248 irq.vector = icr_low & APIC_VECTOR_MASK; 1249 irq.delivery_mode = icr_low & APIC_MODE_MASK; 1250 irq.dest_mode = icr_low & APIC_DEST_MASK; 1251 irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1252 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1253 irq.shorthand = icr_low & APIC_SHORT_MASK; 1254 irq.msi_redir_hint = false; 1255 if (apic_x2apic_mode(apic)) 1256 irq.dest_id = icr_high; 1257 else 1258 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 1259 1260 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1261 1262 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1263 } 1264 1265 static u32 apic_get_tmcct(struct kvm_lapic *apic) 1266 { 1267 ktime_t remaining, now; 1268 s64 ns; 1269 u32 tmcct; 1270 1271 ASSERT(apic != NULL); 1272 1273 /* if initial count is 0, current count should also be 0 */ 1274 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || 1275 apic->lapic_timer.period == 0) 1276 return 0; 1277 1278 now = ktime_get(); 1279 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1280 if (ktime_to_ns(remaining) < 0) 1281 remaining = 0; 1282 1283 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 1284 tmcct = div64_u64(ns, 1285 (APIC_BUS_CYCLE_NS * apic->divide_count)); 1286 1287 return tmcct; 1288 } 1289 1290 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 1291 { 1292 struct kvm_vcpu *vcpu = apic->vcpu; 1293 struct kvm_run *run = vcpu->run; 1294 1295 kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 1296 run->tpr_access.rip = kvm_rip_read(vcpu); 1297 run->tpr_access.is_write = write; 1298 } 1299 1300 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 1301 { 1302 if (apic->vcpu->arch.tpr_access_reporting) 1303 __report_tpr_access(apic, write); 1304 } 1305 1306 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 1307 { 1308 u32 val = 0; 1309 1310 if (offset >= LAPIC_MMIO_LENGTH) 1311 return 0; 1312 1313 switch (offset) { 1314 case APIC_ARBPRI: 1315 break; 1316 1317 case APIC_TMCCT: /* Timer CCR */ 1318 if (apic_lvtt_tscdeadline(apic)) 1319 return 0; 1320 1321 val = apic_get_tmcct(apic); 1322 break; 1323 case APIC_PROCPRI: 1324 apic_update_ppr(apic); 1325 val = kvm_lapic_get_reg(apic, offset); 1326 break; 1327 case APIC_TASKPRI: 1328 report_tpr_access(apic, false); 1329 /* fall thru */ 1330 default: 1331 val = kvm_lapic_get_reg(apic, offset); 1332 break; 1333 } 1334 1335 return val; 1336 } 1337 1338 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 1339 { 1340 return container_of(dev, struct kvm_lapic, dev); 1341 } 1342 1343 #define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) 1344 #define APIC_REGS_MASK(first, count) \ 1345 (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) 1346 1347 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 1348 void *data) 1349 { 1350 unsigned char alignment = offset & 0xf; 1351 u32 result; 1352 /* this bitmask has a bit cleared for each reserved register */ 1353 u64 valid_reg_mask = 1354 APIC_REG_MASK(APIC_ID) | 1355 APIC_REG_MASK(APIC_LVR) | 1356 APIC_REG_MASK(APIC_TASKPRI) | 1357 
APIC_REG_MASK(APIC_PROCPRI) | 1358 APIC_REG_MASK(APIC_LDR) | 1359 APIC_REG_MASK(APIC_DFR) | 1360 APIC_REG_MASK(APIC_SPIV) | 1361 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | 1362 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | 1363 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | 1364 APIC_REG_MASK(APIC_ESR) | 1365 APIC_REG_MASK(APIC_ICR) | 1366 APIC_REG_MASK(APIC_ICR2) | 1367 APIC_REG_MASK(APIC_LVTT) | 1368 APIC_REG_MASK(APIC_LVTTHMR) | 1369 APIC_REG_MASK(APIC_LVTPC) | 1370 APIC_REG_MASK(APIC_LVT0) | 1371 APIC_REG_MASK(APIC_LVT1) | 1372 APIC_REG_MASK(APIC_LVTERR) | 1373 APIC_REG_MASK(APIC_TMICT) | 1374 APIC_REG_MASK(APIC_TMCCT) | 1375 APIC_REG_MASK(APIC_TDCR); 1376 1377 /* ARBPRI is not valid on x2APIC */ 1378 if (!apic_x2apic_mode(apic)) 1379 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); 1380 1381 if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) 1382 return 1; 1383 1384 result = __apic_read(apic, offset & ~0xf); 1385 1386 trace_kvm_apic_read(offset, result); 1387 1388 switch (len) { 1389 case 1: 1390 case 2: 1391 case 4: 1392 memcpy(data, (char *)&result + alignment, len); 1393 break; 1394 default: 1395 printk(KERN_ERR "Local APIC read with len = %x, " 1396 "should be 1,2, or 4 instead\n", len); 1397 break; 1398 } 1399 return 0; 1400 } 1401 EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); 1402 1403 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 1404 { 1405 return addr >= apic->base_address && 1406 addr < apic->base_address + LAPIC_MMIO_LENGTH; 1407 } 1408 1409 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 1410 gpa_t address, int len, void *data) 1411 { 1412 struct kvm_lapic *apic = to_lapic(this); 1413 u32 offset = address - apic->base_address; 1414 1415 if (!apic_mmio_in_range(apic, address)) 1416 return -EOPNOTSUPP; 1417 1418 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 1419 if (!kvm_check_has_quirk(vcpu->kvm, 1420 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 1421 return -EOPNOTSUPP; 1422 1423 memset(data, 0xff, len); 1424 return 0; 1425 } 1426 1427 kvm_lapic_reg_read(apic, offset, len, data); 1428 1429 return 0; 1430 } 1431 1432 static void update_divide_count(struct kvm_lapic *apic) 1433 { 1434 u32 tmp1, tmp2, tdcr; 1435 1436 tdcr = kvm_lapic_get_reg(apic, APIC_TDCR); 1437 tmp1 = tdcr & 0xf; 1438 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1439 apic->divide_count = 0x1 << (tmp2 & 0x7); 1440 } 1441 1442 static void limit_periodic_timer_frequency(struct kvm_lapic *apic) 1443 { 1444 /* 1445 * Do not allow the guest to program periodic timers with small 1446 * interval, since the hrtimers are not throttled by the host 1447 * scheduler. 
1448 */ 1449 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1450 s64 min_period = min_timer_period_us * 1000LL; 1451 1452 if (apic->lapic_timer.period < min_period) { 1453 pr_info_ratelimited( 1454 "kvm: vcpu %i: requested %lld ns " 1455 "lapic timer period limited to %lld ns\n", 1456 apic->vcpu->vcpu_id, 1457 apic->lapic_timer.period, min_period); 1458 apic->lapic_timer.period = min_period; 1459 } 1460 } 1461 } 1462 1463 static void cancel_hv_timer(struct kvm_lapic *apic); 1464 1465 static void apic_update_lvtt(struct kvm_lapic *apic) 1466 { 1467 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & 1468 apic->lapic_timer.timer_mode_mask; 1469 1470 if (apic->lapic_timer.timer_mode != timer_mode) { 1471 if (apic_lvtt_tscdeadline(apic) != (timer_mode == 1472 APIC_LVT_TIMER_TSCDEADLINE)) { 1473 hrtimer_cancel(&apic->lapic_timer.timer); 1474 preempt_disable(); 1475 if (apic->lapic_timer.hv_timer_in_use) 1476 cancel_hv_timer(apic); 1477 preempt_enable(); 1478 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 1479 apic->lapic_timer.period = 0; 1480 apic->lapic_timer.tscdeadline = 0; 1481 } 1482 apic->lapic_timer.timer_mode = timer_mode; 1483 limit_periodic_timer_frequency(apic); 1484 } 1485 } 1486 1487 /* 1488 * On APICv, this test will cause a busy wait 1489 * during a higher-priority task. 1490 */ 1491 1492 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1493 { 1494 struct kvm_lapic *apic = vcpu->arch.apic; 1495 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); 1496 1497 if (kvm_apic_hw_enabled(apic)) { 1498 int vec = reg & APIC_VECTOR_MASK; 1499 void *bitmap = apic->regs + APIC_ISR; 1500 1501 if (vcpu->arch.apicv_active) 1502 bitmap = apic->regs + APIC_IRR; 1503 1504 if (apic_test_vector(vec, bitmap)) 1505 return true; 1506 } 1507 return false; 1508 } 1509 1510 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) 1511 { 1512 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; 1513 1514 /* 1515 * If the guest TSC is running at a different ratio than the host, then 1516 * convert the delay to nanoseconds to achieve an accurate delay. Note 1517 * that __delay() uses delay_tsc whenever the hardware has TSC, thus 1518 * always for VMX enabled hardware. 1519 */ 1520 if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { 1521 __delay(min(guest_cycles, 1522 nsec_to_cycles(vcpu, timer_advance_ns))); 1523 } else { 1524 u64 delay_ns = guest_cycles * 1000000ULL; 1525 do_div(delay_ns, vcpu->arch.virtual_tsc_khz); 1526 ndelay(min_t(u32, delay_ns, timer_advance_ns)); 1527 } 1528 } 1529 1530 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, 1531 s64 advance_expire_delta) 1532 { 1533 struct kvm_lapic *apic = vcpu->arch.apic; 1534 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; 1535 u64 ns; 1536 1537 /* Do not adjust for tiny fluctuations or large random spikes. 
*/ 1538 if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || 1539 abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) 1540 return; 1541 1542 /* too early */ 1543 if (advance_expire_delta < 0) { 1544 ns = -advance_expire_delta * 1000000ULL; 1545 do_div(ns, vcpu->arch.virtual_tsc_khz); 1546 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1547 } else { 1548 /* too late */ 1549 ns = advance_expire_delta * 1000000ULL; 1550 do_div(ns, vcpu->arch.virtual_tsc_khz); 1551 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1552 } 1553 1554 if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) 1555 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 1556 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1557 } 1558 1559 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1560 { 1561 struct kvm_lapic *apic = vcpu->arch.apic; 1562 u64 guest_tsc, tsc_deadline; 1563 1564 if (apic->lapic_timer.expired_tscdeadline == 0) 1565 return; 1566 1567 tsc_deadline = apic->lapic_timer.expired_tscdeadline; 1568 apic->lapic_timer.expired_tscdeadline = 0; 1569 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1570 apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; 1571 1572 if (guest_tsc < tsc_deadline) 1573 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); 1574 1575 if (lapic_timer_advance_dynamic) 1576 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); 1577 } 1578 1579 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1580 { 1581 if (lapic_timer_int_injected(vcpu)) 1582 __kvm_wait_lapic_expire(vcpu); 1583 } 1584 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1585 1586 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) 1587 { 1588 struct kvm_timer *ktimer = &apic->lapic_timer; 1589 1590 kvm_apic_local_deliver(apic, APIC_LVTT); 1591 if (apic_lvtt_tscdeadline(apic)) { 1592 ktimer->tscdeadline = 0; 1593 } else if (apic_lvtt_oneshot(apic)) { 1594 ktimer->tscdeadline = 0; 1595 ktimer->target_expiration = 0; 1596 } 1597 } 1598 1599 static void apic_timer_expired(struct kvm_lapic *apic) 1600 { 1601 struct kvm_vcpu *vcpu = apic->vcpu; 1602 struct kvm_timer *ktimer = &apic->lapic_timer; 1603 1604 if (atomic_read(&apic->lapic_timer.pending)) 1605 return; 1606 1607 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1608 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1609 1610 if (kvm_use_posted_timer_interrupt(apic->vcpu)) { 1611 if (apic->lapic_timer.timer_advance_ns) 1612 __kvm_wait_lapic_expire(vcpu); 1613 kvm_apic_inject_pending_timer_irqs(apic); 1614 return; 1615 } 1616 1617 atomic_inc(&apic->lapic_timer.pending); 1618 kvm_set_pending_timer(vcpu); 1619 } 1620 1621 static void start_sw_tscdeadline(struct kvm_lapic *apic) 1622 { 1623 struct kvm_timer *ktimer = &apic->lapic_timer; 1624 u64 guest_tsc, tscdeadline = ktimer->tscdeadline; 1625 u64 ns = 0; 1626 ktime_t expire; 1627 struct kvm_vcpu *vcpu = apic->vcpu; 1628 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1629 unsigned long flags; 1630 ktime_t now; 1631 1632 if (unlikely(!tscdeadline || !this_tsc_khz)) 1633 return; 1634 1635 local_irq_save(flags); 1636 1637 now = ktime_get(); 1638 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1639 1640 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1641 do_div(ns, this_tsc_khz); 1642 1643 if (likely(tscdeadline > guest_tsc) && 1644 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1645 expire = ktime_add_ns(now, ns); 1646 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1647 hrtimer_start(&ktimer->timer, 
expire, HRTIMER_MODE_ABS_HARD); 1648 } else 1649 apic_timer_expired(apic); 1650 1651 local_irq_restore(flags); 1652 } 1653 1654 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) 1655 { 1656 ktime_t now, remaining; 1657 u64 ns_remaining_old, ns_remaining_new; 1658 1659 apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) 1660 * APIC_BUS_CYCLE_NS * apic->divide_count; 1661 limit_periodic_timer_frequency(apic); 1662 1663 now = ktime_get(); 1664 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1665 if (ktime_to_ns(remaining) < 0) 1666 remaining = 0; 1667 1668 ns_remaining_old = ktime_to_ns(remaining); 1669 ns_remaining_new = mul_u64_u32_div(ns_remaining_old, 1670 apic->divide_count, old_divisor); 1671 1672 apic->lapic_timer.tscdeadline += 1673 nsec_to_cycles(apic->vcpu, ns_remaining_new) - 1674 nsec_to_cycles(apic->vcpu, ns_remaining_old); 1675 apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); 1676 } 1677 1678 static bool set_target_expiration(struct kvm_lapic *apic) 1679 { 1680 ktime_t now; 1681 u64 tscl = rdtsc(); 1682 1683 now = ktime_get(); 1684 apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) 1685 * APIC_BUS_CYCLE_NS * apic->divide_count; 1686 1687 if (!apic->lapic_timer.period) { 1688 apic->lapic_timer.tscdeadline = 0; 1689 return false; 1690 } 1691 1692 limit_periodic_timer_frequency(apic); 1693 1694 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1695 nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); 1696 apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); 1697 1698 return true; 1699 } 1700 1701 static void advance_periodic_target_expiration(struct kvm_lapic *apic) 1702 { 1703 ktime_t now = ktime_get(); 1704 u64 tscl = rdtsc(); 1705 ktime_t delta; 1706 1707 /* 1708 * Synchronize both deadlines to the same time source or 1709 * differences in the periods (caused by differences in the 1710 * underlying clocks or numerical approximation errors) will 1711 * cause the two to drift apart over time as the errors 1712 * accumulate. 
1713 */ 1714 apic->lapic_timer.target_expiration = 1715 ktime_add_ns(apic->lapic_timer.target_expiration, 1716 apic->lapic_timer.period); 1717 delta = ktime_sub(apic->lapic_timer.target_expiration, now); 1718 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1719 nsec_to_cycles(apic->vcpu, delta); 1720 } 1721 1722 static void start_sw_period(struct kvm_lapic *apic) 1723 { 1724 if (!apic->lapic_timer.period) 1725 return; 1726 1727 if (ktime_after(ktime_get(), 1728 apic->lapic_timer.target_expiration)) { 1729 apic_timer_expired(apic); 1730 1731 if (apic_lvtt_oneshot(apic)) 1732 return; 1733 1734 advance_periodic_target_expiration(apic); 1735 } 1736 1737 hrtimer_start(&apic->lapic_timer.timer, 1738 apic->lapic_timer.target_expiration, 1739 HRTIMER_MODE_ABS_HARD); 1740 } 1741 1742 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) 1743 { 1744 if (!lapic_in_kernel(vcpu)) 1745 return false; 1746 1747 return vcpu->arch.apic->lapic_timer.hv_timer_in_use; 1748 } 1749 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); 1750 1751 static void cancel_hv_timer(struct kvm_lapic *apic) 1752 { 1753 WARN_ON(preemptible()); 1754 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1755 kvm_x86_ops.cancel_hv_timer(apic->vcpu); 1756 apic->lapic_timer.hv_timer_in_use = false; 1757 } 1758 1759 static bool start_hv_timer(struct kvm_lapic *apic) 1760 { 1761 struct kvm_timer *ktimer = &apic->lapic_timer; 1762 struct kvm_vcpu *vcpu = apic->vcpu; 1763 bool expired; 1764 1765 WARN_ON(preemptible()); 1766 if (!kvm_x86_ops.set_hv_timer) 1767 return false; 1768 1769 if (!ktimer->tscdeadline) 1770 return false; 1771 1772 if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) 1773 return false; 1774 1775 ktimer->hv_timer_in_use = true; 1776 hrtimer_cancel(&ktimer->timer); 1777 1778 /* 1779 * To simplify handling the periodic timer, leave the hv timer running 1780 * even if the deadline timer has expired, i.e. rely on the resulting 1781 * VM-Exit to recompute the periodic timer's target expiration. 1782 */ 1783 if (!apic_lvtt_period(apic)) { 1784 /* 1785 * Cancel the hv timer if the sw timer fired while the hv timer 1786 * was being programmed, or if the hv timer itself expired. 
1787 */ 1788 if (atomic_read(&ktimer->pending)) { 1789 cancel_hv_timer(apic); 1790 } else if (expired) { 1791 apic_timer_expired(apic); 1792 cancel_hv_timer(apic); 1793 } 1794 } 1795 1796 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); 1797 1798 return true; 1799 } 1800 1801 static void start_sw_timer(struct kvm_lapic *apic) 1802 { 1803 struct kvm_timer *ktimer = &apic->lapic_timer; 1804 1805 WARN_ON(preemptible()); 1806 if (apic->lapic_timer.hv_timer_in_use) 1807 cancel_hv_timer(apic); 1808 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) 1809 return; 1810 1811 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1812 start_sw_period(apic); 1813 else if (apic_lvtt_tscdeadline(apic)) 1814 start_sw_tscdeadline(apic); 1815 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); 1816 } 1817 1818 static void restart_apic_timer(struct kvm_lapic *apic) 1819 { 1820 preempt_disable(); 1821 1822 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) 1823 goto out; 1824 1825 if (!start_hv_timer(apic)) 1826 start_sw_timer(apic); 1827 out: 1828 preempt_enable(); 1829 } 1830 1831 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 1832 { 1833 struct kvm_lapic *apic = vcpu->arch.apic; 1834 1835 preempt_disable(); 1836 /* If the preempt notifier has already run, it also called apic_timer_expired */ 1837 if (!apic->lapic_timer.hv_timer_in_use) 1838 goto out; 1839 WARN_ON(swait_active(&vcpu->wq)); 1840 cancel_hv_timer(apic); 1841 apic_timer_expired(apic); 1842 1843 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1844 advance_periodic_target_expiration(apic); 1845 restart_apic_timer(apic); 1846 } 1847 out: 1848 preempt_enable(); 1849 } 1850 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 1851 1852 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) 1853 { 1854 restart_apic_timer(vcpu->arch.apic); 1855 } 1856 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); 1857 1858 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) 1859 { 1860 struct kvm_lapic *apic = vcpu->arch.apic; 1861 1862 preempt_disable(); 1863 /* Possibly the TSC deadline timer is not enabled yet */ 1864 if (apic->lapic_timer.hv_timer_in_use) 1865 start_sw_timer(apic); 1866 preempt_enable(); 1867 } 1868 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); 1869 1870 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) 1871 { 1872 struct kvm_lapic *apic = vcpu->arch.apic; 1873 1874 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1875 restart_apic_timer(apic); 1876 } 1877 1878 static void start_apic_timer(struct kvm_lapic *apic) 1879 { 1880 atomic_set(&apic->lapic_timer.pending, 0); 1881 1882 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1883 && !set_target_expiration(apic)) 1884 return; 1885 1886 restart_apic_timer(apic); 1887 } 1888 1889 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 1890 { 1891 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); 1892 1893 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 1894 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; 1895 if (lvt0_in_nmi_mode) { 1896 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1897 } else 1898 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1899 } 1900 } 1901 1902 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 1903 { 1904 int ret = 0; 1905 1906 trace_kvm_apic_write(reg, val); 1907 1908 switch (reg) { 1909 case APIC_ID: /* Local APIC ID */ 1910 if (!apic_x2apic_mode(apic)) 1911 kvm_apic_set_xapic_id(apic, val >> 24); 1912 else 1913 ret = 1; 1914 break; 1915 
1916 case APIC_TASKPRI: 1917 report_tpr_access(apic, true); 1918 apic_set_tpr(apic, val & 0xff); 1919 break; 1920 1921 case APIC_EOI: 1922 apic_set_eoi(apic); 1923 break; 1924 1925 case APIC_LDR: 1926 if (!apic_x2apic_mode(apic)) 1927 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 1928 else 1929 ret = 1; 1930 break; 1931 1932 case APIC_DFR: 1933 if (!apic_x2apic_mode(apic)) { 1934 kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); 1935 apic->vcpu->kvm->arch.apic_map_dirty = true; 1936 } else 1937 ret = 1; 1938 break; 1939 1940 case APIC_SPIV: { 1941 u32 mask = 0x3ff; 1942 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 1943 mask |= APIC_SPIV_DIRECTED_EOI; 1944 apic_set_spiv(apic, val & mask); 1945 if (!(val & APIC_SPIV_APIC_ENABLED)) { 1946 int i; 1947 u32 lvt_val; 1948 1949 for (i = 0; i < KVM_APIC_LVT_NUM; i++) { 1950 lvt_val = kvm_lapic_get_reg(apic, 1951 APIC_LVTT + 0x10 * i); 1952 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, 1953 lvt_val | APIC_LVT_MASKED); 1954 } 1955 apic_update_lvtt(apic); 1956 atomic_set(&apic->lapic_timer.pending, 0); 1957 1958 } 1959 break; 1960 } 1961 case APIC_ICR: 1962 /* No delay here, so we always clear the pending bit */ 1963 val &= ~(1 << 12); 1964 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); 1965 kvm_lapic_set_reg(apic, APIC_ICR, val); 1966 break; 1967 1968 case APIC_ICR2: 1969 if (!apic_x2apic_mode(apic)) 1970 val &= 0xff000000; 1971 kvm_lapic_set_reg(apic, APIC_ICR2, val); 1972 break; 1973 1974 case APIC_LVT0: 1975 apic_manage_nmi_watchdog(apic, val); 1976 /* fall through */ 1977 case APIC_LVTTHMR: 1978 case APIC_LVTPC: 1979 case APIC_LVT1: 1980 case APIC_LVTERR: { 1981 /* TODO: Check vector */ 1982 size_t size; 1983 u32 index; 1984 1985 if (!kvm_apic_sw_enabled(apic)) 1986 val |= APIC_LVT_MASKED; 1987 size = ARRAY_SIZE(apic_lvt_mask); 1988 index = array_index_nospec( 1989 (reg - APIC_LVTT) >> 4, size); 1990 val &= apic_lvt_mask[index]; 1991 kvm_lapic_set_reg(apic, reg, val); 1992 break; 1993 } 1994 1995 case APIC_LVTT: 1996 if (!kvm_apic_sw_enabled(apic)) 1997 val |= APIC_LVT_MASKED; 1998 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 1999 kvm_lapic_set_reg(apic, APIC_LVTT, val); 2000 apic_update_lvtt(apic); 2001 break; 2002 2003 case APIC_TMICT: 2004 if (apic_lvtt_tscdeadline(apic)) 2005 break; 2006 2007 hrtimer_cancel(&apic->lapic_timer.timer); 2008 kvm_lapic_set_reg(apic, APIC_TMICT, val); 2009 start_apic_timer(apic); 2010 break; 2011 2012 case APIC_TDCR: { 2013 uint32_t old_divisor = apic->divide_count; 2014 2015 kvm_lapic_set_reg(apic, APIC_TDCR, val); 2016 update_divide_count(apic); 2017 if (apic->divide_count != old_divisor && 2018 apic->lapic_timer.period) { 2019 hrtimer_cancel(&apic->lapic_timer.timer); 2020 update_target_expiration(apic, old_divisor); 2021 restart_apic_timer(apic); 2022 } 2023 break; 2024 } 2025 case APIC_ESR: 2026 if (apic_x2apic_mode(apic) && val != 0) 2027 ret = 1; 2028 break; 2029 2030 case APIC_SELF_IPI: 2031 if (apic_x2apic_mode(apic)) { 2032 kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); 2033 } else 2034 ret = 1; 2035 break; 2036 default: 2037 ret = 1; 2038 break; 2039 } 2040 2041 kvm_recalculate_apic_map(apic->vcpu->kvm); 2042 2043 return ret; 2044 } 2045 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); 2046 2047 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 2048 gpa_t address, int len, const void *data) 2049 { 2050 struct kvm_lapic *apic = to_lapic(this); 2051 unsigned int offset = address - apic->base_address; 2052 u32 val; 
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		return 0;
	}

	/*
	 * APIC registers must be aligned on a 128-bit boundary.
	 * 32/64/128-bit registers must be accessed through 32-bit
	 * loads and stores.  Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf))
		return 0;

	val = *(u32*)data;

	kvm_lapic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!apic->sw_enabled)
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) ||
		!apic_lvtt_tscdeadline(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}

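/*
 * Update the APIC base MSR.  Toggling the global enable bit adjusts the
 * apic_hw_disabled static key; enabling x2APIC re-derives the APIC ID from
 * the vcpu id.
 */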
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic)
		value |= MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;

	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
		kvm_update_cpuid(vcpu);

	if (!apic)
		return;

	/* update jump label if enable bit changes */
	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE) {
			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
			static_key_slow_dec_deferred(&apic_hw_disabled);
		} else {
			static_key_slow_inc(&apic_hw_disabled.key);
			vcpu->kvm->arch.apic_map_dirty = true;
		}
	}

	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);

	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
		kvm_x86_ops.set_virtual_apic_mode(vcpu);

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	if ((value & MSR_IA32_APICBASE_ENABLE) &&
	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
		pr_warn_once("APIC base relocation is unsupported by KVM");
}

void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vcpu->arch.apicv_active) {
		/* irr_pending is always true when apicv is activated. */
		apic->irr_pending = true;
		apic->isr_count = 1;
	} else {
		apic->irr_pending = (apic_search_irr(apic) != -1);
		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);

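/*
 * Reset the local APIC to its architectural power-up state.  For an INIT
 * (init_event == true) the APIC base and APIC ID are left untouched.
 */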
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	if (!apic)
		return;

	vcpu->kvm->arch.apic_map_dirty = false;
	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!init_event) {
		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
					 MSR_IA32_APICBASE_ENABLE);
		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
	}
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_update_lvtt(apic);
	if (kvm_vcpu_is_reset_bsp(vcpu) &&
	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
		kvm_lapic_set_reg(apic, APIC_LVT0,
			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));

	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
	if (!apic_x2apic_mode(apic))
		kvm_apic_set_ldr(apic, 0);
	kvm_lapic_set_reg(apic, APIC_ESR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	kvm_apic_update_apicv(vcpu);
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops.apicv_post_state_restore(vcpu);
		kvm_x86_ops.hwapic_irr_update(vcpu, -1);
		kvm_x86_ops.hwapic_isr_update(vcpu, -1);
	}

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	kvm_recalculate_apic_map(vcpu->kvm);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read = apic_mmio_read,
	.write = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);

	apic_timer_expired(apic);

	if (lapic_is_periodic(apic)) {
		advance_periodic_target_expiration(apic);
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

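/*
 * Allocate the in-kernel local APIC and its register page and set up the
 * emulated APIC timer.  A timer_advance_ns of -1 selects the self-tuning
 * default advance value.
 */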
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_HARD);
	apic->lapic_timer.timer.function = apic_timer_fn;
	if (timer_advance_ns == -1) {
		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
		lapic_timer_advance_dynamic = true;
	} else {
		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
		lapic_timer_advance_dynamic = false;
	}

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
	vcpu->arch.apic = NULL;
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (!kvm_apic_hw_enabled(apic))
		return -1;

	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		return 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		return 1;
	return 0;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		kvm_apic_inject_pending_timer_irqs(apic);
		atomic_set(&apic->lapic_timer.pending, 0);
	}
}

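/*
 * Accept the highest-priority pending interrupt: clear it from the IRR,
 * set it in the ISR (except for Hyper-V auto-EOI vectors) and recompute
 * the PPR.
 */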
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (vector == -1)
		return -1;

	/*
	 * We get here even with APIC virtualization enabled, if doing
	 * nested virtualization and L1 runs with the "acknowledge interrupt
	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
	 * because the process would deliver it through the IDT.
	 */

	apic_clear_irr(vector, apic);
	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
		/*
		 * For auto-EOI interrupts, there might be another pending
		 * interrupt above PPR, so check whether to raise another
		 * KVM_REQ_EVENT.
		 */
		apic_update_ppr(apic);
	} else {
		/*
		 * For normal interrupts, PPR has been raised and there cannot
		 * be a higher-priority pending interrupt---except if there was
		 * a concurrent interrupt injection, but that would have
		 * triggered KVM_REQ_EVENT already.
		 */
		apic_set_isr(vector, apic);
		__apic_update_ppr(apic, &ppr);
	}

	return vector;
}

static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s, bool set)
{
	if (apic_x2apic_mode(vcpu->arch.apic)) {
		u32 *id = (u32 *)(s->regs + APIC_ID);
		u32 *ldr = (u32 *)(s->regs + APIC_LDR);

		if (vcpu->kvm->arch.x2apic_format) {
			if (*id != vcpu->vcpu_id)
				return -EINVAL;
		} else {
			if (set)
				*id >>= 24;
			else
				*id <<= 24;
		}

		/* In x2APIC mode, the LDR is fixed and based on the id */
		if (set)
			*ldr = kvm_apic_calc_x2apic_ldr(*id);
	}

	return 0;
}

int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
	return kvm_apic_state_fixup(vcpu, s, false);
}

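/*
 * Restore APIC register state supplied by userspace (KVM_SET_LAPIC) and
 * resynchronize all derived state (timer, divide count, APICv caches,
 * NMI watchdog) with the new register values.
 */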
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int r;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));

	r = kvm_apic_state_fixup(vcpu, s, true);
	if (r) {
		kvm_recalculate_apic_map(vcpu->kvm);
		return r;
	}
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));

	kvm_recalculate_apic_map(vcpu->kvm);
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	apic_update_lvtt(apic);
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
	update_divide_count(apic);
	start_apic_timer(apic);
	kvm_apic_update_apicv(vcpu);
	apic->highest_isr_cache = -1;
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops.apicv_post_state_restore(vcpu);
		kvm_x86_ops.hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
		kvm_x86_ops.hwapic_isr_update(vcpu,
				apic_find_highest_isr(apic));
	}
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	if (ioapic_in_kernel(vcpu->kvm))
		kvm_rtc_eoi_tracking_restore_one(vcpu);

	vcpu->arch.apic_arb_prio = 0;

	return 0;
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!lapic_in_kernel(vcpu) ||
		kvm_can_post_timer_interrupt(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether the guest triggered PV EOI since the
 * last entry. If yes, set EOI on the guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				  sizeof(u32)))
		return;

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
				      struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
			       sizeof(u32));
}

int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (vapic_addr) {
		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
					&vcpu->arch.apic->vapic_cache,
					vapic_addr, sizeof(u32)))
			return -EINVAL;
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	} else {
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	}

	vcpu->arch.apic->vapic_addr = vapic_addr;
	return 0;
}

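/*
 * x2APIC MSR interface: MSR 0x800 + (reg >> 4) maps to APIC register 'reg'.
 * ICR is the only 64-bit register; its high half is carried via APIC_ICR2.
 */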
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_ICR2)
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_DFR || reg == APIC_ICR2)
		return 1;

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu))
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!lapic_in_kernel(vcpu))
		return 1;

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
	unsigned long new_len;

	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;

	if (addr == ghc->gpa && len <= ghc->len)
		new_len = ghc->len;
	else
		new_len = len;

	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}

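/*
 * Process pending INIT and SIPI events.  While INITs are latched by the
 * current vcpu state, SIPIs are discarded and the INIT is left pending;
 * otherwise an INIT resets the vcpu and a SIPI is only delivered from the
 * INIT_RECEIVED state.
 */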
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u8 sipi_vector;
	unsigned long pe;

	if (!lapic_in_kernel(vcpu) || !apic->pending_events)
		return;

	/*
	 * INITs are latched while CPU is in specific states
	 * (SMM, VMX non-root mode, SVM with GIF=0).
	 * Because a CPU cannot be in these states immediately
	 * after it has processed an INIT signal (and thus in
	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
	 * and leave the INIT pending.
	 */
	if (kvm_vcpu_latch_init(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		return;
	}

	pe = xchg(&apic->pending_events, 0);
	if (test_bit(KVM_APIC_INIT, &pe)) {
		kvm_vcpu_reset(vcpu, true);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_bit(KVM_APIC_SIPI, &pe) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}

void kvm_lapic_exit(void)
{
	static_key_deferred_flush(&apic_hw_disabled);
	static_key_deferred_flush(&apic_sw_disabled);
}