// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...) do {} while (0)

/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

#define APIC_BROADCAST			0xFF
#define X2APIC_BROADCAST		0xFFFFFFFFul

#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
{
	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
	return apic->vcpu->vcpu_id;
}

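/*
 * As a rough guide to the logical-destination encodings handled by the
 * helper below: in x2APIC mode a logical ID is (cluster << 16) | (1 <<
 * position), so e.g. destination 0x0002000c addresses positions 2 and 3
 * of cluster 2; in xAPIC flat mode the low eight bits are a plain
 * bitmask; in xAPIC cluster mode bits 7:4 select the cluster and bits
 * 3:0 the members.
 */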
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
	switch (map->mode) {
	case KVM_APIC_MODE_X2APIC: {
		u32 offset = (dest_id >> 16) * 16;
		u32 max_apic_id = map->max_apic_id;

		if (offset <= max_apic_id) {
			u8 cluster_size = min(max_apic_id - offset + 1, 16U);

			offset = array_index_nospec(offset, map->max_apic_id + 1);
			*cluster = &map->phys_map[offset];
			*mask = dest_id & (0xffff >> (16 - cluster_size));
		} else {
			*mask = 0;
		}

		return true;
		}
	case KVM_APIC_MODE_XAPIC_FLAT:
		*cluster = map->xapic_flat_map;
		*mask = dest_id & 0xff;
		return true;
	case KVM_APIC_MODE_XAPIC_CLUSTER:
		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
		*mask = dest_id & 0xf;
		return true;
	default:
		/* Not optimized. */
		return false;
	}
}

static void kvm_apic_map_free(struct rcu_head *rcu)
{
	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

	kvfree(map);
}

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;
	u32 max_id = 255; /* enough space for any xAPIC ID */

	mutex_lock(&kvm->arch.apic_map_lock);

	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

	new = kvzalloc(sizeof(struct kvm_apic_map) +
			sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
			GFP_KERNEL_ACCOUNT);

	if (!new)
		goto out;

	new->max_apic_id = max_id;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		struct kvm_lapic **cluster;
		u16 mask;
		u32 ldr;
		u8 xapic_id;
		u32 x2apic_id;

		if (!kvm_apic_present(vcpu))
			continue;

		xapic_id = kvm_xapic_id(apic);
		x2apic_id = kvm_x2apic_id(apic);

		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
				x2apic_id <= new->max_apic_id)
			new->phys_map[x2apic_id] = apic;
		/*
		 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
		 * prevent them from masking VCPUs with APIC ID <= 0xff.
207 */ 208 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) 209 new->phys_map[xapic_id] = apic; 210 211 ldr = kvm_lapic_get_reg(apic, APIC_LDR); 212 213 if (apic_x2apic_mode(apic)) { 214 new->mode |= KVM_APIC_MODE_X2APIC; 215 } else if (ldr) { 216 ldr = GET_APIC_LOGICAL_ID(ldr); 217 if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) 218 new->mode |= KVM_APIC_MODE_XAPIC_FLAT; 219 else 220 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; 221 } 222 223 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) 224 continue; 225 226 if (mask) 227 cluster[ffs(mask) - 1] = apic; 228 } 229 out: 230 old = rcu_dereference_protected(kvm->arch.apic_map, 231 lockdep_is_held(&kvm->arch.apic_map_lock)); 232 rcu_assign_pointer(kvm->arch.apic_map, new); 233 mutex_unlock(&kvm->arch.apic_map_lock); 234 235 if (old) 236 call_rcu(&old->rcu, kvm_apic_map_free); 237 238 kvm_make_scan_ioapic_request(kvm); 239 } 240 241 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 242 { 243 bool enabled = val & APIC_SPIV_APIC_ENABLED; 244 245 kvm_lapic_set_reg(apic, APIC_SPIV, val); 246 247 if (enabled != apic->sw_enabled) { 248 apic->sw_enabled = enabled; 249 if (enabled) 250 static_key_slow_dec_deferred(&apic_sw_disabled); 251 else 252 static_key_slow_inc(&apic_sw_disabled.key); 253 } 254 } 255 256 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) 257 { 258 kvm_lapic_set_reg(apic, APIC_ID, id << 24); 259 recalculate_apic_map(apic->vcpu->kvm); 260 } 261 262 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) 263 { 264 kvm_lapic_set_reg(apic, APIC_LDR, id); 265 recalculate_apic_map(apic->vcpu->kvm); 266 } 267 268 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) 269 { 270 return ((id >> 4) << 16) | (1 << (id & 0xf)); 271 } 272 273 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 274 { 275 u32 ldr = kvm_apic_calc_x2apic_ldr(id); 276 277 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 278 279 kvm_lapic_set_reg(apic, APIC_ID, id); 280 kvm_lapic_set_reg(apic, APIC_LDR, ldr); 281 recalculate_apic_map(apic->vcpu->kvm); 282 } 283 284 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 285 { 286 return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 287 } 288 289 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) 290 { 291 return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; 292 } 293 294 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 295 { 296 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; 297 } 298 299 static inline int apic_lvtt_period(struct kvm_lapic *apic) 300 { 301 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; 302 } 303 304 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) 305 { 306 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 307 } 308 309 static inline int apic_lvt_nmi_mode(u32 lvt_val) 310 { 311 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 312 } 313 314 void kvm_apic_set_version(struct kvm_vcpu *vcpu) 315 { 316 struct kvm_lapic *apic = vcpu->arch.apic; 317 struct kvm_cpuid_entry2 *feat; 318 u32 v = APIC_VERSION; 319 320 if (!lapic_in_kernel(vcpu)) 321 return; 322 323 /* 324 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) 325 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with 326 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC 327 * version first and level-triggered interrupts never get EOIed in 328 * IOAPIC. 
329 */ 330 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 331 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && 332 !ioapic_in_kernel(vcpu->kvm)) 333 v |= APIC_LVR_DIRECTED_EOI; 334 kvm_lapic_set_reg(apic, APIC_LVR, v); 335 } 336 337 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = { 338 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ 339 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 340 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 341 LINT_MASK, LINT_MASK, /* LVT0-1 */ 342 LVT_MASK /* LVTERR */ 343 }; 344 345 static int find_highest_vector(void *bitmap) 346 { 347 int vec; 348 u32 *reg; 349 350 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; 351 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 352 reg = bitmap + REG_POS(vec); 353 if (*reg) 354 return __fls(*reg) + vec; 355 } 356 357 return -1; 358 } 359 360 static u8 count_vectors(void *bitmap) 361 { 362 int vec; 363 u32 *reg; 364 u8 count = 0; 365 366 for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { 367 reg = bitmap + REG_POS(vec); 368 count += hweight32(*reg); 369 } 370 371 return count; 372 } 373 374 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) 375 { 376 u32 i, vec; 377 u32 pir_val, irr_val, prev_irr_val; 378 int max_updated_irr; 379 380 max_updated_irr = -1; 381 *max_irr = -1; 382 383 for (i = vec = 0; i <= 7; i++, vec += 32) { 384 pir_val = READ_ONCE(pir[i]); 385 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); 386 if (pir_val) { 387 prev_irr_val = irr_val; 388 irr_val |= xchg(&pir[i], 0); 389 *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; 390 if (prev_irr_val != irr_val) { 391 max_updated_irr = 392 __fls(irr_val ^ prev_irr_val) + vec; 393 } 394 } 395 if (irr_val) 396 *max_irr = __fls(irr_val) + vec; 397 } 398 399 return ((max_updated_irr != -1) && 400 (max_updated_irr == *max_irr)); 401 } 402 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); 403 404 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) 405 { 406 struct kvm_lapic *apic = vcpu->arch.apic; 407 408 return __kvm_apic_update_irr(pir, apic->regs, max_irr); 409 } 410 EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 411 412 static inline int apic_search_irr(struct kvm_lapic *apic) 413 { 414 return find_highest_vector(apic->regs + APIC_IRR); 415 } 416 417 static inline int apic_find_highest_irr(struct kvm_lapic *apic) 418 { 419 int result; 420 421 /* 422 * Note that irr_pending is just a hint. It will be always 423 * true with virtual interrupt delivery enabled. 424 */ 425 if (!apic->irr_pending) 426 return -1; 427 428 result = apic_search_irr(apic); 429 ASSERT(result == -1 || result >= 16); 430 431 return result; 432 } 433 434 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 435 { 436 struct kvm_vcpu *vcpu; 437 438 vcpu = apic->vcpu; 439 440 if (unlikely(vcpu->arch.apicv_active)) { 441 /* need to update RVI */ 442 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 443 kvm_x86_ops->hwapic_irr_update(vcpu, 444 apic_find_highest_irr(apic)); 445 } else { 446 apic->irr_pending = false; 447 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 448 if (apic_search_irr(apic) != -1) 449 apic->irr_pending = true; 450 } 451 } 452 453 static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 454 { 455 struct kvm_vcpu *vcpu; 456 457 if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 458 return; 459 460 vcpu = apic->vcpu; 461 462 /* 463 * With APIC virtualization enabled, all caching is disabled 464 * because the processor can modify ISR under the hood. Instead 465 * just set SVI. 
466 */ 467 if (unlikely(vcpu->arch.apicv_active)) 468 kvm_x86_ops->hwapic_isr_update(vcpu, vec); 469 else { 470 ++apic->isr_count; 471 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 472 /* 473 * ISR (in service register) bit is set when injecting an interrupt. 474 * The highest vector is injected. Thus the latest bit set matches 475 * the highest bit in ISR. 476 */ 477 apic->highest_isr_cache = vec; 478 } 479 } 480 481 static inline int apic_find_highest_isr(struct kvm_lapic *apic) 482 { 483 int result; 484 485 /* 486 * Note that isr_count is always 1, and highest_isr_cache 487 * is always -1, with APIC virtualization enabled. 488 */ 489 if (!apic->isr_count) 490 return -1; 491 if (likely(apic->highest_isr_cache != -1)) 492 return apic->highest_isr_cache; 493 494 result = find_highest_vector(apic->regs + APIC_ISR); 495 ASSERT(result == -1 || result >= 16); 496 497 return result; 498 } 499 500 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) 501 { 502 struct kvm_vcpu *vcpu; 503 if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) 504 return; 505 506 vcpu = apic->vcpu; 507 508 /* 509 * We do get here for APIC virtualization enabled if the guest 510 * uses the Hyper-V APIC enlightenment. In this case we may need 511 * to trigger a new interrupt delivery by writing the SVI field; 512 * on the other hand isr_count and highest_isr_cache are unused 513 * and must be left alone. 514 */ 515 if (unlikely(vcpu->arch.apicv_active)) 516 kvm_x86_ops->hwapic_isr_update(vcpu, 517 apic_find_highest_isr(apic)); 518 else { 519 --apic->isr_count; 520 BUG_ON(apic->isr_count < 0); 521 apic->highest_isr_cache = -1; 522 } 523 } 524 525 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 526 { 527 /* This may race with setting of irr in __apic_accept_irq() and 528 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 529 * will cause vmexit immediately and the value will be recalculated 530 * on the next vmentry. 531 */ 532 return apic_find_highest_irr(vcpu->arch.apic); 533 } 534 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); 535 536 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 537 int vector, int level, int trig_mode, 538 struct dest_map *dest_map); 539 540 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 541 struct dest_map *dest_map) 542 { 543 struct kvm_lapic *apic = vcpu->arch.apic; 544 545 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 546 irq->level, irq->trig_mode, dest_map); 547 } 548 549 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 550 unsigned long ipi_bitmap_high, u32 min, 551 unsigned long icr, int op_64_bit) 552 { 553 int i; 554 struct kvm_apic_map *map; 555 struct kvm_vcpu *vcpu; 556 struct kvm_lapic_irq irq = {0}; 557 int cluster_size = op_64_bit ? 64 : 32; 558 int count = 0; 559 560 irq.vector = icr & APIC_VECTOR_MASK; 561 irq.delivery_mode = icr & APIC_MODE_MASK; 562 irq.level = (icr & APIC_INT_ASSERT) != 0; 563 irq.trig_mode = icr & APIC_INT_LEVELTRIG; 564 565 if (icr & APIC_DEST_MASK) 566 return -KVM_EINVAL; 567 if (icr & APIC_SHORT_MASK) 568 return -KVM_EINVAL; 569 570 rcu_read_lock(); 571 map = rcu_dereference(kvm->arch.apic_map); 572 573 if (unlikely(!map)) { 574 count = -EOPNOTSUPP; 575 goto out; 576 } 577 578 if (min > map->max_apic_id) 579 goto out; 580 /* Bits above cluster_size are masked in the caller. 
	 */
	for_each_set_bit(i, &ipi_bitmap_low,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, &irq, NULL);
		}
	}

	min += cluster_size;

	if (min > map->max_apic_id)
		goto out;

	for_each_set_bit(i, &ipi_bitmap_high,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, &irq, NULL);
		}
	}

out:
	rcu_read_unlock();
	return count;
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				     sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
	int highest_irr;
	if (apic->vcpu->arch.apicv_active)
		highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);
	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
		return -1;
	return highest_irr;
}

static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;
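
	/*
	 * PPR is, roughly, the higher of the TPR and the priority class of
	 * the in-service vector.  For example, TPR = 0x30 with in-service
	 * vector 0x45 gives PPR = 0x40, while TPR = 0x50 with the same
	 * vector gives PPR = 0x50.
	 */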

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	*new_ppr = ppr;
	if (old_ppr != ppr)
		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

	return ppr < old_ppr;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 ppr;

	if (__apic_update_ppr(apic, &ppr) &&
	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
	apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
	return mda == (apic_x2apic_mode(apic) ?
			X2APIC_BROADCAST : APIC_BROADCAST);
}

static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
	if (kvm_apic_broadcast(apic, mda))
		return true;

	if (apic_x2apic_mode(apic))
		return mda == kvm_x2apic_id(apic);

	/*
	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
	 * The 0xff condition is needed because the xAPIC ID is writeable.
	 */
	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
		return true;

	return mda == kvm_xapic_id(apic);
}

static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
	u32 logical_id;

	if (kvm_apic_broadcast(apic, mda))
		return true;

	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

	if (apic_x2apic_mode(apic))
		return ((logical_id >> 16) == (mda >> 16))
		       && (logical_id & mda & 0xffff) != 0;

	logical_id = GET_APIC_LOGICAL_ID(logical_id);

	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		return (logical_id & mda) != 0;
	case APIC_DFR_CLUSTER:
		return ((logical_id >> 4) == (mda >> 4))
		       && (logical_id & mda & 0xf) != 0;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
		return false;
	}
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
 *    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
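 *
 * Concretely, as implemented in kvm_apic_mda() below: with the quirk
 * enabled, a non-IPI message (e.g. from the IOAPIC or an MSI) with
 * destination 0xff aimed at a LAPIC in x2APIC mode is treated as
 * X2APIC_BROADCAST, while an IPI keeps 0xff as a normal destination.
 * With the quirk disabled, 0xff is never rewritten.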
782 */ 783 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, 784 struct kvm_lapic *source, struct kvm_lapic *target) 785 { 786 bool ipi = source != NULL; 787 788 if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && 789 !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) 790 return X2APIC_BROADCAST; 791 792 return dest_id; 793 } 794 795 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 796 int short_hand, unsigned int dest, int dest_mode) 797 { 798 struct kvm_lapic *target = vcpu->arch.apic; 799 u32 mda = kvm_apic_mda(vcpu, dest, source, target); 800 801 apic_debug("target %p, source %p, dest 0x%x, " 802 "dest_mode 0x%x, short_hand 0x%x\n", 803 target, source, dest, dest_mode, short_hand); 804 805 ASSERT(target); 806 switch (short_hand) { 807 case APIC_DEST_NOSHORT: 808 if (dest_mode == APIC_DEST_PHYSICAL) 809 return kvm_apic_match_physical_addr(target, mda); 810 else 811 return kvm_apic_match_logical_addr(target, mda); 812 case APIC_DEST_SELF: 813 return target == source; 814 case APIC_DEST_ALLINC: 815 return true; 816 case APIC_DEST_ALLBUT: 817 return target != source; 818 default: 819 apic_debug("kvm: apic: Bad dest shorthand value %x\n", 820 short_hand); 821 return false; 822 } 823 } 824 EXPORT_SYMBOL_GPL(kvm_apic_match_dest); 825 826 int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 827 const unsigned long *bitmap, u32 bitmap_size) 828 { 829 u32 mod; 830 int i, idx = -1; 831 832 mod = vector % dest_vcpus; 833 834 for (i = 0; i <= mod; i++) { 835 idx = find_next_bit(bitmap, bitmap_size, idx + 1); 836 BUG_ON(idx == bitmap_size); 837 } 838 839 return idx; 840 } 841 842 static void kvm_apic_disabled_lapic_found(struct kvm *kvm) 843 { 844 if (!kvm->arch.disabled_lapic_found) { 845 kvm->arch.disabled_lapic_found = true; 846 printk(KERN_INFO 847 "Disabled LAPIC found during irq injection\n"); 848 } 849 } 850 851 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, 852 struct kvm_lapic_irq *irq, struct kvm_apic_map *map) 853 { 854 if (kvm->arch.x2apic_broadcast_quirk_disabled) { 855 if ((irq->dest_id == APIC_BROADCAST && 856 map->mode != KVM_APIC_MODE_X2APIC)) 857 return true; 858 if (irq->dest_id == X2APIC_BROADCAST) 859 return true; 860 } else { 861 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); 862 if (irq->dest_id == (x2apic_ipi ? 863 X2APIC_BROADCAST : APIC_BROADCAST)) 864 return true; 865 } 866 867 return false; 868 } 869 870 /* Return true if the interrupt can be handled by using *bitmap as index mask 871 * for valid destinations in *dst array. 872 * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 873 * Note: we may have zero kvm_lapic destinations when we return true, which 874 * means that the interrupt should be dropped. In this case, *bitmap would be 875 * zero and *dst undefined. 
876 */ 877 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, 878 struct kvm_lapic **src, struct kvm_lapic_irq *irq, 879 struct kvm_apic_map *map, struct kvm_lapic ***dst, 880 unsigned long *bitmap) 881 { 882 int i, lowest; 883 884 if (irq->shorthand == APIC_DEST_SELF && src) { 885 *dst = src; 886 *bitmap = 1; 887 return true; 888 } else if (irq->shorthand) 889 return false; 890 891 if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) 892 return false; 893 894 if (irq->dest_mode == APIC_DEST_PHYSICAL) { 895 if (irq->dest_id > map->max_apic_id) { 896 *bitmap = 0; 897 } else { 898 u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); 899 *dst = &map->phys_map[dest_id]; 900 *bitmap = 1; 901 } 902 return true; 903 } 904 905 *bitmap = 0; 906 if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, 907 (u16 *)bitmap)) 908 return false; 909 910 if (!kvm_lowest_prio_delivery(irq)) 911 return true; 912 913 if (!kvm_vector_hashing_enabled()) { 914 lowest = -1; 915 for_each_set_bit(i, bitmap, 16) { 916 if (!(*dst)[i]) 917 continue; 918 if (lowest < 0) 919 lowest = i; 920 else if (kvm_apic_compare_prio((*dst)[i]->vcpu, 921 (*dst)[lowest]->vcpu) < 0) 922 lowest = i; 923 } 924 } else { 925 if (!*bitmap) 926 return true; 927 928 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), 929 bitmap, 16); 930 931 if (!(*dst)[lowest]) { 932 kvm_apic_disabled_lapic_found(kvm); 933 *bitmap = 0; 934 return true; 935 } 936 } 937 938 *bitmap = (lowest >= 0) ? 1 << lowest : 0; 939 940 return true; 941 } 942 943 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 944 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) 945 { 946 struct kvm_apic_map *map; 947 unsigned long bitmap; 948 struct kvm_lapic **dst = NULL; 949 int i; 950 bool ret; 951 952 *r = -1; 953 954 if (irq->shorthand == APIC_DEST_SELF) { 955 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 956 return true; 957 } 958 959 rcu_read_lock(); 960 map = rcu_dereference(kvm->arch.apic_map); 961 962 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); 963 if (ret) { 964 *r = 0; 965 for_each_set_bit(i, &bitmap, 16) { 966 if (!dst[i]) 967 continue; 968 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 969 } 970 } 971 972 rcu_read_unlock(); 973 return ret; 974 } 975 976 /* 977 * This routine tries to handler interrupts in posted mode, here is how 978 * it deals with different cases: 979 * - For single-destination interrupts, handle it in posted mode 980 * - Else if vector hashing is enabled and it is a lowest-priority 981 * interrupt, handle it in posted mode and use the following mechanism 982 * to find the destinaiton vCPU. 983 * 1. For lowest-priority interrupts, store all the possible 984 * destination vCPUs in an array. 985 * 2. Use "guest vector % max number of destination vCPUs" to find 986 * the right destination vCPU in the array for the lowest-priority 987 * interrupt. 988 * - Otherwise, use remapped mode to inject the interrupt. 
989 */ 990 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 991 struct kvm_vcpu **dest_vcpu) 992 { 993 struct kvm_apic_map *map; 994 unsigned long bitmap; 995 struct kvm_lapic **dst = NULL; 996 bool ret = false; 997 998 if (irq->shorthand) 999 return false; 1000 1001 rcu_read_lock(); 1002 map = rcu_dereference(kvm->arch.apic_map); 1003 1004 if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && 1005 hweight16(bitmap) == 1) { 1006 unsigned long i = find_first_bit(&bitmap, 16); 1007 1008 if (dst[i]) { 1009 *dest_vcpu = dst[i]->vcpu; 1010 ret = true; 1011 } 1012 } 1013 1014 rcu_read_unlock(); 1015 return ret; 1016 } 1017 1018 /* 1019 * Add a pending IRQ into lapic. 1020 * Return 1 if successfully added and 0 if discarded. 1021 */ 1022 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 1023 int vector, int level, int trig_mode, 1024 struct dest_map *dest_map) 1025 { 1026 int result = 0; 1027 struct kvm_vcpu *vcpu = apic->vcpu; 1028 1029 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 1030 trig_mode, vector); 1031 switch (delivery_mode) { 1032 case APIC_DM_LOWEST: 1033 vcpu->arch.apic_arb_prio++; 1034 /* fall through */ 1035 case APIC_DM_FIXED: 1036 if (unlikely(trig_mode && !level)) 1037 break; 1038 1039 /* FIXME add logic for vcpu on reset */ 1040 if (unlikely(!apic_enabled(apic))) 1041 break; 1042 1043 result = 1; 1044 1045 if (dest_map) { 1046 __set_bit(vcpu->vcpu_id, dest_map->map); 1047 dest_map->vectors[vcpu->vcpu_id] = vector; 1048 } 1049 1050 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 1051 if (trig_mode) 1052 kvm_lapic_set_vector(vector, 1053 apic->regs + APIC_TMR); 1054 else 1055 kvm_lapic_clear_vector(vector, 1056 apic->regs + APIC_TMR); 1057 } 1058 1059 if (vcpu->arch.apicv_active) 1060 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); 1061 else { 1062 kvm_lapic_set_irr(vector, apic); 1063 1064 kvm_make_request(KVM_REQ_EVENT, vcpu); 1065 kvm_vcpu_kick(vcpu); 1066 } 1067 break; 1068 1069 case APIC_DM_REMRD: 1070 result = 1; 1071 vcpu->arch.pv.pv_unhalted = 1; 1072 kvm_make_request(KVM_REQ_EVENT, vcpu); 1073 kvm_vcpu_kick(vcpu); 1074 break; 1075 1076 case APIC_DM_SMI: 1077 result = 1; 1078 kvm_make_request(KVM_REQ_SMI, vcpu); 1079 kvm_vcpu_kick(vcpu); 1080 break; 1081 1082 case APIC_DM_NMI: 1083 result = 1; 1084 kvm_inject_nmi(vcpu); 1085 kvm_vcpu_kick(vcpu); 1086 break; 1087 1088 case APIC_DM_INIT: 1089 if (!trig_mode || level) { 1090 result = 1; 1091 /* assumes that there are only KVM_APIC_INIT/SIPI */ 1092 apic->pending_events = (1UL << KVM_APIC_INIT); 1093 /* make sure pending_events is visible before sending 1094 * the request */ 1095 smp_wmb(); 1096 kvm_make_request(KVM_REQ_EVENT, vcpu); 1097 kvm_vcpu_kick(vcpu); 1098 } else { 1099 apic_debug("Ignoring de-assert INIT to vcpu %d\n", 1100 vcpu->vcpu_id); 1101 } 1102 break; 1103 1104 case APIC_DM_STARTUP: 1105 apic_debug("SIPI to vcpu %d vector 0x%02x\n", 1106 vcpu->vcpu_id, vector); 1107 result = 1; 1108 apic->sipi_vector = vector; 1109 /* make sure sipi_vector is visible for the receiver */ 1110 smp_wmb(); 1111 set_bit(KVM_APIC_SIPI, &apic->pending_events); 1112 kvm_make_request(KVM_REQ_EVENT, vcpu); 1113 kvm_vcpu_kick(vcpu); 1114 break; 1115 1116 case APIC_DM_EXTINT: 1117 /* 1118 * Should only be called by kvm_apic_local_deliver() with LVT0, 1119 * before NMI watchdog was enabled. Already handled by 1120 * kvm_apic_accept_pic_intr(). 
1121 */ 1122 break; 1123 1124 default: 1125 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 1126 delivery_mode); 1127 break; 1128 } 1129 return result; 1130 } 1131 1132 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1133 { 1134 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1135 } 1136 1137 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 1138 { 1139 return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); 1140 } 1141 1142 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 1143 { 1144 int trigger_mode; 1145 1146 /* Eoi the ioapic only if the ioapic doesn't own the vector. */ 1147 if (!kvm_ioapic_handles_vector(apic, vector)) 1148 return; 1149 1150 /* Request a KVM exit to inform the userspace IOAPIC. */ 1151 if (irqchip_split(apic->vcpu->kvm)) { 1152 apic->vcpu->arch.pending_ioapic_eoi = vector; 1153 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); 1154 return; 1155 } 1156 1157 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 1158 trigger_mode = IOAPIC_LEVEL_TRIG; 1159 else 1160 trigger_mode = IOAPIC_EDGE_TRIG; 1161 1162 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); 1163 } 1164 1165 static int apic_set_eoi(struct kvm_lapic *apic) 1166 { 1167 int vector = apic_find_highest_isr(apic); 1168 1169 trace_kvm_eoi(apic, vector); 1170 1171 /* 1172 * Not every write EOI will has corresponding ISR, 1173 * one example is when Kernel check timer on setup_IO_APIC 1174 */ 1175 if (vector == -1) 1176 return vector; 1177 1178 apic_clear_isr(vector, apic); 1179 apic_update_ppr(apic); 1180 1181 if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) 1182 kvm_hv_synic_send_eoi(apic->vcpu, vector); 1183 1184 kvm_ioapic_send_eoi(apic, vector); 1185 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1186 return vector; 1187 } 1188 1189 /* 1190 * this interface assumes a trap-like exit, which has already finished 1191 * desired side effect including vISR and vPPR update. 
1192 */ 1193 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 1194 { 1195 struct kvm_lapic *apic = vcpu->arch.apic; 1196 1197 trace_kvm_eoi(apic, vector); 1198 1199 kvm_ioapic_send_eoi(apic, vector); 1200 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1201 } 1202 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1203 1204 static void apic_send_ipi(struct kvm_lapic *apic) 1205 { 1206 u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR); 1207 u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2); 1208 struct kvm_lapic_irq irq; 1209 1210 irq.vector = icr_low & APIC_VECTOR_MASK; 1211 irq.delivery_mode = icr_low & APIC_MODE_MASK; 1212 irq.dest_mode = icr_low & APIC_DEST_MASK; 1213 irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1214 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1215 irq.shorthand = icr_low & APIC_SHORT_MASK; 1216 irq.msi_redir_hint = false; 1217 if (apic_x2apic_mode(apic)) 1218 irq.dest_id = icr_high; 1219 else 1220 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 1221 1222 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1223 1224 apic_debug("icr_high 0x%x, icr_low 0x%x, " 1225 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " 1226 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, " 1227 "msi_redir_hint 0x%x\n", 1228 icr_high, icr_low, irq.shorthand, irq.dest_id, 1229 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, 1230 irq.vector, irq.msi_redir_hint); 1231 1232 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1233 } 1234 1235 static u32 apic_get_tmcct(struct kvm_lapic *apic) 1236 { 1237 ktime_t remaining, now; 1238 s64 ns; 1239 u32 tmcct; 1240 1241 ASSERT(apic != NULL); 1242 1243 /* if initial count is 0, current count should also be 0 */ 1244 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || 1245 apic->lapic_timer.period == 0) 1246 return 0; 1247 1248 now = ktime_get(); 1249 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1250 if (ktime_to_ns(remaining) < 0) 1251 remaining = 0; 1252 1253 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 1254 tmcct = div64_u64(ns, 1255 (APIC_BUS_CYCLE_NS * apic->divide_count)); 1256 1257 return tmcct; 1258 } 1259 1260 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 1261 { 1262 struct kvm_vcpu *vcpu = apic->vcpu; 1263 struct kvm_run *run = vcpu->run; 1264 1265 kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 1266 run->tpr_access.rip = kvm_rip_read(vcpu); 1267 run->tpr_access.is_write = write; 1268 } 1269 1270 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 1271 { 1272 if (apic->vcpu->arch.tpr_access_reporting) 1273 __report_tpr_access(apic, write); 1274 } 1275 1276 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 1277 { 1278 u32 val = 0; 1279 1280 if (offset >= LAPIC_MMIO_LENGTH) 1281 return 0; 1282 1283 switch (offset) { 1284 case APIC_ARBPRI: 1285 apic_debug("Access APIC ARBPRI register which is for P6\n"); 1286 break; 1287 1288 case APIC_TMCCT: /* Timer CCR */ 1289 if (apic_lvtt_tscdeadline(apic)) 1290 return 0; 1291 1292 val = apic_get_tmcct(apic); 1293 break; 1294 case APIC_PROCPRI: 1295 apic_update_ppr(apic); 1296 val = kvm_lapic_get_reg(apic, offset); 1297 break; 1298 case APIC_TASKPRI: 1299 report_tpr_access(apic, false); 1300 /* fall thru */ 1301 default: 1302 val = kvm_lapic_get_reg(apic, offset); 1303 break; 1304 } 1305 1306 return val; 1307 } 1308 1309 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 1310 { 1311 return container_of(dev, struct kvm_lapic, dev); 1312 } 1313 1314 #define 

int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	u64 valid_reg_mask =
		APIC_REG_MASK(APIC_ID) |
		APIC_REG_MASK(APIC_LVR) |
		APIC_REG_MASK(APIC_TASKPRI) |
		APIC_REG_MASK(APIC_PROCPRI) |
		APIC_REG_MASK(APIC_LDR) |
		APIC_REG_MASK(APIC_DFR) |
		APIC_REG_MASK(APIC_SPIV) |
		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
		APIC_REG_MASK(APIC_ESR) |
		APIC_REG_MASK(APIC_ICR) |
		APIC_REG_MASK(APIC_ICR2) |
		APIC_REG_MASK(APIC_LVTT) |
		APIC_REG_MASK(APIC_LVTTHMR) |
		APIC_REG_MASK(APIC_LVTPC) |
		APIC_REG_MASK(APIC_LVT0) |
		APIC_REG_MASK(APIC_LVT1) |
		APIC_REG_MASK(APIC_LVTERR) |
		APIC_REG_MASK(APIC_TMICT) |
		APIC_REG_MASK(APIC_TMCCT) |
		APIC_REG_MASK(APIC_TDCR);

	/* ARBPRI is not valid on x2APIC */
	if (!apic_x2apic_mode(apic))
		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);

	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return addr >= apic->base_address &&
		addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		memset(data, 0xff, len);
		return 0;
	}

	kvm_lapic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}

static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
{
	/*
	 * Do not allow the guest to program periodic timers with small
	 * interval, since the hrtimers are not throttled by the host
	 * scheduler.
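	 *
	 * For example, if min_timer_period_us is 500, a requested period of
	 * 100000 ns is silently raised to 500000 ns by the check below.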
1425 */ 1426 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1427 s64 min_period = min_timer_period_us * 1000LL; 1428 1429 if (apic->lapic_timer.period < min_period) { 1430 pr_info_ratelimited( 1431 "kvm: vcpu %i: requested %lld ns " 1432 "lapic timer period limited to %lld ns\n", 1433 apic->vcpu->vcpu_id, 1434 apic->lapic_timer.period, min_period); 1435 apic->lapic_timer.period = min_period; 1436 } 1437 } 1438 } 1439 1440 static void apic_update_lvtt(struct kvm_lapic *apic) 1441 { 1442 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & 1443 apic->lapic_timer.timer_mode_mask; 1444 1445 if (apic->lapic_timer.timer_mode != timer_mode) { 1446 if (apic_lvtt_tscdeadline(apic) != (timer_mode == 1447 APIC_LVT_TIMER_TSCDEADLINE)) { 1448 hrtimer_cancel(&apic->lapic_timer.timer); 1449 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 1450 apic->lapic_timer.period = 0; 1451 apic->lapic_timer.tscdeadline = 0; 1452 } 1453 apic->lapic_timer.timer_mode = timer_mode; 1454 limit_periodic_timer_frequency(apic); 1455 } 1456 } 1457 1458 static void apic_timer_expired(struct kvm_lapic *apic) 1459 { 1460 struct kvm_vcpu *vcpu = apic->vcpu; 1461 struct swait_queue_head *q = &vcpu->wq; 1462 struct kvm_timer *ktimer = &apic->lapic_timer; 1463 1464 if (atomic_read(&apic->lapic_timer.pending)) 1465 return; 1466 1467 atomic_inc(&apic->lapic_timer.pending); 1468 kvm_set_pending_timer(vcpu); 1469 1470 /* 1471 * For x86, the atomic_inc() is serialized, thus 1472 * using swait_active() is safe. 1473 */ 1474 if (swait_active(q)) 1475 swake_up_one(q); 1476 1477 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1478 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1479 } 1480 1481 /* 1482 * On APICv, this test will cause a busy wait 1483 * during a higher-priority task. 1484 */ 1485 1486 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1487 { 1488 struct kvm_lapic *apic = vcpu->arch.apic; 1489 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); 1490 1491 if (kvm_apic_hw_enabled(apic)) { 1492 int vec = reg & APIC_VECTOR_MASK; 1493 void *bitmap = apic->regs + APIC_ISR; 1494 1495 if (vcpu->arch.apicv_active) 1496 bitmap = apic->regs + APIC_IRR; 1497 1498 if (apic_test_vector(vec, bitmap)) 1499 return true; 1500 } 1501 return false; 1502 } 1503 1504 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) 1505 { 1506 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; 1507 1508 /* 1509 * If the guest TSC is running at a different ratio than the host, then 1510 * convert the delay to nanoseconds to achieve an accurate delay. Note 1511 * that __delay() uses delay_tsc whenever the hardware has TSC, thus 1512 * always for VMX enabled hardware. 
1513 */ 1514 if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { 1515 __delay(min(guest_cycles, 1516 nsec_to_cycles(vcpu, timer_advance_ns))); 1517 } else { 1518 u64 delay_ns = guest_cycles * 1000000ULL; 1519 do_div(delay_ns, vcpu->arch.virtual_tsc_khz); 1520 ndelay(min_t(u32, delay_ns, timer_advance_ns)); 1521 } 1522 } 1523 1524 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, 1525 s64 advance_expire_delta) 1526 { 1527 struct kvm_lapic *apic = vcpu->arch.apic; 1528 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; 1529 u64 ns; 1530 1531 /* too early */ 1532 if (advance_expire_delta < 0) { 1533 ns = -advance_expire_delta * 1000000ULL; 1534 do_div(ns, vcpu->arch.virtual_tsc_khz); 1535 timer_advance_ns -= min((u32)ns, 1536 timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); 1537 } else { 1538 /* too late */ 1539 ns = advance_expire_delta * 1000000ULL; 1540 do_div(ns, vcpu->arch.virtual_tsc_khz); 1541 timer_advance_ns += min((u32)ns, 1542 timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); 1543 } 1544 1545 if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) 1546 apic->lapic_timer.timer_advance_adjust_done = true; 1547 if (unlikely(timer_advance_ns > 5000)) { 1548 timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; 1549 apic->lapic_timer.timer_advance_adjust_done = false; 1550 } 1551 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1552 } 1553 1554 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1555 { 1556 struct kvm_lapic *apic = vcpu->arch.apic; 1557 u64 guest_tsc, tsc_deadline; 1558 1559 if (apic->lapic_timer.expired_tscdeadline == 0) 1560 return; 1561 1562 if (!lapic_timer_int_injected(vcpu)) 1563 return; 1564 1565 tsc_deadline = apic->lapic_timer.expired_tscdeadline; 1566 apic->lapic_timer.expired_tscdeadline = 0; 1567 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1568 apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; 1569 1570 if (guest_tsc < tsc_deadline) 1571 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); 1572 1573 if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) 1574 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); 1575 } 1576 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1577 1578 static void start_sw_tscdeadline(struct kvm_lapic *apic) 1579 { 1580 struct kvm_timer *ktimer = &apic->lapic_timer; 1581 u64 guest_tsc, tscdeadline = ktimer->tscdeadline; 1582 u64 ns = 0; 1583 ktime_t expire; 1584 struct kvm_vcpu *vcpu = apic->vcpu; 1585 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1586 unsigned long flags; 1587 ktime_t now; 1588 1589 if (unlikely(!tscdeadline || !this_tsc_khz)) 1590 return; 1591 1592 local_irq_save(flags); 1593 1594 now = ktime_get(); 1595 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1596 1597 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1598 do_div(ns, this_tsc_khz); 1599 1600 if (likely(tscdeadline > guest_tsc) && 1601 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1602 expire = ktime_add_ns(now, ns); 1603 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1604 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); 1605 } else 1606 apic_timer_expired(apic); 1607 1608 local_irq_restore(flags); 1609 } 1610 1611 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) 1612 { 1613 ktime_t now, remaining; 1614 u64 ns_remaining_old, ns_remaining_new; 1615 1616 apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) 1617 * APIC_BUS_CYCLE_NS * apic->divide_count; 1618 
	limit_periodic_timer_frequency(apic);

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns_remaining_old = ktime_to_ns(remaining);
	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
	                                   apic->divide_count, old_divisor);

	apic->lapic_timer.tscdeadline +=
		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
		nsec_to_cycles(apic->vcpu, ns_remaining_old);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}

static bool set_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now;
	u64 tscl = rdtsc();

	now = ktime_get();
	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
		* APIC_BUS_CYCLE_NS * apic->divide_count;

	if (!apic->lapic_timer.period) {
		apic->lapic_timer.tscdeadline = 0;
		return false;
	}

	limit_periodic_timer_frequency(apic);

	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
		   PRIx64 ", "
		   "timer initial count 0x%x, period %lldns, "
		   "expire @ 0x%016" PRIx64 ".\n", __func__,
		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
		   kvm_lapic_get_reg(apic, APIC_TMICT),
		   apic->lapic_timer.period,
		   ktime_to_ns(ktime_add_ns(now,
				apic->lapic_timer.period)));

	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);

	return true;
}

static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now = ktime_get();
	u64 tscl = rdtsc();
	ktime_t delta;

	/*
	 * Synchronize both deadlines to the same time source or
	 * differences in the periods (caused by differences in the
	 * underlying clocks or numerical approximation errors) will
	 * cause the two to drift apart over time as the errors
	 * accumulate.
1680 */ 1681 apic->lapic_timer.target_expiration = 1682 ktime_add_ns(apic->lapic_timer.target_expiration, 1683 apic->lapic_timer.period); 1684 delta = ktime_sub(apic->lapic_timer.target_expiration, now); 1685 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1686 nsec_to_cycles(apic->vcpu, delta); 1687 } 1688 1689 static void start_sw_period(struct kvm_lapic *apic) 1690 { 1691 if (!apic->lapic_timer.period) 1692 return; 1693 1694 if (ktime_after(ktime_get(), 1695 apic->lapic_timer.target_expiration)) { 1696 apic_timer_expired(apic); 1697 1698 if (apic_lvtt_oneshot(apic)) 1699 return; 1700 1701 advance_periodic_target_expiration(apic); 1702 } 1703 1704 hrtimer_start(&apic->lapic_timer.timer, 1705 apic->lapic_timer.target_expiration, 1706 HRTIMER_MODE_ABS_PINNED); 1707 } 1708 1709 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) 1710 { 1711 if (!lapic_in_kernel(vcpu)) 1712 return false; 1713 1714 return vcpu->arch.apic->lapic_timer.hv_timer_in_use; 1715 } 1716 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); 1717 1718 static void cancel_hv_timer(struct kvm_lapic *apic) 1719 { 1720 WARN_ON(preemptible()); 1721 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1722 kvm_x86_ops->cancel_hv_timer(apic->vcpu); 1723 apic->lapic_timer.hv_timer_in_use = false; 1724 } 1725 1726 static bool start_hv_timer(struct kvm_lapic *apic) 1727 { 1728 struct kvm_timer *ktimer = &apic->lapic_timer; 1729 struct kvm_vcpu *vcpu = apic->vcpu; 1730 bool expired; 1731 1732 WARN_ON(preemptible()); 1733 if (!kvm_x86_ops->set_hv_timer) 1734 return false; 1735 1736 if (!ktimer->tscdeadline) 1737 return false; 1738 1739 if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) 1740 return false; 1741 1742 ktimer->hv_timer_in_use = true; 1743 hrtimer_cancel(&ktimer->timer); 1744 1745 /* 1746 * To simplify handling the periodic timer, leave the hv timer running 1747 * even if the deadline timer has expired, i.e. rely on the resulting 1748 * VM-Exit to recompute the periodic timer's target expiration. 1749 */ 1750 if (!apic_lvtt_period(apic)) { 1751 /* 1752 * Cancel the hv timer if the sw timer fired while the hv timer 1753 * was being programmed, or if the hv timer itself expired. 
1754 */ 1755 if (atomic_read(&ktimer->pending)) { 1756 cancel_hv_timer(apic); 1757 } else if (expired) { 1758 apic_timer_expired(apic); 1759 cancel_hv_timer(apic); 1760 } 1761 } 1762 1763 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); 1764 1765 return true; 1766 } 1767 1768 static void start_sw_timer(struct kvm_lapic *apic) 1769 { 1770 struct kvm_timer *ktimer = &apic->lapic_timer; 1771 1772 WARN_ON(preemptible()); 1773 if (apic->lapic_timer.hv_timer_in_use) 1774 cancel_hv_timer(apic); 1775 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) 1776 return; 1777 1778 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1779 start_sw_period(apic); 1780 else if (apic_lvtt_tscdeadline(apic)) 1781 start_sw_tscdeadline(apic); 1782 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); 1783 } 1784 1785 static void restart_apic_timer(struct kvm_lapic *apic) 1786 { 1787 preempt_disable(); 1788 1789 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) 1790 goto out; 1791 1792 if (!start_hv_timer(apic)) 1793 start_sw_timer(apic); 1794 out: 1795 preempt_enable(); 1796 } 1797 1798 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 1799 { 1800 struct kvm_lapic *apic = vcpu->arch.apic; 1801 1802 preempt_disable(); 1803 /* If the preempt notifier has already run, it also called apic_timer_expired */ 1804 if (!apic->lapic_timer.hv_timer_in_use) 1805 goto out; 1806 WARN_ON(swait_active(&vcpu->wq)); 1807 cancel_hv_timer(apic); 1808 apic_timer_expired(apic); 1809 1810 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1811 advance_periodic_target_expiration(apic); 1812 restart_apic_timer(apic); 1813 } 1814 out: 1815 preempt_enable(); 1816 } 1817 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 1818 1819 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) 1820 { 1821 restart_apic_timer(vcpu->arch.apic); 1822 } 1823 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); 1824 1825 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) 1826 { 1827 struct kvm_lapic *apic = vcpu->arch.apic; 1828 1829 preempt_disable(); 1830 /* Possibly the TSC deadline timer is not enabled yet */ 1831 if (apic->lapic_timer.hv_timer_in_use) 1832 start_sw_timer(apic); 1833 preempt_enable(); 1834 } 1835 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); 1836 1837 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) 1838 { 1839 struct kvm_lapic *apic = vcpu->arch.apic; 1840 1841 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1842 restart_apic_timer(apic); 1843 } 1844 1845 static void start_apic_timer(struct kvm_lapic *apic) 1846 { 1847 atomic_set(&apic->lapic_timer.pending, 0); 1848 1849 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1850 && !set_target_expiration(apic)) 1851 return; 1852 1853 restart_apic_timer(apic); 1854 } 1855 1856 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 1857 { 1858 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); 1859 1860 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 1861 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; 1862 if (lvt0_in_nmi_mode) { 1863 apic_debug("Receive NMI setting on APIC_LVT0 " 1864 "for cpu %d\n", apic->vcpu->vcpu_id); 1865 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1866 } else 1867 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1868 } 1869 } 1870 1871 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 1872 { 1873 int ret = 0; 1874 1875 trace_kvm_apic_write(reg, val); 1876 1877 switch (reg) { 1878 case APIC_ID: /* Local APIC ID */ 1879 if 
int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_xapic_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
				lvt_val = kvm_lapic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			apic_update_lvtt(apic);
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		kvm_lapic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		kvm_lapic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		kvm_lapic_set_reg(apic, APIC_LVTT, val);
		apic_update_lvtt(apic);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		kvm_lapic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR: {
		uint32_t old_divisor = apic->divide_count;

		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		kvm_lapic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		if (apic->divide_count != old_divisor &&
				apic->lapic_timer.period) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			update_target_expiration(apic, old_divisor);
			restart_apic_timer(apic);
		}
		break;
	}
	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);

static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		return 0;
	}

	/*
	 * APIC registers must be aligned on a 128-bit boundary.
	 * 32/64/128-bit registers must be accessed through 32-bit loads
	 * and stores.  Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32 *)data;

	/* EOI writes are too common to be worth logging */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	kvm_lapic_reg_write(apic, offset, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!apic->sw_enabled)
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) ||
		!apic_lvtt_tscdeadline(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}
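/*
 * Handle a change to the IA32_APIC_BASE MSR: keep the apic_hw_disabled
 * static key and the APIC map in sync with the enable bit, re-derive the
 * x2APIC ID when x2APIC is turned on, and notify vendor code of APIC mode
 * changes via kvm_x86_ops->set_virtual_apic_mode().  Relocating the APIC
 * base away from APIC_DEFAULT_PHYS_BASE is not supported.
 */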
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic)
		value |= MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;

	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
		kvm_update_cpuid(vcpu);

	if (!apic)
		return;

	/* update jump label if enable bit changes */
	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE) {
			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
			static_key_slow_dec_deferred(&apic_hw_disabled);
		} else {
			static_key_slow_inc(&apic_hw_disabled.key);
			recalculate_apic_map(vcpu->kvm);
		}
	}

	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);

	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
		kvm_x86_ops->set_virtual_apic_mode(vcpu);

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	if ((value & MSR_IA32_APICBASE_ENABLE) &&
	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
		pr_warn_once("APIC base relocation is unsupported by KVM");

	/* with FSB delivery interrupt, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}
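/*
 * Reset the local APIC registers to their reset values.  When @init_event
 * is true this is an INIT rather than a cold reset, so the APIC base MSR
 * and the xAPIC ID are preserved.
 */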
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	if (!apic)
		return;

	apic_debug("%s\n", __func__);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!init_event) {
		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
		                         MSR_IA32_APICBASE_ENABLE);
		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
	}
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_update_lvtt(apic);
	if (kvm_vcpu_is_reset_bsp(vcpu) &&
	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
		kvm_lapic_set_reg(apic, APIC_LVT0,
			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));

	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
	if (!apic_x2apic_mode(apic))
		kvm_apic_set_ldr(apic, 0);
	kvm_lapic_set_reg(apic, APIC_ESR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR, 0);
	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = vcpu->arch.apicv_active;
	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops->apicv_post_state_restore(vcpu);
		kvm_x86_ops->hwapic_irr_update(vcpu, -1);
		kvm_x86_ops->hwapic_isr_update(vcpu, -1);
	}

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_lapic_get_reg(apic, APIC_ID),
		   vcpu->arch.apic_base, apic->base_address);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read	= apic_mmio_read,
	.write	= apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);

	apic_timer_expired(apic);

	if (lapic_is_periodic(apic)) {
		advance_periodic_target_expiration(apic);
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}
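/*
 * Allocate and initialize the in-kernel local APIC for a vCPU: the backing
 * register page, the emulated timer (an ABS_PINNED hrtimer), and the timer
 * advancement state.  A timer_advance_ns of -1 selects the self-tuning
 * default (LAPIC_TIMER_ADVANCE_ADJUST_INIT).  The APIC starts hardware
 * enabled but software disabled, as at reset.
 */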
int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_PINNED);
	apic->lapic_timer.timer.function = apic_timer_fn;
	if (timer_advance_ns == -1) {
		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
		apic->lapic_timer.timer_advance_adjust_done = false;
	} else {
		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
		apic->lapic_timer.timer_advance_adjust_done = true;
	}


	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
	vcpu->arch.apic = NULL;
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (!kvm_apic_hw_enabled(apic))
		return -1;

	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		kvm_apic_local_deliver(apic, APIC_LVTT);
		if (apic_lvtt_tscdeadline(apic))
			apic->lapic_timer.tscdeadline = 0;
		if (apic_lvtt_oneshot(apic)) {
			apic->lapic_timer.tscdeadline = 0;
			apic->lapic_timer.target_expiration = 0;
		}
		atomic_set(&apic->lapic_timer.pending, 0);
	}
}
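/*
 * Acknowledge and return the highest-priority unmasked pending vector, or
 * -1 if there is none.  The vector is moved from IRR to ISR (except for
 * Hyper-V SynIC auto-EOI vectors, which skip ISR) and the PPR is
 * recomputed, mirroring what a physical local APIC does on interrupt
 * acknowledgement.
 */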
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 ppr;

	if (vector == -1)
		return -1;

	/*
	 * We get here even with APIC virtualization enabled, if doing
	 * nested virtualization and L1 runs with the "acknowledge interrupt
	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
	 * because the process would deliver it through the IDT.
	 */

	apic_clear_irr(vector, apic);
	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
		/*
		 * For auto-EOI interrupts, there might be another pending
		 * interrupt above PPR, so check whether to raise another
		 * KVM_REQ_EVENT.
		 */
		apic_update_ppr(apic);
	} else {
		/*
		 * For normal interrupts, PPR has been raised and there cannot
		 * be a higher-priority pending interrupt---except if there was
		 * a concurrent interrupt injection, but that would have
		 * triggered KVM_REQ_EVENT already.
		 */
		apic_set_isr(vector, apic);
		__apic_update_ppr(apic, &ppr);
	}

	return vector;
}

static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s, bool set)
{
	if (apic_x2apic_mode(vcpu->arch.apic)) {
		u32 *id = (u32 *)(s->regs + APIC_ID);
		u32 *ldr = (u32 *)(s->regs + APIC_LDR);

		if (vcpu->kvm->arch.x2apic_format) {
			if (*id != vcpu->vcpu_id)
				return -EINVAL;
		} else {
			if (set)
				*id >>= 24;
			else
				*id <<= 24;
		}

		/* In x2APIC mode, the LDR is fixed and based on the id */
		if (set)
			*ldr = kvm_apic_calc_x2apic_ldr(*id);
	}

	return 0;
}

int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
	return kvm_apic_state_fixup(vcpu, s, false);
}

int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int r;


	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));

	r = kvm_apic_state_fixup(vcpu, s, true);
	if (r)
		return r;
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));

	recalculate_apic_map(vcpu->kvm);
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	apic_update_lvtt(apic);
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = vcpu->arch.apicv_active ?
				1 : count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	if (vcpu->arch.apicv_active) {
		kvm_x86_ops->apicv_post_state_restore(vcpu);
		kvm_x86_ops->hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
		kvm_x86_ops->hwapic_isr_update(vcpu,
				apic_find_highest_isr(apic));
	}
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	if (ioapic_in_kernel(vcpu->kvm))
		kvm_rtc_eoi_tracking_restore_one(vcpu);

	vcpu->arch.apic_arb_prio = 0;

	return 0;
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!lapic_in_kernel(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether the guest triggered PV EOI since the
 * last entry. If yes, perform the EOI on the guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear the pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from a performance point of view,
	 * it makes sure PV EOI is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				  sizeof(u32)))
		return;

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				sizeof(u32));
}

int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (vapic_addr) {
		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
					&vcpu->arch.apic->vapic_cache,
					vapic_addr, sizeof(u32)))
			return -EINVAL;
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	} else {
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	}

	vcpu->arch.apic->vapic_addr = vapic_addr;
	return 0;
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_ICR2)
		return 1;

	/* if this is the ICR, write the destination (ICR2) before the command that sends the IPI */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}
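/*
 * Read an x2APIC register through its MSR.  The MSR index maps onto the
 * xAPIC register offset as (msr - APIC_BASE_MSR) << 4, e.g. MSR 0x808
 * (TPR) -> offset 0x80 and MSR 0x830 (ICR) -> offset 0x300.  ICR is the
 * only 64-bit register; its high half is kept in APIC_ICR2, so both halves
 * are stitched together here.  DFR and ICR2 are not architectural x2APIC
 * MSRs, so reads of them fail.
 */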
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;

	if (reg == APIC_DFR || reg == APIC_ICR2) {
		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
			   reg);
		return 1;
	}

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!lapic_in_kernel(vcpu))
		return 1;

	/* if this is the ICR, write the destination (ICR2) before the command that sends the IPI */
	if (reg == APIC_ICR)
		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return kvm_lapic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!lapic_in_kernel(vcpu))
		return 1;

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
	unsigned long new_len;

	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;

	if (addr == ghc->gpa && len <= ghc->len)
		new_len = ghc->len;
	else
		new_len = len;

	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}
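/*
 * Process INIT and SIPI events latched in apic->pending_events.  While the
 * vCPU is in SMM, INIT stays latched and SIPIs are dropped; a SIPI is only
 * acted upon when the vCPU is in the wait-for-SIPI
 * (KVM_MP_STATE_INIT_RECEIVED) state.
 */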
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u8 sipi_vector;
	unsigned long pe;

	if (!lapic_in_kernel(vcpu) || !apic->pending_events)
		return;

	/*
	 * INITs are latched while in SMM.  Because an SMM CPU cannot
	 * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
	 * and delay processing of INIT until the next RSM.
	 */
	if (is_smm(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		return;
	}

	pe = xchg(&apic->pending_events, 0);
	if (test_bit(KVM_APIC_INIT, &pe)) {
		kvm_vcpu_reset(vcpu, true);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_bit(KVM_APIC_SIPI, &pe) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
		apic_debug("vcpu %d received sipi with vector # %x\n",
			   vcpu->vcpu_id, sipi_vector);
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}

void kvm_lapic_exit(void)
{
	static_key_deferred_flush(&apic_hw_disabled);
	static_key_deferred_flush(&apic_sw_disabled);
}