/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM			6
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* followed define is not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	*((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline void apic_set_vector(int vec, void *bitmap)
{
	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
		if (val & APIC_SPIV_APIC_ENABLED)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

#define KVM_X2APIC_CID_BITS 0

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	if (!new)
		goto out;

	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table.  After reset, APICs are in xapic/flat mode,
		 * so if we find an apic with a different setting we assume
		 * this is the mode the OS wants all apics to be in; build
		 * the lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
			new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		kfree_rcu(old, rcu);

	kvm_vcpu_request_scan_ioapic(kvm);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) ==
			APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK,		/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return fls(*reg) - 1 + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}

void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
{
	u32 i, pir_val;
	struct kvm_lapic *apic = vcpu->arch.apic;

	for (i = 0; i <= 7; i++) {
		pir_val = xchg(&pir[i], 0);
		if (pir_val)
			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	apic_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will always be true with
	 * virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}
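
/*
 * Host-side bookkeeping for the in-service register: isr_count tracks how
 * many ISR bits are set and highest_isr_cache remembers the most recently
 * injected (and therefore highest) in-service vector, so the common EOI
 * path can avoid rescanning all 256 bits.  With hardware virtual interrupt
 * delivery these fields are not used for lookups (isr_count stays at 1).
 */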
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * ISR (in service register) bit is set when injecting an interrupt.
	 * The highest vector is injected. Thus the latest bit set matches
	 * the highest bit in ISR.
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;
	highest_irr = apic_find_highest_irr(vcpu->arch.apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     unsigned long *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		unsigned long *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/* Note that isr_count is always 1 with vid enabled */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	for (i = 0; i < 8; i++)
		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
}
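
/*
 * Recompute the processor priority (PPR) from the task priority (TPR) and
 * the highest in-service vector: whichever has the higher priority class
 * (bits 7:4) wins.  A drop in PPR can unmask a pending interrupt, so an
 * event check is requested in that case.
 */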
static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	if (old_ppr != ppr) {
		apic_set_reg(apic, APIC_PROCPRI, ppr);
		if (ppr < old_ppr)
			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	}
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
	return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));

	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		if (logical_id & mda)
			result = 1;
		break;
	case APIC_DFR_CLUSTER:
		if (((logical_id >> 4) == (mda >> 0x4))
		    && (logical_id & mda & 0xf))
			result = 1;
		break;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
		break;
	}

	return result;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			int short_hand, int dest, int dest_mode)
{
	int result = 0;
	struct kvm_lapic *target = vcpu->arch.apic;

	apic_debug("target %p, source %p, dest 0x%x, "
		   "dest_mode 0x%x, short_hand 0x%x\n",
		   target, source, dest, dest_mode, short_hand);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == 0)
			/* Physical mode. */
			result = kvm_apic_match_physical_addr(target, dest);
		else
			/* Logical mode. */
			result = kvm_apic_match_logical_addr(target, dest);
		break;
	case APIC_DEST_SELF:
		result = (target == source);
		break;
	case APIC_DEST_ALLINC:
		result = 1;
		break;
	case APIC_DEST_ALLBUT:
		result = (target != source);
		break;
	default:
		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
			   short_hand);
		break;
	}

	return result;
}

bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
{
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	struct kvm_lapic **dst;
	int i;
	bool ret = false;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
		return true;
	}

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (!map)
		goto out;

	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff)
			goto out;
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);

		if (irq->delivery_mode == APIC_DM_LOWEST) {
			int l = -1;
			for_each_set_bit(i, &bitmap, 16) {
				if (!dst[i])
					continue;
				if (l < 0)
					l = i;
				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
					l = i;
			}

			bitmap = (l >= 0) ? 1 << l : 0;
		}
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (*r < 0)
			*r = 0;
		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
	}

	ret = true;
out:
	rcu_read_unlock();
	return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     unsigned long *dest_map)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
	case APIC_DM_FIXED:
		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		result = 1;

		if (dest_map)
			__set_bit(vcpu->vcpu_id, dest_map);

		if (kvm_x86_ops->deliver_posted_interrupt)
			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
		else {
			apic_set_irr(vector, apic);

			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, false);
		break;

	case APIC_DM_REMRD:
		result = 1;
		vcpu->arch.pv.pv_unhalted = 1;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_SMI:
		apic_debug("Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
			/* make sure pending_events is visible before sending
			 * the request
			 */
			smp_wmb();
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
		smp_wmb();
		set_bit(KVM_APIC_SIPI, &apic->pending_events);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
	}
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every write to EOI has a corresponding ISR bit set; one
	 * example is when the kernel checks the timer in setup_IO_APIC().
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * This interface assumes a trap-like exit, which has already finished the
 * desired side effects, including vISR and vPPR update.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 ||
		apic->lapic_timer.period == 0)
		return 0;

	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
	if (ktime_to_ns(remaining) < 0)
		remaining = ktime_set(0, 0);

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}
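
/*
 * TPR access reporting: when userspace has enabled it
 * (tpr_access_reporting), every guest access to the task priority register
 * is reported back through the vcpu's kvm_run->tpr_access area, recording
 * the faulting RIP and whether the access was a write.
 */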
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_apic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_apic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return kvm_apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			  gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}
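
/*
 * (Re)arm the emulated LAPIC timer.  In one-shot and periodic mode the
 * period is derived from the initial count, the bus cycle length and the
 * divide configuration; in TSC-deadline mode the expiry is computed from
 * the distance between the current guest TSC and the programmed deadline.
 */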
static void start_apic_timer(struct kvm_lapic *apic)
{
	ktime_t now;
	atomic_set(&apic->lapic_timer.pending, 0);

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		/* lapic timer in oneshot or periodic mode */
		now = apic->lapic_timer.timer.base->get_time();
		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
			    * APIC_BUS_CYCLE_NS * apic->divide_count;

		if (!apic->lapic_timer.period)
			return;
		/*
		 * Do not allow the guest to program periodic timers with small
		 * interval, since the hrtimers are not throttled by the host
		 * scheduler.
		 */
		if (apic_lvtt_period(apic)) {
			s64 min_period = min_timer_period_us * 1000LL;

			if (apic->lapic_timer.period < min_period) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested %lld ns "
				    "lapic timer period limited to %lld ns\n",
				    apic->vcpu->vcpu_id,
				    apic->lapic_timer.period, min_period);
				apic->lapic_timer.period = min_period;
			}
		}

		hrtimer_start(&apic->lapic_timer.timer,
			      ktime_add_ns(now, apic->lapic_timer.period),
			      HRTIMER_MODE_ABS);

		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
			   PRIx64 ", "
			   "timer initial count 0x%x, period %lldns, "
			   "expire @ 0x%016" PRIx64 ".\n", __func__,
			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
			   kvm_apic_get_reg(apic, APIC_TMICT),
			   apic->lapic_timer.period,
			   ktime_to_ns(ktime_add_ns(now,
					apic->lapic_timer.period)));
	} else if (apic_lvtt_tscdeadline(apic)) {
		/* lapic timer in tsc deadline mode */
		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
		u64 ns = 0;
		struct kvm_vcpu *vcpu = apic->vcpu;
		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
		unsigned long flags;

		if (unlikely(!tscdeadline || !this_tsc_khz))
			return;

		local_irq_save(flags);

		now = apic->lapic_timer.timer.base->get_time();
		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
		if (likely(tscdeadline > guest_tsc)) {
			ns = (tscdeadline - guest_tsc) * 1000000ULL;
			do_div(ns, this_tsc_khz);
		}
		hrtimer_start(&apic->lapic_timer.timer,
			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

		local_irq_restore(flags);
	}
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));

	if (apic_lvt_nmi_mode(lvt0_val)) {
		if (!nmi_wd_enabled) {
			apic_debug("Receive NMI setting on APIC_LVT0 "
				   "for cpu %d\n", apic->vcpu->vcpu_id);
			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
		}
	} else if (nmi_wd_enabled)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}

static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < APIC_LVT_NUM; i++) {
				lvt_val = kvm_apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
		    apic->lapic_timer.timer_mode_mask) !=
		   (val & apic->lapic_timer.timer_mode_mask))
			hrtimer_cancel(&apic->lapic_timer.timer);

		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		apic_set_reg(apic, APIC_LVTT, val);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}

static int apic_mmio_write(struct kvm_io_device *this,
			   gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	/*
	 * APIC register must be aligned on 128-bits boundary.
	 * 32/64/128 bits registers must be accessed thru 32 bits.
	 * Refer SDM 8.4.1
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32*)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_has_lapic(vcpu))
		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	apic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	apic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;

	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}
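
/*
 * Update the APIC base MSR.  Toggling the hardware enable bit adjusts the
 * apic_hw_disabled jump label and rebuilds the APIC map; enabling x2APIC
 * mode derives the initial logical destination from the APIC id and lets
 * the vendor code reconfigure its virtual x2APIC handling.
 */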
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic) {
		value |= MSR_IA32_APICBASE_BSP;
		vcpu->arch.apic_base = value;
		return;
	}

	/* update jump label if enable bit changes */
	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE)
			static_key_slow_dec_deferred(&apic_hw_disabled);
		else
			static_key_slow_inc(&apic_hw_disabled.key);
		recalculate_apic_map(vcpu->kvm);
	}

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;
	if ((old_value ^ value) & X2APIC_ENABLE) {
		if (value & X2APIC_ENABLE) {
			u32 id = kvm_apic_id(apic);
			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
			kvm_apic_set_ldr(apic, ldr);
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
		} else
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
	}

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	/* with FSB delivery interrupt, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}

void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;
	int i;

	apic_debug("%s\n", __func__);

	ASSERT(vcpu);
	apic = vcpu->arch.apic;
	ASSERT(apic != NULL);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	kvm_apic_set_id(apic, vcpu->vcpu_id);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_set_reg(apic, APIC_LVT0,
		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));

	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	apic_set_reg(apic, APIC_TASKPRI, 0);
	kvm_apic_set_ldr(apic, 0);
	apic_set_reg(apic, APIC_ESR, 0);
	apic_set_reg(apic, APIC_ICR, 0);
	apic_set_reg(apic, APIC_ICR2, 0);
	apic_set_reg(apic, APIC_TDCR, 0);
	apic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
			apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}
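
/*
 * Deliver an interrupt sourced from a local vector table entry (timer,
 * LINT0/1, ...).  The entry is honoured only if the APIC is hardware
 * enabled and the LVT entry is not masked; vector, delivery mode and
 * trigger mode are taken from the LVT register itself.
 */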
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_apic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
	struct kvm_vcpu *vcpu = apic->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (!atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (lapic_is_periodic(apic)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	kvm_lapic_set_base(vcpu,
			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);

	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_lapic_reset(vcpu);
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
		return -1;

	apic_update_ppr(apic);
	highest_irr = apic_find_highest_irr(apic);
	if ((highest_irr == -1) ||
	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
		return -1;
	return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		kvm_apic_local_deliver(apic, APIC_LVTT);
		atomic_set(&apic->lapic_timer.pending, 0);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vector == -1)
		return -1;

	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
	return vector;
}

void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	/* call kvm_apic_set_id() to put apic into apic_map */
	kvm_apic_set_id(apic, kvm_apic_id(apic));
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
				1 : count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	kvm_rtc_eoi_tracking_restore_one(vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether the guest triggered PV EOI since the
 * last entry. If yes, set EOI on the guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from a performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				sizeof(u32));

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
1730 */ 1731 return; 1732 } 1733 1734 pv_eoi_set_pending(apic->vcpu); 1735 } 1736 1737 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 1738 { 1739 u32 data, tpr; 1740 int max_irr, max_isr; 1741 struct kvm_lapic *apic = vcpu->arch.apic; 1742 1743 apic_sync_pv_eoi_to_guest(vcpu, apic); 1744 1745 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1746 return; 1747 1748 tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; 1749 max_irr = apic_find_highest_irr(apic); 1750 if (max_irr < 0) 1751 max_irr = 0; 1752 max_isr = apic_find_highest_isr(apic); 1753 if (max_isr < 0) 1754 max_isr = 0; 1755 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 1756 1757 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 1758 sizeof(u32)); 1759 } 1760 1761 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1762 { 1763 if (vapic_addr) { 1764 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 1765 &vcpu->arch.apic->vapic_cache, 1766 vapic_addr, sizeof(u32))) 1767 return -EINVAL; 1768 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 1769 } else { 1770 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 1771 } 1772 1773 vcpu->arch.apic->vapic_addr = vapic_addr; 1774 return 0; 1775 } 1776 1777 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1778 { 1779 struct kvm_lapic *apic = vcpu->arch.apic; 1780 u32 reg = (msr - APIC_BASE_MSR) << 4; 1781 1782 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1783 return 1; 1784 1785 /* if this is ICR write vector before command */ 1786 if (msr == 0x830) 1787 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1788 return apic_reg_write(apic, reg, (u32)data); 1789 } 1790 1791 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 1792 { 1793 struct kvm_lapic *apic = vcpu->arch.apic; 1794 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; 1795 1796 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1797 return 1; 1798 1799 if (apic_reg_read(apic, reg, 4, &low)) 1800 return 1; 1801 if (msr == 0x830) 1802 apic_reg_read(apic, APIC_ICR2, 4, &high); 1803 1804 *data = (((u64)high) << 32) | low; 1805 1806 return 0; 1807 } 1808 1809 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 1810 { 1811 struct kvm_lapic *apic = vcpu->arch.apic; 1812 1813 if (!kvm_vcpu_has_lapic(vcpu)) 1814 return 1; 1815 1816 /* if this is ICR write vector before command */ 1817 if (reg == APIC_ICR) 1818 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1819 return apic_reg_write(apic, reg, (u32)data); 1820 } 1821 1822 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 1823 { 1824 struct kvm_lapic *apic = vcpu->arch.apic; 1825 u32 low, high = 0; 1826 1827 if (!kvm_vcpu_has_lapic(vcpu)) 1828 return 1; 1829 1830 if (apic_reg_read(apic, reg, 4, &low)) 1831 return 1; 1832 if (reg == APIC_ICR) 1833 apic_reg_read(apic, APIC_ICR2, 4, &high); 1834 1835 *data = (((u64)high) << 32) | low; 1836 1837 return 0; 1838 } 1839 1840 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) 1841 { 1842 u64 addr = data & ~KVM_MSR_ENABLED; 1843 if (!IS_ALIGNED(addr, 4)) 1844 return 1; 1845 1846 vcpu->arch.pv_eoi.msr_val = data; 1847 if (!pv_eoi_enabled(vcpu)) 1848 return 0; 1849 return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, 1850 addr, sizeof(u8)); 1851 } 1852 1853 void kvm_apic_accept_events(struct kvm_vcpu *vcpu) 1854 { 1855 struct kvm_lapic *apic = vcpu->arch.apic; 1856 unsigned int sipi_vector; 1857 unsigned long pe; 1858 1859 if 
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	unsigned int sipi_vector;
	unsigned long pe;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
		return;

	pe = xchg(&apic->pending_events, 0);

	if (test_bit(KVM_APIC_INIT, &pe)) {
		kvm_lapic_reset(vcpu);
		kvm_vcpu_reset(vcpu);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_bit(KVM_APIC_SIPI, &pe) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
		pr_debug("vcpu %d received sipi with vector # %x\n",
			 vcpu->vcpu_id, sipi_vector);
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}