/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM 6
/* 0x14 is the version for Xeon and Pentium (Intel SDM 8.4.8) */
#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
/* The following defines are not in apicdef.h */
#define APIC_SHORT_MASK 0xc0000
#define APIC_DEST_NOSHORT 0x0
#define APIC_DEST_MASK 0x800
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32

#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
        *((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
        return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
        return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_vector(int vec, void *bitmap)
{
        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_set_vector(int vec, void *bitmap)
{
        set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
        clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
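/*
 * Rough sketch of how the two jump labels above are meant to be consumed
 * (illustrative only; the real accessors live in lapic.h, and slow_check()
 * is a stand-in name): while every APIC is hardware/software enabled, the
 * keys stay unpatched and checks like kvm_apic_hw_enabled() reduce to a
 * straight-line fast path.  The _deferred variants rate-limit the expensive
 * code re-patching when a guest toggles the enable bits rapidly:
 *
 *	if (static_key_false(&apic_hw_disabled.key))
 *		return slow_check(apic);	/- some APIC is hw-disabled -/
 *	return true;				/- common case, no load -/
 */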
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
        if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
                if (val & APIC_SPIV_APIC_ENABLED)
                        static_key_slow_dec_deferred(&apic_sw_disabled);
                else
                        static_key_slow_inc(&apic_sw_disabled.key);
        }
        apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK \
        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK \
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
        return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
        return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
{
        u16 cid;

        ldr >>= 32 - map->ldr_bits;
        cid = (ldr >> map->cid_shift) & map->cid_mask;

        BUG_ON(cid >= ARRAY_SIZE(map->logical_map));

        return cid;
}

static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
{
        ldr >>= (32 - map->ldr_bits);
        return ldr & map->lid_mask;
}
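/*
 * Worked example for the two helpers above (illustrative, xAPIC cluster
 * mode, i.e. ldr_bits = 8, cid_shift = 4, cid_mask = lid_mask = 0xf):
 * for an LDR of 0x52000000, ldr >> 24 = 0x52, so apic_cluster_id()
 * returns cluster 0x5 and apic_logical_id() returns the in-cluster
 * bitmask 0x2.
 */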
static void recalculate_apic_map(struct kvm *kvm)
{
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;

        new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

        mutex_lock(&kvm->arch.apic_map_lock);

        if (!new)
                goto out;

        new->ldr_bits = 8;
        /* flat mode is default */
        new->cid_shift = 8;
        new->cid_mask = 0;
        new->lid_mask = 0xff;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
                u16 cid, lid;
                u32 ldr;

                if (!kvm_apic_present(vcpu))
                        continue;

                /*
                 * All APICs have to be configured in the same mode by an OS.
                 * We take advantage of this while building the logical id
                 * lookup table.  After reset APICs are in xAPIC/flat mode,
                 * so if we find an apic with a different setting we assume
                 * this is the mode the OS wants all apics to be in; build
                 * the lookup table accordingly.
                 */
                if (apic_x2apic_mode(apic)) {
                        new->ldr_bits = 32;
                        new->cid_shift = 16;
                        new->cid_mask = new->lid_mask = 0xffff;
                } else if (kvm_apic_sw_enabled(apic) &&
                                !new->cid_mask /* flat mode */ &&
                                kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
                        new->cid_shift = 4;
                        new->cid_mask = 0xf;
                        new->lid_mask = 0xf;
                }

                new->phys_map[kvm_apic_id(apic)] = apic;

                ldr = kvm_apic_get_reg(apic, APIC_LDR);
                cid = apic_cluster_id(new, ldr);
                lid = apic_logical_id(new, ldr);

                if (lid)
                        new->logical_map[cid][ffs(lid) - 1] = apic;
        }
out:
        old = rcu_dereference_protected(kvm->arch.apic_map,
                        lockdep_is_held(&kvm->arch.apic_map_lock));
        rcu_assign_pointer(kvm->arch.apic_map, new);
        mutex_unlock(&kvm->arch.apic_map_lock);

        if (old)
                kfree_rcu(old, rcu);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
        apic_set_reg(apic, APIC_ID, id << 24);
        recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
        apic_set_reg(apic, APIC_LDR, id);
        recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
        return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
        return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
        return ((kvm_apic_get_reg(apic, APIC_LVTT) &
                apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
        return ((kvm_apic_get_reg(apic, APIC_LVTT) &
                apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
        return ((kvm_apic_get_reg(apic, APIC_LVTT) &
                apic->lapic_timer.timer_mode_mask) ==
                        APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct kvm_cpuid_entry2 *feat;
        u32 v = APIC_VERSION;

        if (!kvm_vcpu_has_lapic(vcpu))
                return;

        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
                v |= APIC_LVR_DIRECTED_EOI;
        apic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
        LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
        LINT_MASK, LINT_MASK,   /* LVT0-1 */
        LVT_MASK                /* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
        int vec;
        u32 *reg;

        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                if (*reg)
                        return fls(*reg) - 1 + vec;
        }

        return -1;
}

static u8 count_vectors(void *bitmap)
{
        int vec;
        u32 *reg;
        u8 count = 0;

        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                count += hweight32(*reg);
        }

        return count;
}
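/*
 * Sketch of the register layout the scans above rely on: the 256 vectors
 * of IRR/ISR/TMR are spread over eight 32-bit registers spaced 16 bytes
 * apart, hence REG_POS(vec) = (vec / 32) * 0x10.  For example, if only
 * vector 0x31 (49) is set, the second register reads 0x00020000 and
 * find_highest_vector() returns fls(0x00020000) - 1 + 32 = 49.
 */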
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
        apic->irr_pending = true;
        return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
        return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
        int result;

        if (!apic->irr_pending)
                return -1;

        result = apic_search_irr(apic);
        ASSERT(result == -1 || result >= 16);

        return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
        apic->irr_pending = false;
        apic_clear_vector(vec, apic->regs + APIC_IRR);
        if (apic_search_irr(apic) != -1)
                apic->irr_pending = true;
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
        if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
                ++apic->isr_count;
        BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
        /*
         * ISR (in service register) bit is set when injecting an interrupt.
         * The highest vector is injected.  Thus the latest bit set matches
         * the highest bit in ISR.
         */
        apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
        if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
                --apic->isr_count;
        BUG_ON(apic->isr_count < 0);
        apic->highest_isr_cache = -1;
}
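/*
 * Note on the cache maintained above: interrupts are always injected
 * highest-vector-first, so the vector passed to the latest apic_set_isr()
 * is also the highest bit set in ISR and can be cached.  apic_clear_isr()
 * invalidates the cache (-1), forcing the next apic_find_highest_isr()
 * to rescan the ISR registers.
 */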
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
        int highest_irr;

        /* This may race with setting of irr in __apic_accept_irq() and
         * the value returned may be wrong, but kvm_vcpu_kick() in
         * __apic_accept_irq will cause a vmexit immediately and the value
         * will be recalculated on the next vmentry.
         */
        if (!kvm_vcpu_has_lapic(vcpu))
                return 0;
        highest_irr = apic_find_highest_irr(vcpu->arch.apic);

        return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
                        irq->level, irq->trig_mode);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
                                      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
                                     sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
        u8 val;

        if (pv_eoi_get_user(vcpu, &val) < 0)
                apic_debug("Can't read EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
        return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
                apic_debug("Can't set EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
                apic_debug("Can't clear EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
        int result;

        if (!apic->isr_count)
                return -1;
        if (likely(apic->highest_isr_cache != -1))
                return apic->highest_isr_cache;

        result = find_highest_vector(apic->regs + APIC_ISR);
        ASSERT(result == -1 || result >= 16);

        return result;
}
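/*
 * PPR arithmetic used below, by example (illustrative values): the
 * processor priority is TPR or the class of the highest in-service
 * vector, whichever class is higher.  With TPR = 0x42 and ISRV = 0x51,
 * 0x40 < 0x50 so PPR becomes 0x50; with TPR = 0x61 it would be 0x61.
 */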
static void apic_update_ppr(struct kvm_lapic *apic)
{
        u32 tpr, isrv, ppr, old_ppr;
        int isr;

        old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
        tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
        isr = apic_find_highest_isr(apic);
        isrv = (isr != -1) ? isr : 0;

        if ((tpr & 0xf0) >= (isrv & 0xf0))
                ppr = tpr & 0xff;
        else
                ppr = isrv & 0xf0;

        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
                   apic, ppr, isr, isrv);

        if (old_ppr != ppr) {
                apic_set_reg(apic, APIC_PROCPRI, ppr);
                if (ppr < old_ppr)
                        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
        }
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
        apic_set_reg(apic, APIC_TASKPRI, tpr);
        apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
        return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
        int result = 0;
        u32 logical_id;

        if (apic_x2apic_mode(apic)) {
                logical_id = kvm_apic_get_reg(apic, APIC_LDR);
                return logical_id & mda;
        }

        logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));

        switch (kvm_apic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
                if (logical_id & mda)
                        result = 1;
                break;
        case APIC_DFR_CLUSTER:
                if (((logical_id >> 4) == (mda >> 0x4))
                    && (logical_id & mda & 0xf))
                        result = 1;
                break;
        default:
                apic_debug("Bad DFR vcpu %d: %08x\n",
                           apic->vcpu->vcpu_id,
                           kvm_apic_get_reg(apic, APIC_DFR));
                break;
        }

        return result;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                        int short_hand, int dest, int dest_mode)
{
        int result = 0;
        struct kvm_lapic *target = vcpu->arch.apic;

        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x\n",
                   target, source, dest, dest_mode, short_hand);

        ASSERT(target);
        switch (short_hand) {
        case APIC_DEST_NOSHORT:
                if (dest_mode == 0)
                        /* Physical mode. */
                        result = kvm_apic_match_physical_addr(target, dest);
                else
                        /* Logical mode. */
                        result = kvm_apic_match_logical_addr(target, dest);
                break;
        case APIC_DEST_SELF:
                result = (target == source);
                break;
        case APIC_DEST_ALLINC:
                result = 1;
                break;
        case APIC_DEST_ALLBUT:
                result = (target != source);
                break;
        default:
                apic_debug("kvm: apic: Bad dest shorthand value %x\n",
                           short_hand);
                break;
        }

        return result;
}
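/*
 * Note on the fast path below: it only handles what the cached
 * kvm_apic_map can answer without touching every vcpu -- the SELF
 * shorthand, physical-mode unicast and logical-mode lookups.  Everything
 * else (other shorthands, physical broadcast to 0xff, lowest-priority
 * without a map) returns false so the caller falls back to the slow scan.
 */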
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                struct kvm_lapic_irq *irq, int *r)
{
        struct kvm_apic_map *map;
        unsigned long bitmap = 1;
        struct kvm_lapic **dst;
        int i;
        bool ret = false;

        *r = -1;

        if (irq->shorthand == APIC_DEST_SELF) {
                *r = kvm_apic_set_irq(src->vcpu, irq);
                return true;
        }

        if (irq->shorthand)
                return false;

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        if (!map)
                goto out;

        if (irq->dest_mode == 0) { /* physical mode */
                if (irq->delivery_mode == APIC_DM_LOWEST ||
                                irq->dest_id == 0xff)
                        goto out;
                dst = &map->phys_map[irq->dest_id & 0xff];
        } else {
                u32 mda = irq->dest_id << (32 - map->ldr_bits);

                dst = map->logical_map[apic_cluster_id(map, mda)];

                bitmap = apic_logical_id(map, mda);

                if (irq->delivery_mode == APIC_DM_LOWEST) {
                        int l = -1;
                        for_each_set_bit(i, &bitmap, 16) {
                                if (!dst[i])
                                        continue;
                                if (l < 0)
                                        l = i;
                                else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
                                        l = i;
                        }

                        bitmap = (l >= 0) ? 1 << l : 0;
                }
        }

        for_each_set_bit(i, &bitmap, 16) {
                if (!dst[i])
                        continue;
                if (*r < 0)
                        *r = 0;
                *r += kvm_apic_set_irq(dst[i]->vcpu, irq);
        }

        ret = true;
out:
        rcu_read_unlock();
        return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode)
{
        int result = 0;
        struct kvm_vcpu *vcpu = apic->vcpu;

        switch (delivery_mode) {
        case APIC_DM_LOWEST:
                vcpu->arch.apic_arb_prio++;
                /* fall through */
        case APIC_DM_FIXED:
                /* FIXME add logic for vcpu on reset */
                if (unlikely(!apic_enabled(apic)))
                        break;

                if (trig_mode) {
                        apic_debug("level trig mode for vector %d", vector);
                        apic_set_vector(vector, apic->regs + APIC_TMR);
                } else
                        apic_clear_vector(vector, apic->regs + APIC_TMR);

                result = !apic_test_and_set_irr(vector, apic);
                trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
                                          trig_mode, vector, !result);
                if (!result) {
                        if (trig_mode)
                                apic_debug("level trig mode repeatedly for "
                                           "vector %d", vector);
                        break;
                }

                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_REMRD:
                apic_debug("Ignoring delivery mode 3\n");
                break;

        case APIC_DM_SMI:
                apic_debug("Ignoring guest SMI\n");
                break;

        case APIC_DM_NMI:
                result = 1;
                kvm_inject_nmi(vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_INIT:
                if (!trig_mode || level) {
                        result = 1;
                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                } else {
                        apic_debug("Ignoring de-assert INIT to vcpu %d\n",
                                   vcpu->vcpu_id);
                }
                break;

        case APIC_DM_STARTUP:
                apic_debug("SIPI to vcpu %d vector 0x%02x\n",
                           vcpu->vcpu_id, vector);
                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
                        result = 1;
                        vcpu->arch.sipi_vector = vector;
                        vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                break;

        case APIC_DM_EXTINT:
                /*
                 * Should only be called by kvm_apic_local_deliver() with LVT0,
                 * before NMI watchdog was enabled.  Already handled by
                 * kvm_apic_accept_pic_intr().
                 */
                break;

        default:
                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
                       delivery_mode);
                break;
        }
        return result;
}
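/*
 * Note on the comparison below: apic_arb_prio is incremented each time a
 * vcpu accepts a lowest-priority interrupt (APIC_DM_LOWEST above), so
 * picking the destination with the smallest counter approximates the
 * round-robin arbitration real hardware performs among equal-priority
 * targets.
 */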
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
        int vector = apic_find_highest_isr(apic);

        trace_kvm_eoi(apic, vector);

        /*
         * Not every write to EOI has a corresponding ISR bit set; one
         * example is when the kernel checks the timer in setup_IO_APIC().
         */
        if (vector == -1)
                return vector;

        apic_clear_isr(vector, apic);
        apic_update_ppr(apic);

        if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
            kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
                int trigger_mode;

                if (apic_test_vector(vector, apic->regs + APIC_TMR))
                        trigger_mode = IOAPIC_LEVEL_TRIG;
                else
                        trigger_mode = IOAPIC_EDGE_TRIG;
                kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
        }
        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
        return vector;
}

static void apic_send_ipi(struct kvm_lapic *apic)
{
        u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
        u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
        struct kvm_lapic_irq irq;

        irq.vector = icr_low & APIC_VECTOR_MASK;
        irq.delivery_mode = icr_low & APIC_MODE_MASK;
        irq.dest_mode = icr_low & APIC_DEST_MASK;
        irq.level = icr_low & APIC_INT_ASSERT;
        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
        irq.shorthand = icr_low & APIC_SHORT_MASK;
        if (apic_x2apic_mode(apic))
                irq.dest_id = icr_high;
        else
                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

        trace_kvm_apic_ipi(icr_low, irq.dest_id);

        apic_debug("icr_high 0x%x, icr_low 0x%x, "
                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
                   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
                   icr_high, icr_low, irq.shorthand, irq.dest_id,
                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
                   irq.vector);

        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
        ktime_t remaining;
        s64 ns;
        u32 tmcct;

        ASSERT(apic != NULL);

        /* if initial count is 0, current count should also be 0 */
        if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
                return 0;

        remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
        if (ktime_to_ns(remaining) < 0)
                remaining = ktime_set(0, 0);

        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
        tmcct = div64_u64(ns,
                         (APIC_BUS_CYCLE_NS * apic->divide_count));

        return tmcct;
}
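/*
 * Worked example for apic_get_tmcct() above (illustrative, with
 * APIC_BUS_CYCLE_NS == 1): TMICT = 1000000 and divide_count = 16 give a
 * period of 16000000 ns; if 4000000 ns remain on the hrtimer, the guest
 * reads TMCCT = 4000000 / (1 * 16) = 250000 remaining bus cycles.
 */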
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
        struct kvm_vcpu *vcpu = apic->vcpu;
        struct kvm_run *run = vcpu->run;

        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
        run->tpr_access.rip = kvm_rip_read(vcpu);
        run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
        if (apic->vcpu->arch.tpr_access_reporting)
                __report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
        u32 val = 0;

        if (offset >= LAPIC_MMIO_LENGTH)
                return 0;

        switch (offset) {
        case APIC_ID:
                if (apic_x2apic_mode(apic))
                        val = kvm_apic_id(apic);
                else
                        val = kvm_apic_id(apic) << 24;
                break;
        case APIC_ARBPRI:
                apic_debug("Access APIC ARBPRI register which is for P6\n");
                break;

        case APIC_TMCCT:        /* Timer CCR */
                if (apic_lvtt_tscdeadline(apic))
                        return 0;

                val = apic_get_tmcct(apic);
                break;
        case APIC_PROCPRI:
                apic_update_ppr(apic);
                val = kvm_apic_get_reg(apic, offset);
                break;
        case APIC_TASKPRI:
                report_tpr_access(apic, false);
                /* fall through */
        default:
                val = kvm_apic_get_reg(apic, offset);
                break;
        }

        return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
        return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
                void *data)
{
        unsigned char alignment = offset & 0xf;
        u32 result;
        /* this bitmask has a bit cleared for each reserved register */
        static const u64 rmask = 0x43ff01ffffffe70cULL;

        if ((alignment + len) > 4) {
                apic_debug("KVM_APIC_READ: alignment error %x %d\n",
                           offset, len);
                return 1;
        }

        if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
                apic_debug("KVM_APIC_READ: read reserved register %x\n",
                           offset);
                return 1;
        }

        result = __apic_read(apic, offset & ~0xf);

        trace_kvm_apic_read(offset, result);

        switch (len) {
        case 1:
        case 2:
        case 4:
                memcpy(data, (char *)&result + alignment, len);
                break;
        default:
                printk(KERN_ERR "Local APIC read with len = %x, "
                       "should be 1, 2, or 4 instead\n", len);
                break;
        }
        return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
        return kvm_apic_hw_enabled(apic) &&
            addr >= apic->base_address &&
            addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
                          gpa_t address, int len, void *data)
{
        struct kvm_lapic *apic = to_lapic(this);
        u32 offset = address - apic->base_address;

        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;

        apic_reg_read(apic, offset, len, data);

        return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
        u32 tmp1, tmp2, tdcr;

        tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
        tmp1 = tdcr & 0xf;
        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
        apic->divide_count = 0x1 << (tmp2 & 0x7);

        apic_debug("timer divide count is 0x%x\n",
                   apic->divide_count);
}
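/*
 * TDCR encoding sketch for update_divide_count() above (illustrative):
 * bits 0-1 and 3 of the divide configuration register select the divisor.
 * tdcr = 0xa gives tmp2 = (0x2 | (0x8 >> 1)) + 1 = 7, i.e. divide_count =
 * 1 << 7 = 128; tdcr = 0xb gives tmp2 = 8, which the "& 0x7" wraps to 0,
 * i.e. divide-by-one.
 */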
static void start_apic_timer(struct kvm_lapic *apic)
{
        ktime_t now;

        atomic_set(&apic->lapic_timer.pending, 0);

        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
                /* lapic timer in oneshot or periodic mode */
                now = apic->lapic_timer.timer.base->get_time();
                apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
                            * APIC_BUS_CYCLE_NS * apic->divide_count;

                if (!apic->lapic_timer.period)
                        return;
                /*
                 * Do not allow the guest to program periodic timers with small
                 * interval, since the hrtimers are not throttled by the host
                 * scheduler.
                 */
                if (apic_lvtt_period(apic)) {
                        s64 min_period = min_timer_period_us * 1000LL;

                        if (apic->lapic_timer.period < min_period) {
                                pr_info_ratelimited(
                                    "kvm: vcpu %i: requested %lld ns "
                                    "lapic timer period limited to %lld ns\n",
                                    apic->vcpu->vcpu_id,
                                    apic->lapic_timer.period, min_period);
                                apic->lapic_timer.period = min_period;
                        }
                }

                hrtimer_start(&apic->lapic_timer.timer,
                              ktime_add_ns(now, apic->lapic_timer.period),
                              HRTIMER_MODE_ABS);

                apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
                           PRIx64 ", "
                           "timer initial count 0x%x, period %lldns, "
                           "expire @ 0x%016" PRIx64 ".\n", __func__,
                           APIC_BUS_CYCLE_NS, ktime_to_ns(now),
                           kvm_apic_get_reg(apic, APIC_TMICT),
                           apic->lapic_timer.period,
                           ktime_to_ns(ktime_add_ns(now,
                                        apic->lapic_timer.period)));
        } else if (apic_lvtt_tscdeadline(apic)) {
                /* lapic timer in tsc deadline mode */
                u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
                u64 ns = 0;
                struct kvm_vcpu *vcpu = apic->vcpu;
                unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
                unsigned long flags;

                if (unlikely(!tscdeadline || !this_tsc_khz))
                        return;

                local_irq_save(flags);

                now = apic->lapic_timer.timer.base->get_time();
                guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
                if (likely(tscdeadline > guest_tsc)) {
                        ns = (tscdeadline - guest_tsc) * 1000000ULL;
                        do_div(ns, this_tsc_khz);
                }
                hrtimer_start(&apic->lapic_timer.timer,
                              ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

                local_irq_restore(flags);
        }
}
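/*
 * Deadline arithmetic used above, spelled out (illustrative numbers):
 * with virtual_tsc_khz = 2000000 (a 2 GHz guest TSC) and a deadline
 * 6000000 cycles past guest_tsc, ns = 6000000 * 1000000 / 2000000 =
 * 3000000, so the hrtimer is armed 3 ms out.  A deadline already in the
 * past leaves ns = 0 and fires the timer immediately.
 */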
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
        int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));

        if (apic_lvt_nmi_mode(lvt0_val)) {
                if (!nmi_wd_enabled) {
                        apic_debug("Receive NMI setting on APIC_LVT0 "
                                   "for cpu %d\n", apic->vcpu->vcpu_id);
                        apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
                }
        } else if (nmi_wd_enabled)
                apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}

static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
        int ret = 0;

        trace_kvm_apic_write(reg, val);

        switch (reg) {
        case APIC_ID:           /* Local APIC ID */
                if (!apic_x2apic_mode(apic))
                        kvm_apic_set_id(apic, val >> 24);
                else
                        ret = 1;
                break;

        case APIC_TASKPRI:
                report_tpr_access(apic, true);
                apic_set_tpr(apic, val & 0xff);
                break;

        case APIC_EOI:
                apic_set_eoi(apic);
                break;

        case APIC_LDR:
                if (!apic_x2apic_mode(apic))
                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
                else
                        ret = 1;
                break;

        case APIC_DFR:
                if (!apic_x2apic_mode(apic)) {
                        apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
                        recalculate_apic_map(apic->vcpu->kvm);
                } else
                        ret = 1;
                break;

        case APIC_SPIV: {
                u32 mask = 0x3ff;

                if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
                        mask |= APIC_SPIV_DIRECTED_EOI;
                apic_set_spiv(apic, val & mask);
                if (!(val & APIC_SPIV_APIC_ENABLED)) {
                        int i;
                        u32 lvt_val;

                        for (i = 0; i < APIC_LVT_NUM; i++) {
                                lvt_val = kvm_apic_get_reg(apic,
                                                       APIC_LVTT + 0x10 * i);
                                apic_set_reg(apic, APIC_LVTT + 0x10 * i,
                                             lvt_val | APIC_LVT_MASKED);
                        }
                        atomic_set(&apic->lapic_timer.pending, 0);

                }
                break;
        }
        case APIC_ICR:
                /* No delay here, so we always clear the pending bit */
                apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
                apic_send_ipi(apic);
                break;

        case APIC_ICR2:
                if (!apic_x2apic_mode(apic))
                        val &= 0xff000000;
                apic_set_reg(apic, APIC_ICR2, val);
                break;

        case APIC_LVT0:
                apic_manage_nmi_watchdog(apic, val);
                /* fall through */
        case APIC_LVTTHMR:
        case APIC_LVTPC:
        case APIC_LVT1:
        case APIC_LVTERR:
                /* TODO: Check vector */
                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;

                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
                apic_set_reg(apic, reg, val);

                break;

        case APIC_LVTT:
                if ((kvm_apic_get_reg(apic, APIC_LVTT) &
                    apic->lapic_timer.timer_mode_mask) !=
                   (val & apic->lapic_timer.timer_mode_mask))
                        hrtimer_cancel(&apic->lapic_timer.timer);

                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;
                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                apic_set_reg(apic, APIC_LVTT, val);
                break;

        case APIC_TMICT:
                if (apic_lvtt_tscdeadline(apic))
                        break;

                hrtimer_cancel(&apic->lapic_timer.timer);
                apic_set_reg(apic, APIC_TMICT, val);
                start_apic_timer(apic);
                break;

        case APIC_TDCR:
                if (val & 4)
                        apic_debug("KVM_WRITE:TDCR %x\n", val);
                apic_set_reg(apic, APIC_TDCR, val);
                update_divide_count(apic);
                break;

        case APIC_ESR:
                if (apic_x2apic_mode(apic) && val != 0) {
                        apic_debug("KVM_WRITE:ESR not zero %x\n", val);
                        ret = 1;
                }
                break;

        case APIC_SELF_IPI:
                if (apic_x2apic_mode(apic)) {
                        apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
                } else
                        ret = 1;
                break;
        default:
                ret = 1;
                break;
        }
        if (ret)
                apic_debug("Local APIC Write to read-only register %x\n", reg);
        return ret;
}
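/*
 * Note on the MMIO handler below: registers sit on 16-byte boundaries
 * and must be accessed as single 32-bit words, so the only accepted
 * write is len == 4 at an offset with (offset & 0xf) == 0 -- e.g. a
 * 4-byte store at offset 0xb0 reaches APIC_EOI.  Anything else is
 * silently dropped (apart from apic_debug) instead of being forwarded.
 */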
static int apic_mmio_write(struct kvm_io_device *this,
                           gpa_t address, int len, const void *data)
{
        struct kvm_lapic *apic = to_lapic(this);
        unsigned int offset = address - apic->base_address;
        u32 val;

        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;

        /*
         * APIC registers are aligned on 128-bit boundaries;
         * 32/64/128-bit registers must be accessed as 32-bit words.
         * See SDM 8.4.1.
         */
        if (len != 4 || (offset & 0xf)) {
                /* Don't shout loud, $infamous_os would cause only noise. */
                apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
                return 0;
        }

        val = *(u32*)data;

        /* too common printing */
        if (offset != APIC_EOI)
                apic_debug("%s: offset 0x%x with length 0x%x, and value is "
                           "0x%x\n", __func__, offset, len, val);

        apic_reg_write(apic, offset & 0xff0, val);

        return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
        if (kvm_vcpu_has_lapic(vcpu))
                apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!vcpu->arch.apic)
                return;

        hrtimer_cancel(&apic->lapic_timer.timer);

        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
                static_key_slow_dec_deferred(&apic_hw_disabled);

        if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
                static_key_slow_dec_deferred(&apic_sw_disabled);

        if (apic->regs)
                free_page((unsigned long)apic->regs);

        kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
                        apic_lvtt_period(apic))
                return 0;

        return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
                        apic_lvtt_period(apic))
                return;

        hrtimer_cancel(&apic->lapic_timer.timer);
        apic->lapic_timer.tscdeadline = data;
        start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!kvm_vcpu_has_lapic(vcpu))
                return;

        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
                     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
        u64 tpr;

        if (!kvm_vcpu_has_lapic(vcpu))
                return 0;

        tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

        return (tpr & 0xf0) >> 4;
}

void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!apic) {
                value |= MSR_IA32_APICBASE_BSP;
                vcpu->arch.apic_base = value;
                return;
        }

        /* update jump label if enable bit changes */
        if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
                if (value & MSR_IA32_APICBASE_ENABLE)
                        static_key_slow_dec_deferred(&apic_hw_disabled);
                else
                        static_key_slow_inc(&apic_hw_disabled.key);
                recalculate_apic_map(vcpu->kvm);
        }

        if (!kvm_vcpu_is_bsp(apic->vcpu))
                value &= ~MSR_IA32_APICBASE_BSP;

        vcpu->arch.apic_base = value;
        if (apic_x2apic_mode(apic)) {
                u32 id = kvm_apic_id(apic);
                u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));

                kvm_apic_set_ldr(apic, ldr);
        }
        apic->base_address = apic->vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;

        /* with FSB delivery interrupt, we can restart APIC functionality */
        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
                   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
}
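/*
 * x2APIC LDR derivation above, by example (illustrative): for APIC id 5,
 * ldr = ((5 & ~0xf) << 16) | (1 << (5 & 0xf)) = 0x20, i.e. cluster 0
 * with logical bit 5 -- exactly what apic_cluster_id()/apic_logical_id()
 * recover with the 16-bit masks recalculate_apic_map() sets up for
 * x2apic mode.
 */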
apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " 1322 "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); 1323 1324 } 1325 1326 void kvm_lapic_reset(struct kvm_vcpu *vcpu) 1327 { 1328 struct kvm_lapic *apic; 1329 int i; 1330 1331 apic_debug("%s\n", __func__); 1332 1333 ASSERT(vcpu); 1334 apic = vcpu->arch.apic; 1335 ASSERT(apic != NULL); 1336 1337 /* Stop the timer in case it's a reset to an active apic */ 1338 hrtimer_cancel(&apic->lapic_timer.timer); 1339 1340 kvm_apic_set_id(apic, vcpu->vcpu_id); 1341 kvm_apic_set_version(apic->vcpu); 1342 1343 for (i = 0; i < APIC_LVT_NUM; i++) 1344 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 1345 apic_set_reg(apic, APIC_LVT0, 1346 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 1347 1348 apic_set_reg(apic, APIC_DFR, 0xffffffffU); 1349 apic_set_spiv(apic, 0xff); 1350 apic_set_reg(apic, APIC_TASKPRI, 0); 1351 kvm_apic_set_ldr(apic, 0); 1352 apic_set_reg(apic, APIC_ESR, 0); 1353 apic_set_reg(apic, APIC_ICR, 0); 1354 apic_set_reg(apic, APIC_ICR2, 0); 1355 apic_set_reg(apic, APIC_TDCR, 0); 1356 apic_set_reg(apic, APIC_TMICT, 0); 1357 for (i = 0; i < 8; i++) { 1358 apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 1359 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1360 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1361 } 1362 apic->irr_pending = false; 1363 apic->isr_count = 0; 1364 apic->highest_isr_cache = -1; 1365 update_divide_count(apic); 1366 atomic_set(&apic->lapic_timer.pending, 0); 1367 if (kvm_vcpu_is_bsp(vcpu)) 1368 kvm_lapic_set_base(vcpu, 1369 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); 1370 vcpu->arch.pv_eoi.msr_val = 0; 1371 apic_update_ppr(apic); 1372 1373 vcpu->arch.apic_arb_prio = 0; 1374 vcpu->arch.apic_attention = 0; 1375 1376 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 1377 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 1378 vcpu, kvm_apic_id(apic), 1379 vcpu->arch.apic_base, apic->base_address); 1380 } 1381 1382 /* 1383 *---------------------------------------------------------------------- 1384 * timer interface 1385 *---------------------------------------------------------------------- 1386 */ 1387 1388 static bool lapic_is_periodic(struct kvm_lapic *apic) 1389 { 1390 return apic_lvtt_period(apic); 1391 } 1392 1393 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 1394 { 1395 struct kvm_lapic *apic = vcpu->arch.apic; 1396 1397 if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && 1398 apic_lvt_enabled(apic, APIC_LVTT)) 1399 return atomic_read(&apic->lapic_timer.pending); 1400 1401 return 0; 1402 } 1403 1404 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 1405 { 1406 u32 reg = kvm_apic_get_reg(apic, lvt_type); 1407 int vector, mode, trig_mode; 1408 1409 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 1410 vector = reg & APIC_VECTOR_MASK; 1411 mode = reg & APIC_MODE_MASK; 1412 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 1413 return __apic_accept_irq(apic, mode, vector, 1, trig_mode); 1414 } 1415 return 0; 1416 } 1417 1418 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 1419 { 1420 struct kvm_lapic *apic = vcpu->arch.apic; 1421 1422 if (apic) 1423 kvm_apic_local_deliver(apic, APIC_LVT0); 1424 } 1425 1426 static const struct kvm_io_device_ops apic_mmio_ops = { 1427 .read = apic_mmio_read, 1428 .write = apic_mmio_write, 1429 }; 1430 1431 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 1432 { 1433 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 1434 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, 
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
                                              lapic_timer);
        struct kvm_vcpu *vcpu = apic->vcpu;
        wait_queue_head_t *q = &vcpu->wq;

        /*
         * There is a race window between reading and incrementing, but we do
         * not care about potentially losing timer events in the !reinject
         * case anyway.  Note: KVM_REQ_PENDING_TIMER is implicitly checked
         * in vcpu_enter_guest.
         */
        if (!atomic_read(&ktimer->pending)) {
                atomic_inc(&ktimer->pending);
                /* FIXME: this code should not know anything about vcpus */
                kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
        }

        if (waitqueue_active(q))
                wake_up_interruptible(q);

        if (lapic_is_periodic(apic)) {
                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
                return HRTIMER_RESTART;
        } else
                return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic;

        ASSERT(vcpu != NULL);
        apic_debug("apic_init %d\n", vcpu->vcpu_id);

        apic = kzalloc(sizeof(*apic), GFP_KERNEL);
        if (!apic)
                goto nomem;

        vcpu->arch.apic = apic;

        apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
        if (!apic->regs) {
                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
                       vcpu->vcpu_id);
                goto nomem_free_apic;
        }
        apic->vcpu = vcpu;

        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
                     HRTIMER_MODE_ABS);
        apic->lapic_timer.timer.function = apic_timer_fn;

        /*
         * APIC is created enabled.  This will prevent kvm_lapic_set_base
         * from thinking that APIC state has changed.
         */
        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
        kvm_lapic_set_base(vcpu,
                        APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);

        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
        kvm_lapic_reset(vcpu);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

        return 0;
nomem_free_apic:
        kfree(apic);
nomem:
        return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        int highest_irr;

        if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
                return -1;

        apic_update_ppr(apic);
        highest_irr = apic_find_highest_irr(apic);
        if ((highest_irr == -1) ||
            ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
                return -1;
        return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
        u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
        int r = 0;

        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
                r = 1;
        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
                r = 1;
        return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!kvm_vcpu_has_lapic(vcpu))
                return;

        if (atomic_read(&apic->lapic_timer.pending) > 0) {
                if (kvm_apic_local_deliver(apic, APIC_LVTT))
                        atomic_dec(&apic->lapic_timer.pending);
        }
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
        int vector = kvm_apic_has_interrupt(vcpu);
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (vector == -1)
                return -1;

        apic_set_isr(vector, apic);
        apic_update_ppr(apic);
        apic_clear_irr(vector, apic);
        return vector;
}
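/*
 * Note on kvm_get_apic_interrupt() above: the sequence mirrors hardware
 * interrupt acceptance -- the vector is marked in-service (and PPR
 * recomputed) before it is cleared from IRR, so it is never absent from
 * both registers at once.
 */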
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
                struct kvm_lapic_state *s)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
        /* set SPIV separately to get count of SW disabled APICs right */
        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
        memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
        /* call kvm_apic_set_id() to put apic into apic_map */
        kvm_apic_set_id(apic, kvm_apic_id(apic));
        kvm_apic_set_version(vcpu);

        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
        update_divide_count(apic);
        start_apic_timer(apic);
        apic->irr_pending = true;
        apic->isr_count = count_vectors(apic->regs + APIC_ISR);
        apic->highest_isr_cache = -1;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
        struct hrtimer *timer;

        if (!kvm_vcpu_has_lapic(vcpu))
                return;

        timer = &vcpu->arch.apic->lapic_timer.timer;
        if (hrtimer_cancel(timer))
                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether guest triggered PV EOI since the
 * last entry.  If yes, set EOI on guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
                                        struct kvm_lapic *apic)
{
        bool pending;
        int vector;
        /*
         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
         * and KVM_PV_EOI_ENABLED in guest memory as follows:
         *
         * KVM_APIC_PV_EOI_PENDING is unset:
         *      -> host disabled PV EOI.
         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
         *      -> host enabled PV EOI, guest did not execute EOI yet.
         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
         *      -> host enabled PV EOI, guest executed EOI.
         */
        BUG_ON(!pv_eoi_enabled(vcpu));
        pending = pv_eoi_get_pending(vcpu);
        /*
         * Clear pending bit in any case: it will be set again on vmentry.
         * While this might not be ideal from performance point of view,
         * this makes sure pv eoi is only enabled when we know it's safe.
         */
        pv_eoi_clr_pending(vcpu);
        if (pending)
                return;
        vector = apic_set_eoi(apic);
        trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
        u32 data;
        void *vapic;

        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
                return;

        vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
        data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
        kunmap_atomic(vapic);

        apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
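/*
 * Note on the pre-entry hook below: PV EOI is armed only when the cached
 * highest in-service vector is valid, nothing is pending in IRR and the
 * ioapic does not need to observe the EOI; otherwise an EOI that KVM
 * never traps could delay a pending injection or skip a required ioapic
 * EOI broadcast.
 */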
/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
                                        struct kvm_lapic *apic)
{
        if (!pv_eoi_enabled(vcpu) ||
            /* IRR set or many bits in ISR: could be nested. */
            apic->irr_pending ||
            /* Cache not set: could be safe but we don't bother. */
            apic->highest_isr_cache == -1 ||
            /* Need EOI to update ioapic. */
            kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
                /*
                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
                 * so we need not do anything here.
                 */
                return;
        }

        pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
        u32 data, tpr;
        int max_irr, max_isr;
        struct kvm_lapic *apic = vcpu->arch.apic;
        void *vapic;

        apic_sync_pv_eoi_to_guest(vcpu, apic);

        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
                return;

        tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
        max_irr = apic_find_highest_irr(apic);
        if (max_irr < 0)
                max_irr = 0;
        max_isr = apic_find_highest_isr(apic);
        if (max_isr < 0)
                max_isr = 0;
        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

        vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
        *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
        kunmap_atomic(vapic);
}

void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
        vcpu->arch.apic->vapic_addr = vapic_addr;
        if (vapic_addr)
                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
        else
                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 reg = (msr - APIC_BASE_MSR) << 4;

        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                return 1;

        /* if this is an ICR write, store the destination (ICR2) before the command */
        if (msr == 0x830)
                apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
        return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                return 1;

        if (apic_reg_read(apic, reg, 4, &low))
                return 1;
        if (msr == 0x830)
                apic_reg_read(apic, APIC_ICR2, 4, &high);

        *data = (((u64)high) << 32) | low;

        return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        if (!kvm_vcpu_has_lapic(vcpu))
                return 1;

        /* if this is an ICR write, store the destination (ICR2) before the command */
        if (reg == APIC_ICR)
                apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
        return apic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 low, high = 0;

        if (!kvm_vcpu_has_lapic(vcpu))
                return 1;

        if (apic_reg_read(apic, reg, 4, &low))
                return 1;
        if (reg == APIC_ICR)
                apic_reg_read(apic, APIC_ICR2, 4, &high);

        *data = (((u64)high) << 32) | low;

        return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
        u64 addr = data & ~KVM_MSR_ENABLED;

        if (!IS_ALIGNED(addr, 4))
                return 1;

        vcpu->arch.pv_eoi.msr_val = data;
        if (!pv_eoi_enabled(vcpu))
                return 0;
        return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
                                         addr);
}
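/*
 * MSR-to-register mapping used by kvm_x2apic_msr_write()/read() above,
 * by example: x2APIC MSRs start at APIC_BASE_MSR (0x800) and each
 * register occupies 16 bytes of MMIO offset, hence
 * reg = (msr - APIC_BASE_MSR) << 4.  MSR 0x830 maps to the 64-bit ICR --
 * the one case where the high dword (ICR2, the destination) must be
 * stored before the low dword triggers the IPI.
 */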
void kvm_lapic_init(void)
{
        /* do not patch jump label more than once per second */
        jump_label_rate_limit(&apic_hw_disabled, HZ);
        jump_label_rate_limit(&apic_sw_disabled, HZ);
}