/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
	return 0;
}

static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (vpidx < KVM_MAX_VCPUS)
		vcpu = kvm_get_vcpu(kvm, vpidx);
	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
			return vcpu;
	return NULL;
}

static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu_hv_synic *synic;

	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	if (!vcpu)
		return NULL;
	synic = vcpu_to_synic(vcpu);
	return (synic->active) ? synic : NULL;
}

static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
					 u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct page *page;
	gpa_t gpa;
	struct hv_message *msg;
	struct hv_message_page *msg_page;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page)) {
		vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
			 gpa);
		return;
	}
	msg_page = kmap_atomic(page);

	msg = &msg_page->sint_message[sint];
	msg->header.message_flags.msg_pending = 0;

	kunmap_atomic(msg_page);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
}

static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx, stimers_pending;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
		synic_clear_sint_msg_pending(synic, sint);

	/* Try to deliver pending Hyper-V SynIC timer messages */
	stimers_pending = 0;
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		if (stimer->msg_pending &&
		    (stimer->config & HV_STIMER_ENABLE) &&
		    HV_STIMER_SINT(stimer->config) == sint) {
			set_bit(stimer->index,
				hv_vcpu->stimer_pending_bitmap);
			stimers_pending++;
		}
	}
	if (stimers_pending)
		kvm_make_request(KVM_REQ_HV_STIMER, vcpu);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv_vcpu->exit.u.synic.msr = msr;
	hv_vcpu->exit.u.synic.control = synic->control;
	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int ret;

	if (!synic->active && !host)
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	int ret;

	if (!synic->active && !host)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}

int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	return synic_set_irq(synic, sint);
}

void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	int i;

	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			kvm_hv_notify_acked_sint(vcpu, i);
}

static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}

void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}

static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
	int i;

	memset(synic, 0, sizeof(*synic));
	synic->version = HV_SYNIC_VERSION_1;
	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
		atomic_set(&synic->sint_to_gsi[i], -1);
	}
}

static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * The guest has not set up the TSC page or the clock isn't
	 * stable, fall back to get_kvmclock_ns.
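	 * The time reference counter is defined in 100ns units, hence the
	 * division by 100.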
	 */
	if (!hv->tsc_ref.tsc_sequence)
		return div_u64(get_kvmclock_ns(kvm), 100);

	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	set_bit(stimer->index,
		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
	if (vcpu_kick)
		kvm_vcpu_kick(vcpu);
}

static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}

static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu_hv_stimer *stimer;

	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
				     stimer->index);
	stimer_mark_pending(stimer, true);

	return HRTIMER_NORESTART;
}

/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config & HV_STIMER_PERIODIC) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}

static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);
	if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
		config &= ~HV_STIMER_ENABLE;
	stimer->config = config;
	stimer_mark_pending(stimer, false);
	return 0;
}

static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (stimer->count == 0)
		stimer->config &= ~HV_STIMER_ENABLE;
	else if (stimer->config & HV_STIMER_AUTOENABLE)
		stimer->config |= HV_STIMER_ENABLE;
	stimer_mark_pending(stimer, false);
	return 0;
}

static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config;
	return 0;
}

static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}

static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct page *page;
	gpa_t gpa;
	struct hv_message *dst_msg;
	int r;
	struct hv_message_page *msg_page;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		return -EFAULT;

	msg_page = kmap_atomic(page);
	dst_msg = &msg_page->sint_message[sint];
	if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
			 src_msg->header.message_type) != HVMSG_NONE) {
		dst_msg->header.message_flags.msg_pending = 1;
		r = -EAGAIN;
	} else {
		memcpy(&dst_msg->u.payload, &src_msg->u.payload,
		       src_msg->header.payload_size);
		dst_msg->header.message_type = src_msg->header.message_type;
		dst_msg->header.payload_size = src_msg->header.payload_size;
		r = synic_set_irq(synic, sint);
		if (r >= 1)
			r = 0;
		else if (r == 0)
			r = -EFAULT;
	}
	kunmap_atomic(msg_page);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
	return r;
}

static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	payload->expiration_time = stimer->exp_time;
	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
	return synic_deliver_msg(vcpu_to_synic(vcpu),
				 HV_STIMER_SINT(stimer->config), msg);
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
	int r;

	stimer->msg_pending = true;
	r = stimer_send_msg(stimer);
	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, r);
	if (!r) {
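		/*
		 * The message was delivered; clear the pending state, and for
		 * a one-shot timer also drop HV_STIMER_ENABLE so it is not
		 * re-armed.
		 */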
		stimer->msg_pending = false;
		if (!(stimer->config & HV_STIMER_PERIODIC))
			stimer->config &= ~HV_STIMER_ENABLE;
	}
}

void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config & HV_STIMER_ENABLE) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config & HV_STIMER_ENABLE) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_cleanup(&hv_vcpu->stimer[i]);
}

static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	memset(&msg->header, 0, sizeof(msg->header));
	msg->header.message_type = HVMSG_TIMER_EXPIRED;
	msg->header.payload_size = sizeof(*payload);

	payload->timer_index = stimer->index;
	payload->expiration_time = 0;
	payload->delivery_time = 0;
}

static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}

void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);
}

void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);

	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}

int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	/*
	 * Hyper-V SynIC auto EOI SINTs are
	 * not compatible with APICV, so deactivate APICV
	 */
	kvm_vcpu_deactivate_apicv(vcpu);
	synic->active = true;
	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	return 0;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		r = true;
		break;
	}

	return r;
}

static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	*pdata = hv->hv_crash_param[index];
	return 0;
}

static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	*pdata = hv->hv_crash_ctl;
	return 0;
}

static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (host)
		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;

	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {

		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
			  hv->hv_crash_param[0],
			  hv->hv_crash_param[1],
			  hv->hv_crash_param[2],
			  hv->hv_crash_param[3],
			  hv->hv_crash_param[4]);

		/* Send notification about crash to user space */
		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
	}

	return 0;
}

static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 data)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	hv->hv_crash_param[index] = data;
	return 0;
}

/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *               - tsc_timestamp * scale / 2^64
 *               + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
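 *
 * As an illustrative sanity check (example numbers, not taken from real
 * hardware): for a 1 GHz guest TSC, kvmclock could use tsc_shift = 1 and
 * tsc_to_system_mul = 2^31, so one tick maps to one nanosecond.  The scale
 * formula then gives scale = 2^31 * 2^(32+1) / 100 = 2^64 / 100, and the
 * Hyper-V formula yields nsec/100 = ticks / 100, as expected.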
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					HV_REFERENCE_TSC_PAGE *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}

void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		return;

	mutex_lock(&kvm->arch.hyperv.hv_lock);
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_unlock;

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_unlock;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_unlock;

	/* Ensure sequence is zero before writing the rest of the struct. */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_unlock;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence. */
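	/*
	 * (The guest reads tsc_sequence, then the other fields, then
	 * tsc_sequence again, retrying if it changed, so this ordering is
	 * what keeps concurrent guest readers consistent.)
	 */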
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	kvm_write_guest(kvm, gfn_to_gpa(gfn),
			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
	mutex_unlock(&kvm->arch.hyperv.hv_lock);
}

static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}

/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		if (!host)
			return 1;
		hv->vp_index = (u32)data;
		break;
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		hv->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		hv->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}

static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
{
	int i = 0, j;

	if (!(valid_bank_mask & BIT_ULL(bank_no)))
		return -1;

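	/*
	 * The sparse bank number is the number of valid banks that precede
	 * bank_no in valid_bank_mask, i.e. this bank's index into the packed
	 * bank_contents[] array.
	 */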
	for (j = 0; j < bank_no; j++)
		if (valid_bank_mask & BIT_ULL(j))
			i++;

	return i;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
	unsigned long valid_bank_mask = 0;
	u64 sparse_banks[64];
	int sparse_banks_len, i;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		sparse_banks[0] = flush.processor_mask;
		all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_current->tlb_lush);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
		int bank = hv->vp_index / 64, sbank = 0;

		if (!all_cpus) {
			/* Banks >64 can't be represented */
			if (bank >= 64)
				continue;

			/* Non-ex hypercalls can only address first 64 vCPUs */
			if (!ex && bank)
				continue;

			if (ex) {
				/*
				 * Check if the bank of this vCPU is in the
				 * sparse set and get the sparse bank number.
				 */
				sbank = get_sparse_bank_no(valid_bank_mask,
							   bank);

				if (sbank < 0)
					continue;
			}

			if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
				continue;
		}

		/*
		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
		 * can't analyze it here, flush TLB regardless of the specified
		 * address space.
		 */
		__set_bit(i, vcpu_bitmap);
	}

	kvm_make_vcpus_request_mask(kvm,
				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
				    vcpu_bitmap, &hv_current->tlb_lush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
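	/*
	 * The count of completed reps is reported to the guest in the
	 * "reps completed" field of the result, i.e. starting at bit
	 * HV_HYPERCALL_REP_COMP_OFFSET.
	 */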
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_register_write(vcpu, VCPU_REGS_RAX, result);
	else {
		kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
		kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, longmode, rep;

	/*
	 * hypercall generates UD from non-zero CPL and real mode
	 * per the Hyper-V spec
	 */
	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	longmode = is_64_bit_mode(vcpu);

	if (!longmode) {
		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
	}
#ifdef CONFIG_X86_64
	else {
		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
	}
#endif

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		/* maybe userspace knows this conn_id: fall through */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = param;
		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

	return kvm_hv_hypercall_complete(vcpu, ret);
}

void kvm_hv_init_vm(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hyperv.hv_lock);
	idr_init(&kvm->arch.hyperv.conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
	struct eventfd_ctx *eventfd;
	int i;

	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
		eventfd_ctx_put(eventfd);
	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
}

static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;
	int ret;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&hv->hv_lock);
	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
			GFP_KERNEL);
	mutex_unlock(&hv->hv_lock);

	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
	eventfd_ctx_put(eventfd);
	return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;

	mutex_lock(&hv->hv_lock);
	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (!eventfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(eventfd);
	return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
		return -EINVAL;

	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}