// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"

#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick);

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
	return 0;
}

static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (vpidx >= KVM_MAX_VCPUS)
		return NULL;

	vcpu = kvm_get_vcpu(kvm, vpidx);
	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
			return vcpu;
	return NULL;
}

static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu_hv_synic *synic;

	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	if (!vcpu)
		return NULL;
	synic = vcpu_to_synic(vcpu);
	return (synic->active) ? synic : NULL;
}

static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	/* Try to deliver pending Hyper-V SynIC timer messages */
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		if (stimer->msg_pending && stimer->config.enable &&
		    !stimer->config.direct_mode &&
		    stimer->config.sintx == sint)
			stimer_mark_pending(stimer, false);
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv_vcpu->exit.u.synic.msr = msr;
	hv_vcpu->exit.u.synic.control = synic->control;
	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int ret;

	if (!synic->active && !host)
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	int ret;

	if (!synic->active && !host)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}

int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	return synic_set_irq(synic, sint);
}

void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	int i;

	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			kvm_hv_notify_acked_sint(vcpu, i);
}

static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}

void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}

static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
	int i;

	memset(synic, 0, sizeof(*synic));
	synic->version = HV_SYNIC_VERSION_1;
	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
		atomic_set(&synic->sint_to_gsi[i], -1);
	}
}

static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * The guest has not set up the TSC page or the clock isn't
	 * stable, fall back to get_kvmclock_ns.
	 */
	if (!hv->tsc_ref.tsc_sequence)
		return div_u64(get_kvmclock_ns(kvm), 100);

	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	set_bit(stimer->index,
		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
	if (vcpu_kick)
		kvm_vcpu_kick(vcpu);
}

static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}

static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu_hv_stimer *stimer;

	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
				     stimer->index);
	stimer_mark_pending(stimer, true);

	return HRTIMER_NORESTART;
}

/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config.periodic) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
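		 *
		 * (Descriptive note: both time_now and stimer->count are in
		 * 100ns units, which is why the hrtimer deadlines in this
		 * function scale the remaining delta by 100 to convert it
		 * to nanoseconds.)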
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}

static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	union hv_stimer_config new_config = {.as_uint64 = config},
		old_config = {.as_uint64 = stimer->config.as_uint64};

	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);
	if (old_config.enable &&
	    !new_config.direct_mode && new_config.sintx == 0)
		new_config.enable = 0;
	stimer->config.as_uint64 = new_config.as_uint64;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (stimer->count == 0)
		stimer->config.enable = 0;
	else if (stimer->config.auto_enable)
		stimer->config.enable = 1;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config.as_uint64;
	return 0;
}

static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}

static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg, bool no_retry)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
	gfn_t msg_page_gfn;
	struct hv_message_header hv_hdr;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type. However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	if (hv_hdr.message_type != HVMSG_NONE) {
		if (no_retry)
			return 0;

		hv_hdr.message_flags.msg_pending = 1;
		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
					      &hv_hdr.message_flags,
					      msg_off +
					      offsetof(struct hv_message,
						       header.message_flags),
					      sizeof(hv_hdr.message_flags));
		if (r < 0)
			return r;
		return -EAGAIN;
	}

	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
				      sizeof(src_msg->header) +
				      src_msg->header.payload_size);
	if (r < 0)
		return r;

	r = synic_set_irq(synic, sint);
	if (r < 0)
		return r;
	if (r == 0)
		return -EFAULT;
	return 0;
}

static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	/*
	 * To avoid piling up periodic ticks, don't retry message
	 * delivery for them (within "lazy" lost ticks policy).
	 */
	bool no_retry = stimer->config.periodic;

	payload->expiration_time = stimer->exp_time;
	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
	return synic_deliver_msg(vcpu_to_synic(vcpu),
				 stimer->config.sintx, msg,
				 no_retry);
}

static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = stimer->config.apic_vector
	};

	return !kvm_apic_set_irq(vcpu, &irq, NULL);
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
	int r, direct = stimer->config.direct_mode;

	stimer->msg_pending = true;
	if (!direct)
		r = stimer_send_msg(stimer);
	else
		r = stimer_notify_direct(stimer);
	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, direct, r);
	if (!r) {
		stimer->msg_pending = false;
		if (!(stimer->config.periodic))
			stimer->config.enable = 0;
	}
}

void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config.enable) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config.enable) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_cleanup(&hv_vcpu->stimer[i]);
}

bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
		return false;
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);

bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
			    struct hv_vp_assist_page *assist_page)
{
	if (!kvm_hv_assist_page_enabled(vcpu))
		return false;
	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
				      assist_page, sizeof(*assist_page));
}
EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);

static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
			(struct hv_timer_message_payload *)&msg->u.payload;

	memset(&msg->header, 0, sizeof(msg->header));
	msg->header.message_type = HVMSG_TIMER_EXPIRED;
	msg->header.payload_size = sizeof(*payload);

	payload->timer_index = stimer->index;
	payload->expiration_time = 0;
	payload->delivery_time = 0;
}

static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}

void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);
}

void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);

	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}

int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	/*
	 * Hyper-V SynIC auto EOI SINT's are
	 * not compatible with APICV, so deactivate APICV
	 */
	kvm_vcpu_deactivate_apicv(vcpu);
	synic->active = true;
	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	return 0;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		r = true;
		break;
	}

	return r;
}

static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	*pdata = hv->hv_crash_param[index];
	return 0;
}

static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	*pdata = hv->hv_crash_ctl;
	return 0;
}

static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (host)
		hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;

	if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) {

		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
			   hv->hv_crash_param[0],
			   hv->hv_crash_param[1],
			   hv->hv_crash_param[2],
			   hv->hv_crash_param[3],
			   hv->hv_crash_param[4]);

		/* Send notification about crash to user space */
		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
	}

	return 0;
}

static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 data)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
		return -EINVAL;

	hv->hv_crash_param[index] = data;
	return 0;
}

/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *    scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *    scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *               - tsc_timestamp * scale / 2^64
 *               + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
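 *
 * Worked example (illustrative only): for a 1 GHz guest TSC, kvmclock may
 * describe "1 tick = 1 ns" as tsc_to_system_mul = 2^31 and tsc_shift = 1.
 * The derivation above then gives scale = 2^31 * 2^(32+1) / 100 = 2^64 / 100,
 * so ticks * scale / 2^64 = ticks / 100, i.e. exactly the 100ns units the
 * Hyper-V reference counter uses.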
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					HV_REFERENCE_TSC_PAGE *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}

void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		return;

	mutex_lock(&kvm->arch.hyperv.hv_lock);
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_unlock;

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_unlock;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_unlock;

	/* Ensure sequence is zero before writing the rest of the struct. */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_unlock;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

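	/*
	 * (Illustrative note, not derived from this file: the guest side is
	 * expected to read the TSC page roughly as
	 *
	 *	do {
	 *		seq = tsc_page->tsc_sequence;
	 *		if (!seq)
	 *			break;	// fall back to TIME_REF_COUNT MSR
	 *		ref = mul_u64_u64_shr(rdtsc(), tsc_page->tsc_scale, 64)
	 *			+ tsc_page->tsc_offset;
	 *	} while (seq != tsc_page->tsc_sequence);
	 *
	 * which is why a non-zero sequence may only become visible after the
	 * rest of the struct has been written.)
	 */
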
	/* Write the struct entirely before the non-zero sequence. */
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	kvm_write_guest(kvm, gfn_to_gpa(gfn),
			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
	mutex_unlock(&kvm->arch.hyperv.hv_lock);
}

static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}

/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
		u32 new_vp_index = (u32)data;

		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_idx by
		 * kvm_hv_vcpu_postcreate so they initially match. Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
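		 *
		 * For example (illustrative): if vcpu_idx is 2 and userspace
		 * sets the VP index to 5, the counter is incremented; setting
		 * it back to 2 later decrements it again.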
		 */
		if (hv_vcpu->vp_index == vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);

		hv_vcpu->vp_index = new_vp_index;
		break;
	}
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;

		/*
		 * Clear apic_assist portion of struct hv_vp_assist_page
		 * only, there can be valuable data in the rest which needs
		 * to be preserved e.g. on migration.
		 */
		if (__clear_user((void __user *)addr, sizeof(u32)))
			return 1;
		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					    sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}

static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv_vcpu->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

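/*
 * (Descriptive note:) a Hyper-V "sparse" VP set is a 64-bit valid_bank_mask
 * plus one 64-bit bank of VP bits per set bit in the mask, where bank i
 * covers VP indices [64*i, 64*i + 63]. For example, a set containing only
 * VP 65 has valid_bank_mask = 0x2 and a single bank with bit 1 set.
 * sparse_set_to_vcpu_mask() expands that layout into a flat vp_bitmap and,
 * when VP indices do not match vcpu indices, into a per-vcpu bitmap.
 */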
static __always_inline unsigned long *sparse_set_to_vcpu_mask(
	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
			     (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len =
			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_vcpu->tlb_flush);

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	kvm_make_vcpus_request_mask(kvm,
				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
				    vcpu_mask, &hv_vcpu->tlb_flush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = vector
	};
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
			continue;

		/* We fail only when APIC is disabled */
		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
			   bool ex, bool fast)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	unsigned long valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!fast) {
			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = outgpa;
			vector = (u32)ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		if (!sparse_banks_len)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_send_ipi_ex,
						    vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_rax_write(vcpu, result);
	else {
		kvm_rdx_write(vcpu, result >> 32);
		kvm_rax_write(vcpu, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, rep;

	/*
	 * Per the Hyper-V spec, a hypercall generates #UD when issued from
	 * non-zero CPL or from real mode.
	 */
	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_mode(vcpu)) {
		param = kvm_rcx_read(vcpu);
		ingpa = kvm_rdx_read(vcpu);
		outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		param = ((u64)kvm_rdx_read(vcpu) << 32) |
			(kvm_rax_read(vcpu) & 0xffffffff);
		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			(kvm_rcx_read(vcpu) & 0xffffffff);
		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			(kvm_rsi_read(vcpu) & 0xffffffff);
	}

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

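	/*
	 * (Descriptive note:) the hypercall input value decoded above packs
	 * the call code into bits 15:0, the "fast" flag at
	 * HV_HYPERCALL_FAST_BIT, and the 12-bit rep count / rep start index
	 * at HV_HYPERCALL_REP_COMP_OFFSET / HV_HYPERCALL_REP_START_OFFSET;
	 * a non-zero rep field on a non-rep call is rejected per-case below.
	 */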
	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		/* fall through - maybe userspace knows this conn_id. */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = param;
		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_SEND_IPI:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
		break;
	case HVCALL_SEND_IPI_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
		break;
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

	return kvm_hv_hypercall_complete(vcpu, ret);
}

void kvm_hv_init_vm(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hyperv.hv_lock);
	idr_init(&kvm->arch.hyperv.conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
	struct eventfd_ctx *eventfd;
	int i;

	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
		eventfd_ctx_put(eventfd);
	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
}

static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;
	int ret;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&hv->hv_lock);
	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
			GFP_KERNEL_ACCOUNT);
	mutex_unlock(&hv->hv_lock);

	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
	eventfd_ctx_put(eventfd);
	return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;

	mutex_lock(&hv->hv_lock);
	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (!eventfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(eventfd);
	return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
		return -EINVAL;

	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}

int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
				struct kvm_cpuid_entry2 __user *entries)
{
	uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
	struct kvm_cpuid_entry2 cpuid_entries[] = {
		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_INTERFACE },
		{ .function = HYPERV_CPUID_VERSION },
		{ .function = HYPERV_CPUID_FEATURES },
		{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
		{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
		{ .function = HYPERV_CPUID_NESTED_FEATURES },
	};
	int i, nent = ARRAY_SIZE(cpuid_entries);

	/* Skip NESTED_FEATURES if eVMCS is not supported */
	if (!evmcs_ver)
		--nent;

	if (cpuid->nent < nent)
		return -E2BIG;

	if (cpuid->nent > nent)
		cpuid->nent = nent;

	for (i = 0; i < nent; i++) {
		struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
		u32 signature[3];

		switch (ent->function) {
		case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
			memcpy(signature, "Linux KVM Hv", 12);

			ent->eax = HYPERV_CPUID_NESTED_FEATURES;
			ent->ebx = signature[0];
			ent->ecx = signature[1];
			ent->edx = signature[2];
			break;

		case HYPERV_CPUID_INTERFACE:
			memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
			ent->eax = signature[0];
			break;

		case HYPERV_CPUID_VERSION:
			/*
			 * We implement some Hyper-V 2016 functions so let's use
			 * this version.
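			 *
			 * (Descriptive note: eax below is the build number,
			 * 0x3839 = 14393, and ebx packs major.minor as 10.0,
			 * matching Windows Server 2016.)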
			 */
			ent->eax = 0x00003839;
			ent->ebx = 0x000A0000;
			break;

		case HYPERV_CPUID_FEATURES:
			ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE;
			ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
			ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE;
			ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
			ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
			ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
			ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
			ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
			ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
			ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
			ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;

			ent->ebx |= HV_X64_POST_MESSAGES;
			ent->ebx |= HV_X64_SIGNAL_EVENTS;

			ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
			ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
			ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;

			break;

		case HYPERV_CPUID_ENLIGHTMENT_INFO:
			ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
			ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
			ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
			ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
			ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
			if (evmcs_ver)
				ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;

			/*
			 * Default number of spinlock retry attempts, matches
			 * HyperV 2016.
			 */
			ent->ebx = 0x00000FFF;

			break;

		case HYPERV_CPUID_IMPLEMENT_LIMITS:
			/* Maximum number of virtual processors */
			ent->eax = KVM_MAX_VCPUS;
			/*
			 * Maximum number of logical processors, matches
			 * HyperV 2016.
			 */
			ent->ebx = 64;

			break;

		case HYPERV_CPUID_NESTED_FEATURES:
			ent->eax = evmcs_ver;

			break;

		default:
			break;
		}
	}

	if (copy_to_user(entries, cpuid_entries,
			 nent * sizeof(struct kvm_cpuid_entry2)))
		return -EFAULT;

	return 0;
}