1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * KVM Microsoft Hyper-V emulation 4 * 5 * derived from arch/x86/kvm/x86.c 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * Copyright (C) 2008 Qumranet, Inc. 9 * Copyright IBM Corporation, 2008 10 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 11 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> 12 * 13 * Authors: 14 * Avi Kivity <avi@qumranet.com> 15 * Yaniv Kamay <yaniv@qumranet.com> 16 * Amit Shah <amit.shah@qumranet.com> 17 * Ben-Ami Yassour <benami@il.ibm.com> 18 * Andrey Smetanin <asmetanin@virtuozzo.com> 19 */ 20 21 #include "x86.h" 22 #include "lapic.h" 23 #include "ioapic.h" 24 #include "hyperv.h" 25 26 #include <linux/cpu.h> 27 #include <linux/kvm_host.h> 28 #include <linux/highmem.h> 29 #include <linux/sched/cputime.h> 30 #include <linux/eventfd.h> 31 32 #include <asm/apicdef.h> 33 #include <trace/events/kvm.h> 34 35 #include "trace.h" 36 #include "irq.h" 37 38 #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) 39 40 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 41 bool vcpu_kick); 42 43 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) 44 { 45 return atomic64_read(&synic->sint[sint]); 46 } 47 48 static inline int synic_get_sint_vector(u64 sint_value) 49 { 50 if (sint_value & HV_SYNIC_SINT_MASKED) 51 return -1; 52 return sint_value & HV_SYNIC_SINT_VECTOR_MASK; 53 } 54 55 static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, 56 int vector) 57 { 58 int i; 59 60 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 61 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 62 return true; 63 } 64 return false; 65 } 66 67 static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, 68 int vector) 69 { 70 int i; 71 u64 sint_value; 72 73 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 74 sint_value = synic_read_sint(synic, i); 75 if (synic_get_sint_vector(sint_value) == vector && 76 sint_value & HV_SYNIC_SINT_AUTO_EOI) 77 return true; 78 } 79 return false; 80 } 81 82 static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, 83 int vector) 84 { 85 if (vector < HV_SYNIC_FIRST_VALID_VECTOR) 86 return; 87 88 if (synic_has_vector_connected(synic, vector)) 89 __set_bit(vector, synic->vec_bitmap); 90 else 91 __clear_bit(vector, synic->vec_bitmap); 92 93 if (synic_has_vector_auto_eoi(synic, vector)) 94 __set_bit(vector, synic->auto_eoi_bitmap); 95 else 96 __clear_bit(vector, synic->auto_eoi_bitmap); 97 } 98 99 static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, 100 u64 data, bool host) 101 { 102 int vector, old_vector; 103 bool masked; 104 105 vector = data & HV_SYNIC_SINT_VECTOR_MASK; 106 masked = data & HV_SYNIC_SINT_MASKED; 107 108 /* 109 * Valid vectors are 16-255, however, nested Hyper-V attempts to write 110 * default '0x10000' value on boot and this should not #GP. We need to 111 * allow zero-initing the register from host as well. 112 */ 113 if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) 114 return 1; 115 /* 116 * Guest may configure multiple SINTs to use the same vector, so 117 * we maintain a bitmap of vectors handled by synic, and a 118 * bitmap of vectors with auto-eoi behavior. The bitmaps are 119 * updated here, and atomically queried on fast paths. 
120 */ 121 old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; 122 123 atomic64_set(&synic->sint[sint], data); 124 125 synic_update_vector(synic, old_vector); 126 127 synic_update_vector(synic, vector); 128 129 /* Load SynIC vectors into EOI exit bitmap */ 130 kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); 131 return 0; 132 } 133 134 static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) 135 { 136 struct kvm_vcpu *vcpu = NULL; 137 int i; 138 139 if (vpidx >= KVM_MAX_VCPUS) 140 return NULL; 141 142 vcpu = kvm_get_vcpu(kvm, vpidx); 143 if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 144 return vcpu; 145 kvm_for_each_vcpu(i, vcpu, kvm) 146 if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 147 return vcpu; 148 return NULL; 149 } 150 151 static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) 152 { 153 struct kvm_vcpu *vcpu; 154 struct kvm_vcpu_hv_synic *synic; 155 156 vcpu = get_vcpu_by_vpidx(kvm, vpidx); 157 if (!vcpu) 158 return NULL; 159 synic = vcpu_to_synic(vcpu); 160 return (synic->active) ? synic : NULL; 161 } 162 163 static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) 164 { 165 struct kvm *kvm = vcpu->kvm; 166 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 167 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 168 struct kvm_vcpu_hv_stimer *stimer; 169 int gsi, idx; 170 171 trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); 172 173 /* Try to deliver pending Hyper-V SynIC timers messages */ 174 for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { 175 stimer = &hv_vcpu->stimer[idx]; 176 if (stimer->msg_pending && stimer->config.enable && 177 !stimer->config.direct_mode && 178 stimer->config.sintx == sint) 179 stimer_mark_pending(stimer, false); 180 } 181 182 idx = srcu_read_lock(&kvm->irq_srcu); 183 gsi = atomic_read(&synic->sint_to_gsi[sint]); 184 if (gsi != -1) 185 kvm_notify_acked_gsi(kvm, gsi); 186 srcu_read_unlock(&kvm->irq_srcu, idx); 187 } 188 189 static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) 190 { 191 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 192 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 193 194 hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; 195 hv_vcpu->exit.u.synic.msr = msr; 196 hv_vcpu->exit.u.synic.control = synic->control; 197 hv_vcpu->exit.u.synic.evt_page = synic->evt_page; 198 hv_vcpu->exit.u.synic.msg_page = synic->msg_page; 199 200 kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 201 } 202 203 static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, 204 u32 msr, u64 data, bool host) 205 { 206 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 207 int ret; 208 209 if (!synic->active && !host) 210 return 1; 211 212 trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); 213 214 ret = 0; 215 switch (msr) { 216 case HV_X64_MSR_SCONTROL: 217 synic->control = data; 218 if (!host) 219 synic_exit(synic, msr); 220 break; 221 case HV_X64_MSR_SVERSION: 222 if (!host) { 223 ret = 1; 224 break; 225 } 226 synic->version = data; 227 break; 228 case HV_X64_MSR_SIEFP: 229 if ((data & HV_SYNIC_SIEFP_ENABLE) && !host && 230 !synic->dont_zero_synic_pages) 231 if (kvm_clear_guest(vcpu->kvm, 232 data & PAGE_MASK, PAGE_SIZE)) { 233 ret = 1; 234 break; 235 } 236 synic->evt_page = data; 237 if (!host) 238 synic_exit(synic, msr); 239 break; 240 case HV_X64_MSR_SIMP: 241 if ((data & HV_SYNIC_SIMP_ENABLE) && !host && 242 !synic->dont_zero_synic_pages) 243 if (kvm_clear_guest(vcpu->kvm, 244 data & PAGE_MASK, PAGE_SIZE)) { 245 ret = 1; 246 break; 247 } 248 synic->msg_page = data; 
249 if (!host) 250 synic_exit(synic, msr); 251 break; 252 case HV_X64_MSR_EOM: { 253 int i; 254 255 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 256 kvm_hv_notify_acked_sint(vcpu, i); 257 break; 258 } 259 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 260 ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); 261 break; 262 default: 263 ret = 1; 264 break; 265 } 266 return ret; 267 } 268 269 static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, 270 bool host) 271 { 272 int ret; 273 274 if (!synic->active && !host) 275 return 1; 276 277 ret = 0; 278 switch (msr) { 279 case HV_X64_MSR_SCONTROL: 280 *pdata = synic->control; 281 break; 282 case HV_X64_MSR_SVERSION: 283 *pdata = synic->version; 284 break; 285 case HV_X64_MSR_SIEFP: 286 *pdata = synic->evt_page; 287 break; 288 case HV_X64_MSR_SIMP: 289 *pdata = synic->msg_page; 290 break; 291 case HV_X64_MSR_EOM: 292 *pdata = 0; 293 break; 294 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 295 *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); 296 break; 297 default: 298 ret = 1; 299 break; 300 } 301 return ret; 302 } 303 304 static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) 305 { 306 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 307 struct kvm_lapic_irq irq; 308 int ret, vector; 309 310 if (sint >= ARRAY_SIZE(synic->sint)) 311 return -EINVAL; 312 313 vector = synic_get_sint_vector(synic_read_sint(synic, sint)); 314 if (vector < 0) 315 return -ENOENT; 316 317 memset(&irq, 0, sizeof(irq)); 318 irq.shorthand = APIC_DEST_SELF; 319 irq.dest_mode = APIC_DEST_PHYSICAL; 320 irq.delivery_mode = APIC_DM_FIXED; 321 irq.vector = vector; 322 irq.level = 1; 323 324 ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); 325 trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); 326 return ret; 327 } 328 329 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint) 330 { 331 struct kvm_vcpu_hv_synic *synic; 332 333 synic = synic_get(kvm, vpidx); 334 if (!synic) 335 return -EINVAL; 336 337 return synic_set_irq(synic, sint); 338 } 339 340 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) 341 { 342 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 343 int i; 344 345 trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); 346 347 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 348 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 349 kvm_hv_notify_acked_sint(vcpu, i); 350 } 351 352 static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi) 353 { 354 struct kvm_vcpu_hv_synic *synic; 355 356 synic = synic_get(kvm, vpidx); 357 if (!synic) 358 return -EINVAL; 359 360 if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) 361 return -EINVAL; 362 363 atomic_set(&synic->sint_to_gsi[sint], gsi); 364 return 0; 365 } 366 367 void kvm_hv_irq_routing_update(struct kvm *kvm) 368 { 369 struct kvm_irq_routing_table *irq_rt; 370 struct kvm_kernel_irq_routing_entry *e; 371 u32 gsi; 372 373 irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, 374 lockdep_is_held(&kvm->irq_lock)); 375 376 for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { 377 hlist_for_each_entry(e, &irq_rt->map[gsi], link) { 378 if (e->type == KVM_IRQ_ROUTING_HV_SINT) 379 kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, 380 e->hv_sint.sint, gsi); 381 } 382 } 383 } 384 385 static void synic_init(struct kvm_vcpu_hv_synic *synic) 386 { 387 int i; 388 389 memset(synic, 0, sizeof(*synic)); 390 synic->version = HV_SYNIC_VERSION_1; 391 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 392 
atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); 393 atomic_set(&synic->sint_to_gsi[i], -1); 394 } 395 } 396 397 static u64 get_time_ref_counter(struct kvm *kvm) 398 { 399 struct kvm_hv *hv = &kvm->arch.hyperv; 400 struct kvm_vcpu *vcpu; 401 u64 tsc; 402 403 /* 404 * The guest has not set up the TSC page or the clock isn't 405 * stable, fall back to get_kvmclock_ns. 406 */ 407 if (!hv->tsc_ref.tsc_sequence) 408 return div_u64(get_kvmclock_ns(kvm), 100); 409 410 vcpu = kvm_get_vcpu(kvm, 0); 411 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 412 return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) 413 + hv->tsc_ref.tsc_offset; 414 } 415 416 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 417 bool vcpu_kick) 418 { 419 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 420 421 set_bit(stimer->index, 422 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 423 kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 424 if (vcpu_kick) 425 kvm_vcpu_kick(vcpu); 426 } 427 428 static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) 429 { 430 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 431 432 trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, 433 stimer->index); 434 435 hrtimer_cancel(&stimer->timer); 436 clear_bit(stimer->index, 437 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 438 stimer->msg_pending = false; 439 stimer->exp_time = 0; 440 } 441 442 static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) 443 { 444 struct kvm_vcpu_hv_stimer *stimer; 445 446 stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); 447 trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, 448 stimer->index); 449 stimer_mark_pending(stimer, true); 450 451 return HRTIMER_NORESTART; 452 } 453 454 /* 455 * stimer_start() assumptions: 456 * a) stimer->count is not equal to 0 457 * b) stimer->config has HV_STIMER_ENABLE flag 458 */ 459 static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) 460 { 461 u64 time_now; 462 ktime_t ktime_now; 463 464 time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); 465 ktime_now = ktime_get(); 466 467 if (stimer->config.periodic) { 468 if (stimer->exp_time) { 469 if (time_now >= stimer->exp_time) { 470 u64 remainder; 471 472 div64_u64_rem(time_now - stimer->exp_time, 473 stimer->count, &remainder); 474 stimer->exp_time = 475 time_now + (stimer->count - remainder); 476 } 477 } else 478 stimer->exp_time = time_now + stimer->count; 479 480 trace_kvm_hv_stimer_start_periodic( 481 stimer_to_vcpu(stimer)->vcpu_id, 482 stimer->index, 483 time_now, stimer->exp_time); 484 485 hrtimer_start(&stimer->timer, 486 ktime_add_ns(ktime_now, 487 100 * (stimer->exp_time - time_now)), 488 HRTIMER_MODE_ABS); 489 return 0; 490 } 491 stimer->exp_time = stimer->count; 492 if (time_now >= stimer->count) { 493 /* 494 * Expire timer according to Hypervisor Top-Level Functional 495 * specification v4(15.3.1): 496 * "If a one shot is enabled and the specified count is in 497 * the past, it will expire immediately." 
498 */ 499 stimer_mark_pending(stimer, false); 500 return 0; 501 } 502 503 trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, 504 stimer->index, 505 time_now, stimer->count); 506 507 hrtimer_start(&stimer->timer, 508 ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), 509 HRTIMER_MODE_ABS); 510 return 0; 511 } 512 513 static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, 514 bool host) 515 { 516 union hv_stimer_config new_config = {.as_uint64 = config}, 517 old_config = {.as_uint64 = stimer->config.as_uint64}; 518 519 trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, 520 stimer->index, config, host); 521 522 stimer_cleanup(stimer); 523 if (old_config.enable && 524 !new_config.direct_mode && new_config.sintx == 0) 525 new_config.enable = 0; 526 stimer->config.as_uint64 = new_config.as_uint64; 527 528 if (stimer->config.enable) 529 stimer_mark_pending(stimer, false); 530 531 return 0; 532 } 533 534 static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, 535 bool host) 536 { 537 trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, 538 stimer->index, count, host); 539 540 stimer_cleanup(stimer); 541 stimer->count = count; 542 if (stimer->count == 0) 543 stimer->config.enable = 0; 544 else if (stimer->config.auto_enable) 545 stimer->config.enable = 1; 546 547 if (stimer->config.enable) 548 stimer_mark_pending(stimer, false); 549 550 return 0; 551 } 552 553 static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) 554 { 555 *pconfig = stimer->config.as_uint64; 556 return 0; 557 } 558 559 static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) 560 { 561 *pcount = stimer->count; 562 return 0; 563 } 564 565 static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, 566 struct hv_message *src_msg, bool no_retry) 567 { 568 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 569 int msg_off = offsetof(struct hv_message_page, sint_message[sint]); 570 gfn_t msg_page_gfn; 571 struct hv_message_header hv_hdr; 572 int r; 573 574 if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) 575 return -ENOENT; 576 577 msg_page_gfn = synic->msg_page >> PAGE_SHIFT; 578 579 /* 580 * Strictly following the spec-mandated ordering would assume setting 581 * .msg_pending before checking .message_type. However, this function 582 * is only called in vcpu context so the entire update is atomic from 583 * guest POV and thus the exact order here doesn't matter. 
584 */ 585 r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type, 586 msg_off + offsetof(struct hv_message, 587 header.message_type), 588 sizeof(hv_hdr.message_type)); 589 if (r < 0) 590 return r; 591 592 if (hv_hdr.message_type != HVMSG_NONE) { 593 if (no_retry) 594 return 0; 595 596 hv_hdr.message_flags.msg_pending = 1; 597 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, 598 &hv_hdr.message_flags, 599 msg_off + 600 offsetof(struct hv_message, 601 header.message_flags), 602 sizeof(hv_hdr.message_flags)); 603 if (r < 0) 604 return r; 605 return -EAGAIN; 606 } 607 608 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off, 609 sizeof(src_msg->header) + 610 src_msg->header.payload_size); 611 if (r < 0) 612 return r; 613 614 r = synic_set_irq(synic, sint); 615 if (r < 0) 616 return r; 617 if (r == 0) 618 return -EFAULT; 619 return 0; 620 } 621 622 static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) 623 { 624 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 625 struct hv_message *msg = &stimer->msg; 626 struct hv_timer_message_payload *payload = 627 (struct hv_timer_message_payload *)&msg->u.payload; 628 629 /* 630 * To avoid piling up periodic ticks, don't retry message 631 * delivery for them (within "lazy" lost ticks policy). 632 */ 633 bool no_retry = stimer->config.periodic; 634 635 payload->expiration_time = stimer->exp_time; 636 payload->delivery_time = get_time_ref_counter(vcpu->kvm); 637 return synic_deliver_msg(vcpu_to_synic(vcpu), 638 stimer->config.sintx, msg, 639 no_retry); 640 } 641 642 static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) 643 { 644 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 645 struct kvm_lapic_irq irq = { 646 .delivery_mode = APIC_DM_FIXED, 647 .vector = stimer->config.apic_vector 648 }; 649 650 if (lapic_in_kernel(vcpu)) 651 return !kvm_apic_set_irq(vcpu, &irq, NULL); 652 return 0; 653 } 654 655 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) 656 { 657 int r, direct = stimer->config.direct_mode; 658 659 stimer->msg_pending = true; 660 if (!direct) 661 r = stimer_send_msg(stimer); 662 else 663 r = stimer_notify_direct(stimer); 664 trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, 665 stimer->index, direct, r); 666 if (!r) { 667 stimer->msg_pending = false; 668 if (!(stimer->config.periodic)) 669 stimer->config.enable = 0; 670 } 671 } 672 673 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) 674 { 675 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 676 struct kvm_vcpu_hv_stimer *stimer; 677 u64 time_now, exp_time; 678 int i; 679 680 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 681 if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { 682 stimer = &hv_vcpu->stimer[i]; 683 if (stimer->config.enable) { 684 exp_time = stimer->exp_time; 685 686 if (exp_time) { 687 time_now = 688 get_time_ref_counter(vcpu->kvm); 689 if (time_now >= exp_time) 690 stimer_expiration(stimer); 691 } 692 693 if ((stimer->config.enable) && 694 stimer->count) { 695 if (!stimer->msg_pending) 696 stimer_start(stimer); 697 } else 698 stimer_cleanup(stimer); 699 } 700 } 701 } 702 703 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) 704 { 705 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 706 int i; 707 708 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 709 stimer_cleanup(&hv_vcpu->stimer[i]); 710 } 711 712 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) 713 { 714 if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) 715 return false; 716 return 
vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 717 } 718 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); 719 720 bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, 721 struct hv_vp_assist_page *assist_page) 722 { 723 if (!kvm_hv_assist_page_enabled(vcpu)) 724 return false; 725 return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, 726 assist_page, sizeof(*assist_page)); 727 } 728 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); 729 730 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) 731 { 732 struct hv_message *msg = &stimer->msg; 733 struct hv_timer_message_payload *payload = 734 (struct hv_timer_message_payload *)&msg->u.payload; 735 736 memset(&msg->header, 0, sizeof(msg->header)); 737 msg->header.message_type = HVMSG_TIMER_EXPIRED; 738 msg->header.payload_size = sizeof(*payload); 739 740 payload->timer_index = stimer->index; 741 payload->expiration_time = 0; 742 payload->delivery_time = 0; 743 } 744 745 static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) 746 { 747 memset(stimer, 0, sizeof(*stimer)); 748 stimer->index = timer_index; 749 hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 750 stimer->timer.function = stimer_timer_callback; 751 stimer_prepare_msg(stimer); 752 } 753 754 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) 755 { 756 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 757 int i; 758 759 synic_init(&hv_vcpu->synic); 760 761 bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 762 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 763 stimer_init(&hv_vcpu->stimer[i], i); 764 } 765 766 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu) 767 { 768 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 769 770 hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); 771 } 772 773 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) 774 { 775 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 776 777 /* 778 * Hyper-V SynIC auto EOI SINT's are 779 * not compatible with APICV, so deactivate APICV 780 */ 781 kvm_vcpu_deactivate_apicv(vcpu); 782 synic->active = true; 783 synic->dont_zero_synic_pages = dont_zero_synic_pages; 784 return 0; 785 } 786 787 static bool kvm_hv_msr_partition_wide(u32 msr) 788 { 789 bool r = false; 790 791 switch (msr) { 792 case HV_X64_MSR_GUEST_OS_ID: 793 case HV_X64_MSR_HYPERCALL: 794 case HV_X64_MSR_REFERENCE_TSC: 795 case HV_X64_MSR_TIME_REF_COUNT: 796 case HV_X64_MSR_CRASH_CTL: 797 case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: 798 case HV_X64_MSR_RESET: 799 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 800 case HV_X64_MSR_TSC_EMULATION_CONTROL: 801 case HV_X64_MSR_TSC_EMULATION_STATUS: 802 r = true; 803 break; 804 } 805 806 return r; 807 } 808 809 static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, 810 u32 index, u64 *pdata) 811 { 812 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 813 size_t size = ARRAY_SIZE(hv->hv_crash_param); 814 815 if (WARN_ON_ONCE(index >= size)) 816 return -EINVAL; 817 818 *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; 819 return 0; 820 } 821 822 static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) 823 { 824 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 825 826 *pdata = hv->hv_crash_ctl; 827 return 0; 828 } 829 830 static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) 831 { 832 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 833 834 if (host) 835 hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY; 836 837 if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) { 838 839 vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", 840 hv->hv_crash_param[0], 841 hv->hv_crash_param[1], 842 hv->hv_crash_param[2], 843 hv->hv_crash_param[3], 844 hv->hv_crash_param[4]); 845 846 /* Send notification about crash to user space */ 847 kvm_make_request(KVM_REQ_HV_CRASH, vcpu); 848 } 849 850 return 0; 851 } 852 853 static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, 854 u32 index, u64 data) 855 { 856 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 857 size_t size = ARRAY_SIZE(hv->hv_crash_param); 858 859 if (WARN_ON_ONCE(index >= size)) 860 return -EINVAL; 861 862 hv->hv_crash_param[array_index_nospec(index, size)] = data; 863 return 0; 864 } 865 866 /* 867 * The kvmclock and Hyper-V TSC page use similar formulas, and converting 868 * between them is possible: 869 * 870 * kvmclock formula: 871 * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) 872 * + system_time 873 * 874 * Hyper-V formula: 875 * nsec/100 = ticks * scale / 2^64 + offset 876 * 877 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 878 * By dividing the kvmclock formula by 100 and equating what's left we get: 879 * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 880 * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 881 * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100 882 * 883 * Now expand the kvmclock formula and divide by 100: 884 * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) 885 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) 886 * + system_time 887 * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 888 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 889 * + system_time / 100 890 * 891 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: 892 * nsec/100 = ticks * scale / 2^64 893 * - tsc_timestamp * scale / 2^64 894 * + system_time / 100 895 * 896 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: 897 * offset = system_time / 100 - tsc_timestamp * scale / 2^64 898 * 899 * These two equivalencies are implemented in this function. 
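 *
 * Illustrative worked example (hypothetical numbers, not taken from any
 * particular host): assume a 1 GHz guest TSC that kvmclock describes with
 * tsc_shift = 1 and tsc_to_system_mul = 2^31, so that
 * tsc_to_system_mul * 2^(tsc_shift-32) = 1 ns per tick.  Then
 *
 *	scale = 2^31 * 2^(32+1) / 100 = 2^64 / 100
 *
 * and the Hyper-V formula gives
 *
 *	nsec/100 = ticks * (2^64/100) / 2^64 = ticks / 100
 *
 * i.e. the reference counter advances by one 100ns unit every 100 ticks of
 * the 1 GHz TSC, as expected.  With tsc_timestamp = system_time = 0 the
 * offset is 0.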
900 */ 901 static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, 902 HV_REFERENCE_TSC_PAGE *tsc_ref) 903 { 904 u64 max_mul; 905 906 if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) 907 return false; 908 909 /* 910 * check if scale would overflow, if so we use the time ref counter 911 * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 912 * tsc_to_system_mul / 100 >= 2^(32-tsc_shift) 913 * tsc_to_system_mul >= 100 * 2^(32-tsc_shift) 914 */ 915 max_mul = 100ull << (32 - hv_clock->tsc_shift); 916 if (hv_clock->tsc_to_system_mul >= max_mul) 917 return false; 918 919 /* 920 * Otherwise compute the scale and offset according to the formulas 921 * derived above. 922 */ 923 tsc_ref->tsc_scale = 924 mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), 925 hv_clock->tsc_to_system_mul, 926 100); 927 928 tsc_ref->tsc_offset = hv_clock->system_time; 929 do_div(tsc_ref->tsc_offset, 100); 930 tsc_ref->tsc_offset -= 931 mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); 932 return true; 933 } 934 935 void kvm_hv_setup_tsc_page(struct kvm *kvm, 936 struct pvclock_vcpu_time_info *hv_clock) 937 { 938 struct kvm_hv *hv = &kvm->arch.hyperv; 939 u32 tsc_seq; 940 u64 gfn; 941 942 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 943 BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); 944 945 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 946 return; 947 948 mutex_lock(&kvm->arch.hyperv.hv_lock); 949 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 950 goto out_unlock; 951 952 gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 953 /* 954 * Because the TSC parameters only vary when there is a 955 * change in the master clock, do not bother with caching. 956 */ 957 if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), 958 &tsc_seq, sizeof(tsc_seq)))) 959 goto out_unlock; 960 961 /* 962 * While we're computing and writing the parameters, force the 963 * guest to use the time reference count MSR. 964 */ 965 hv->tsc_ref.tsc_sequence = 0; 966 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 967 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 968 goto out_unlock; 969 970 if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) 971 goto out_unlock; 972 973 /* Ensure sequence is zero before writing the rest of the struct. */ 974 smp_wmb(); 975 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 976 goto out_unlock; 977 978 /* 979 * Now switch to the TSC page mechanism by writing the sequence. 980 */ 981 tsc_seq++; 982 if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0) 983 tsc_seq = 1; 984 985 /* Write the struct entirely before the non-zero sequence. 
*/ 986 smp_wmb(); 987 988 hv->tsc_ref.tsc_sequence = tsc_seq; 989 kvm_write_guest(kvm, gfn_to_gpa(gfn), 990 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); 991 out_unlock: 992 mutex_unlock(&kvm->arch.hyperv.hv_lock); 993 } 994 995 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, 996 bool host) 997 { 998 struct kvm *kvm = vcpu->kvm; 999 struct kvm_hv *hv = &kvm->arch.hyperv; 1000 1001 switch (msr) { 1002 case HV_X64_MSR_GUEST_OS_ID: 1003 hv->hv_guest_os_id = data; 1004 /* setting guest os id to zero disables hypercall page */ 1005 if (!hv->hv_guest_os_id) 1006 hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 1007 break; 1008 case HV_X64_MSR_HYPERCALL: { 1009 u64 gfn; 1010 unsigned long addr; 1011 u8 instructions[4]; 1012 1013 /* if guest os id is not set hypercall should remain disabled */ 1014 if (!hv->hv_guest_os_id) 1015 break; 1016 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1017 hv->hv_hypercall = data; 1018 break; 1019 } 1020 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1021 addr = gfn_to_hva(kvm, gfn); 1022 if (kvm_is_error_hva(addr)) 1023 return 1; 1024 kvm_x86_ops->patch_hypercall(vcpu, instructions); 1025 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1026 if (__copy_to_user((void __user *)addr, instructions, 4)) 1027 return 1; 1028 hv->hv_hypercall = data; 1029 mark_page_dirty(kvm, gfn); 1030 break; 1031 } 1032 case HV_X64_MSR_REFERENCE_TSC: 1033 hv->hv_tsc_page = data; 1034 if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) 1035 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1036 break; 1037 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1038 return kvm_hv_msr_set_crash_data(vcpu, 1039 msr - HV_X64_MSR_CRASH_P0, 1040 data); 1041 case HV_X64_MSR_CRASH_CTL: 1042 return kvm_hv_msr_set_crash_ctl(vcpu, data, host); 1043 case HV_X64_MSR_RESET: 1044 if (data == 1) { 1045 vcpu_debug(vcpu, "hyper-v reset requested\n"); 1046 kvm_make_request(KVM_REQ_HV_RESET, vcpu); 1047 } 1048 break; 1049 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1050 hv->hv_reenlightenment_control = data; 1051 break; 1052 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1053 hv->hv_tsc_emulation_control = data; 1054 break; 1055 case HV_X64_MSR_TSC_EMULATION_STATUS: 1056 hv->hv_tsc_emulation_status = data; 1057 break; 1058 case HV_X64_MSR_TIME_REF_COUNT: 1059 /* read-only, but still ignore it if host-initiated */ 1060 if (!host) 1061 return 1; 1062 break; 1063 default: 1064 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1065 msr, data); 1066 return 1; 1067 } 1068 return 0; 1069 } 1070 1071 /* Calculate cpu time spent by current task in 100ns units */ 1072 static u64 current_task_runtime_100ns(void) 1073 { 1074 u64 utime, stime; 1075 1076 task_cputime_adjusted(current, &utime, &stime); 1077 1078 return div_u64(utime + stime, 100); 1079 } 1080 1081 static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1082 { 1083 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1084 1085 switch (msr) { 1086 case HV_X64_MSR_VP_INDEX: { 1087 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 1088 int vcpu_idx = kvm_vcpu_get_idx(vcpu); 1089 u32 new_vp_index = (u32)data; 1090 1091 if (!host || new_vp_index >= KVM_MAX_VCPUS) 1092 return 1; 1093 1094 if (new_vp_index == hv_vcpu->vp_index) 1095 return 0; 1096 1097 /* 1098 * The VP index is initialized to vcpu_index by 1099 * kvm_hv_vcpu_postcreate so they initially match. Now the 1100 * VP index is changing, adjust num_mismatched_vp_indexes if 1101 * it now matches or no longer matches vcpu_idx. 
1102 */ 1103 if (hv_vcpu->vp_index == vcpu_idx) 1104 atomic_inc(&hv->num_mismatched_vp_indexes); 1105 else if (new_vp_index == vcpu_idx) 1106 atomic_dec(&hv->num_mismatched_vp_indexes); 1107 1108 hv_vcpu->vp_index = new_vp_index; 1109 break; 1110 } 1111 case HV_X64_MSR_VP_ASSIST_PAGE: { 1112 u64 gfn; 1113 unsigned long addr; 1114 1115 if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { 1116 hv_vcpu->hv_vapic = data; 1117 if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) 1118 return 1; 1119 break; 1120 } 1121 gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; 1122 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); 1123 if (kvm_is_error_hva(addr)) 1124 return 1; 1125 1126 /* 1127 * Clear apic_assist portion of struct hv_vp_assist_page 1128 * only, there can be valuable data in the rest which needs 1129 * to be preserved e.g. on migration. 1130 */ 1131 if (__clear_user((void __user *)addr, sizeof(u32))) 1132 return 1; 1133 hv_vcpu->hv_vapic = data; 1134 kvm_vcpu_mark_page_dirty(vcpu, gfn); 1135 if (kvm_lapic_enable_pv_eoi(vcpu, 1136 gfn_to_gpa(gfn) | KVM_MSR_ENABLED, 1137 sizeof(struct hv_vp_assist_page))) 1138 return 1; 1139 break; 1140 } 1141 case HV_X64_MSR_EOI: 1142 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1143 case HV_X64_MSR_ICR: 1144 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1145 case HV_X64_MSR_TPR: 1146 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1147 case HV_X64_MSR_VP_RUNTIME: 1148 if (!host) 1149 return 1; 1150 hv_vcpu->runtime_offset = data - current_task_runtime_100ns(); 1151 break; 1152 case HV_X64_MSR_SCONTROL: 1153 case HV_X64_MSR_SVERSION: 1154 case HV_X64_MSR_SIEFP: 1155 case HV_X64_MSR_SIMP: 1156 case HV_X64_MSR_EOM: 1157 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1158 return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); 1159 case HV_X64_MSR_STIMER0_CONFIG: 1160 case HV_X64_MSR_STIMER1_CONFIG: 1161 case HV_X64_MSR_STIMER2_CONFIG: 1162 case HV_X64_MSR_STIMER3_CONFIG: { 1163 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1164 1165 return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), 1166 data, host); 1167 } 1168 case HV_X64_MSR_STIMER0_COUNT: 1169 case HV_X64_MSR_STIMER1_COUNT: 1170 case HV_X64_MSR_STIMER2_COUNT: 1171 case HV_X64_MSR_STIMER3_COUNT: { 1172 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1173 1174 return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), 1175 data, host); 1176 } 1177 case HV_X64_MSR_TSC_FREQUENCY: 1178 case HV_X64_MSR_APIC_FREQUENCY: 1179 /* read-only, but still ignore it if host-initiated */ 1180 if (!host) 1181 return 1; 1182 break; 1183 default: 1184 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1185 msr, data); 1186 return 1; 1187 } 1188 1189 return 0; 1190 } 1191 1192 static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1193 { 1194 u64 data = 0; 1195 struct kvm *kvm = vcpu->kvm; 1196 struct kvm_hv *hv = &kvm->arch.hyperv; 1197 1198 switch (msr) { 1199 case HV_X64_MSR_GUEST_OS_ID: 1200 data = hv->hv_guest_os_id; 1201 break; 1202 case HV_X64_MSR_HYPERCALL: 1203 data = hv->hv_hypercall; 1204 break; 1205 case HV_X64_MSR_TIME_REF_COUNT: 1206 data = get_time_ref_counter(kvm); 1207 break; 1208 case HV_X64_MSR_REFERENCE_TSC: 1209 data = hv->hv_tsc_page; 1210 break; 1211 case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: 1212 return kvm_hv_msr_get_crash_data(vcpu, 1213 msr - HV_X64_MSR_CRASH_P0, 1214 pdata); 1215 case HV_X64_MSR_CRASH_CTL: 1216 return kvm_hv_msr_get_crash_ctl(vcpu, pdata); 1217 case HV_X64_MSR_RESET: 1218 data = 0; 1219 break; 1220 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1221 data = hv->hv_reenlightenment_control; 1222 break; 1223 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1224 data = hv->hv_tsc_emulation_control; 1225 break; 1226 case HV_X64_MSR_TSC_EMULATION_STATUS: 1227 data = hv->hv_tsc_emulation_status; 1228 break; 1229 default: 1230 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1231 return 1; 1232 } 1233 1234 *pdata = data; 1235 return 0; 1236 } 1237 1238 static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, 1239 bool host) 1240 { 1241 u64 data = 0; 1242 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1243 1244 switch (msr) { 1245 case HV_X64_MSR_VP_INDEX: 1246 data = hv_vcpu->vp_index; 1247 break; 1248 case HV_X64_MSR_EOI: 1249 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1250 case HV_X64_MSR_ICR: 1251 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1252 case HV_X64_MSR_TPR: 1253 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1254 case HV_X64_MSR_VP_ASSIST_PAGE: 1255 data = hv_vcpu->hv_vapic; 1256 break; 1257 case HV_X64_MSR_VP_RUNTIME: 1258 data = current_task_runtime_100ns() + hv_vcpu->runtime_offset; 1259 break; 1260 case HV_X64_MSR_SCONTROL: 1261 case HV_X64_MSR_SVERSION: 1262 case HV_X64_MSR_SIEFP: 1263 case HV_X64_MSR_SIMP: 1264 case HV_X64_MSR_EOM: 1265 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1266 return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host); 1267 case HV_X64_MSR_STIMER0_CONFIG: 1268 case HV_X64_MSR_STIMER1_CONFIG: 1269 case HV_X64_MSR_STIMER2_CONFIG: 1270 case HV_X64_MSR_STIMER3_CONFIG: { 1271 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1272 1273 return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), 1274 pdata); 1275 } 1276 case HV_X64_MSR_STIMER0_COUNT: 1277 case HV_X64_MSR_STIMER1_COUNT: 1278 case HV_X64_MSR_STIMER2_COUNT: 1279 case HV_X64_MSR_STIMER3_COUNT: { 1280 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1281 1282 return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), 1283 pdata); 1284 } 1285 case HV_X64_MSR_TSC_FREQUENCY: 1286 data = (u64)vcpu->arch.virtual_tsc_khz * 1000; 1287 break; 1288 case HV_X64_MSR_APIC_FREQUENCY: 1289 data = APIC_BUS_FREQUENCY; 1290 break; 1291 default: 1292 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1293 return 1; 1294 } 1295 *pdata = data; 1296 return 0; 1297 } 1298 1299 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1300 { 1301 if (kvm_hv_msr_partition_wide(msr)) { 1302 int r; 1303 1304 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1305 r = kvm_hv_set_msr_pw(vcpu, msr, data, host); 1306 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1307 return r; 1308 } else 1309 return kvm_hv_set_msr(vcpu, msr, data, host); 1310 } 1311 1312 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) 1313 { 1314 if (kvm_hv_msr_partition_wide(msr)) { 1315 int r; 1316 1317 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1318 r = kvm_hv_get_msr_pw(vcpu, msr, pdata); 1319 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1320 return r; 1321 } else 1322 return kvm_hv_get_msr(vcpu, msr, pdata, host); 1323 } 1324 1325 static __always_inline unsigned long *sparse_set_to_vcpu_mask( 1326 struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask, 1327 u64 *vp_bitmap, unsigned long 
*vcpu_bitmap)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
			     (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len =
			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_vcpu->tlb_flush);

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	kvm_make_vcpus_request_mask(kvm,
				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
				    vcpu_mask, &hv_vcpu->tlb_flush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = vector
	};
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
			continue;

		/* We fail only when APIC is disabled */
		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
			   bool ex, bool fast)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	unsigned long valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!fast) {
			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = outgpa;
			vector = (u32)ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		if (!sparse_banks_len)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_send_ipi_ex,
						    vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_rax_write(vcpu, result);
	else {
		kvm_rdx_write(vcpu, result >> 32);
		kvm_rax_write(vcpu, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, rep;

	/*
	 * hypercall generates UD from non zero cpl and real mode
	 * per HYPER-V spec
	 */
	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_mode(vcpu)) {
		param = kvm_rcx_read(vcpu);
		ingpa = kvm_rdx_read(vcpu);
		outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		param = ((u64)kvm_rdx_read(vcpu) << 32) |
			(kvm_rax_read(vcpu) & 0xffffffff);
		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			(kvm_rcx_read(vcpu) & 0xffffffff);
		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			(kvm_rsi_read(vcpu) & 0xffffffff);
	}

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

	switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT: 1640 if (unlikely(rep)) { 1641 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1642 break; 1643 } 1644 kvm_vcpu_on_spin(vcpu, true); 1645 break; 1646 case HVCALL_SIGNAL_EVENT: 1647 if (unlikely(rep)) { 1648 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1649 break; 1650 } 1651 ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); 1652 if (ret != HV_STATUS_INVALID_PORT_ID) 1653 break; 1654 /* fall through - maybe userspace knows this conn_id. */ 1655 case HVCALL_POST_MESSAGE: 1656 /* don't bother userspace if it has no way to handle it */ 1657 if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { 1658 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1659 break; 1660 } 1661 vcpu->run->exit_reason = KVM_EXIT_HYPERV; 1662 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; 1663 vcpu->run->hyperv.u.hcall.input = param; 1664 vcpu->run->hyperv.u.hcall.params[0] = ingpa; 1665 vcpu->run->hyperv.u.hcall.params[1] = outgpa; 1666 vcpu->arch.complete_userspace_io = 1667 kvm_hv_hypercall_complete_userspace; 1668 return 0; 1669 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: 1670 if (unlikely(fast || !rep_cnt || rep_idx)) { 1671 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1672 break; 1673 } 1674 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1675 break; 1676 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: 1677 if (unlikely(fast || rep)) { 1678 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1679 break; 1680 } 1681 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1682 break; 1683 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: 1684 if (unlikely(fast || !rep_cnt || rep_idx)) { 1685 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1686 break; 1687 } 1688 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1689 break; 1690 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: 1691 if (unlikely(fast || rep)) { 1692 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1693 break; 1694 } 1695 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1696 break; 1697 case HVCALL_SEND_IPI: 1698 if (unlikely(rep)) { 1699 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1700 break; 1701 } 1702 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); 1703 break; 1704 case HVCALL_SEND_IPI_EX: 1705 if (unlikely(fast || rep)) { 1706 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1707 break; 1708 } 1709 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); 1710 break; 1711 default: 1712 ret = HV_STATUS_INVALID_HYPERCALL_CODE; 1713 break; 1714 } 1715 1716 return kvm_hv_hypercall_complete(vcpu, ret); 1717 } 1718 1719 void kvm_hv_init_vm(struct kvm *kvm) 1720 { 1721 mutex_init(&kvm->arch.hyperv.hv_lock); 1722 idr_init(&kvm->arch.hyperv.conn_to_evt); 1723 } 1724 1725 void kvm_hv_destroy_vm(struct kvm *kvm) 1726 { 1727 struct eventfd_ctx *eventfd; 1728 int i; 1729 1730 idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) 1731 eventfd_ctx_put(eventfd); 1732 idr_destroy(&kvm->arch.hyperv.conn_to_evt); 1733 } 1734 1735 static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) 1736 { 1737 struct kvm_hv *hv = &kvm->arch.hyperv; 1738 struct eventfd_ctx *eventfd; 1739 int ret; 1740 1741 eventfd = eventfd_ctx_fdget(fd); 1742 if (IS_ERR(eventfd)) 1743 return PTR_ERR(eventfd); 1744 1745 mutex_lock(&hv->hv_lock); 1746 ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, 1747 GFP_KERNEL_ACCOUNT); 1748 mutex_unlock(&hv->hv_lock); 1749 1750 if (ret >= 0) 1751 return 0; 1752 1753 if (ret == -ENOSPC) 1754 ret = -EEXIST; 1755 eventfd_ctx_put(eventfd); 1756 return ret; 1757 } 1758 1759 static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) 1760 { 1761 struct 
kvm_hv *hv = &kvm->arch.hyperv; 1762 struct eventfd_ctx *eventfd; 1763 1764 mutex_lock(&hv->hv_lock); 1765 eventfd = idr_remove(&hv->conn_to_evt, conn_id); 1766 mutex_unlock(&hv->hv_lock); 1767 1768 if (!eventfd) 1769 return -ENOENT; 1770 1771 synchronize_srcu(&kvm->srcu); 1772 eventfd_ctx_put(eventfd); 1773 return 0; 1774 } 1775 1776 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) 1777 { 1778 if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || 1779 (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) 1780 return -EINVAL; 1781 1782 if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) 1783 return kvm_hv_eventfd_deassign(kvm, args->conn_id); 1784 return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); 1785 } 1786 1787 int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, 1788 struct kvm_cpuid_entry2 __user *entries) 1789 { 1790 uint16_t evmcs_ver = 0; 1791 struct kvm_cpuid_entry2 cpuid_entries[] = { 1792 { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, 1793 { .function = HYPERV_CPUID_INTERFACE }, 1794 { .function = HYPERV_CPUID_VERSION }, 1795 { .function = HYPERV_CPUID_FEATURES }, 1796 { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, 1797 { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, 1798 { .function = HYPERV_CPUID_NESTED_FEATURES }, 1799 }; 1800 int i, nent = ARRAY_SIZE(cpuid_entries); 1801 1802 if (kvm_x86_ops->nested_get_evmcs_version) 1803 evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu); 1804 1805 /* Skip NESTED_FEATURES if eVMCS is not supported */ 1806 if (!evmcs_ver) 1807 --nent; 1808 1809 if (cpuid->nent < nent) 1810 return -E2BIG; 1811 1812 if (cpuid->nent > nent) 1813 cpuid->nent = nent; 1814 1815 for (i = 0; i < nent; i++) { 1816 struct kvm_cpuid_entry2 *ent = &cpuid_entries[i]; 1817 u32 signature[3]; 1818 1819 switch (ent->function) { 1820 case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: 1821 memcpy(signature, "Linux KVM Hv", 12); 1822 1823 ent->eax = HYPERV_CPUID_NESTED_FEATURES; 1824 ent->ebx = signature[0]; 1825 ent->ecx = signature[1]; 1826 ent->edx = signature[2]; 1827 break; 1828 1829 case HYPERV_CPUID_INTERFACE: 1830 memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); 1831 ent->eax = signature[0]; 1832 break; 1833 1834 case HYPERV_CPUID_VERSION: 1835 /* 1836 * We implement some Hyper-V 2016 functions so let's use 1837 * this version. 
1838 */ 1839 ent->eax = 0x00003839; 1840 ent->ebx = 0x000A0000; 1841 break; 1842 1843 case HYPERV_CPUID_FEATURES: 1844 ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE; 1845 ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; 1846 ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE; 1847 ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; 1848 ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; 1849 ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; 1850 ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; 1851 ent->eax |= HV_X64_MSR_RESET_AVAILABLE; 1852 ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; 1853 ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS; 1854 ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT; 1855 1856 ent->ebx |= HV_X64_POST_MESSAGES; 1857 ent->ebx |= HV_X64_SIGNAL_EVENTS; 1858 1859 ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; 1860 ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; 1861 1862 /* 1863 * Direct Synthetic timers only make sense with in-kernel 1864 * LAPIC 1865 */ 1866 if (lapic_in_kernel(vcpu)) 1867 ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; 1868 1869 break; 1870 1871 case HYPERV_CPUID_ENLIGHTMENT_INFO: 1872 ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; 1873 ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; 1874 ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; 1875 ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; 1876 ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; 1877 if (evmcs_ver) 1878 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 1879 if (!cpu_smt_possible()) 1880 ent->eax |= HV_X64_NO_NONARCH_CORESHARING; 1881 /* 1882 * Default number of spinlock retry attempts, matches 1883 * HyperV 2016. 1884 */ 1885 ent->ebx = 0x00000FFF; 1886 1887 break; 1888 1889 case HYPERV_CPUID_IMPLEMENT_LIMITS: 1890 /* Maximum number of virtual processors */ 1891 ent->eax = KVM_MAX_VCPUS; 1892 /* 1893 * Maximum number of logical processors, matches 1894 * HyperV 2016. 1895 */ 1896 ent->ebx = 64; 1897 1898 break; 1899 1900 case HYPERV_CPUID_NESTED_FEATURES: 1901 ent->eax = evmcs_ver; 1902 1903 break; 1904 1905 default: 1906 break; 1907 } 1908 } 1909 1910 if (copy_to_user(entries, cpuid_entries, 1911 nent * sizeof(struct kvm_cpuid_entry2))) 1912 return -EFAULT; 1913 1914 return 0; 1915 } 1916
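
/*
 * Illustrative userspace sketch (not part of this file; assumes a VM fd
 * obtained via KVM_CREATE_VM and the uapi definitions from <linux/kvm.h>):
 * wire a Hyper-V connection id to an eventfd so that a guest
 * HVCALL_SIGNAL_EVENT, handled by kvm_hvcall_signal_event() above, pops the
 * eventfd in the VMM.
 *
 *	#include <stdint.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int wire_hv_conn(int vm_fd, uint32_t conn_id)
 *	{
 *		struct kvm_hyperv_eventfd hvevtfd = {
 *			.conn_id = conn_id,	// must fit KVM_HYPERV_CONN_ID_MASK
 *			.fd = eventfd(0, EFD_CLOEXEC),
 *			.flags = 0,		// KVM_HYPERV_EVENTFD_DEASSIGN to undo
 *		};
 *
 *		if (hvevtfd.fd < 0)
 *			return -1;
 *		if (ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevtfd) < 0)
 *			return -1;
 *		return hvevtfd.fd;	// poll/read this to observe HvSignalEvent
 *	}
 */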