1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * KVM Microsoft Hyper-V emulation 4 * 5 * derived from arch/x86/kvm/x86.c 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * Copyright (C) 2008 Qumranet, Inc. 9 * Copyright IBM Corporation, 2008 10 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 11 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> 12 * 13 * Authors: 14 * Avi Kivity <avi@qumranet.com> 15 * Yaniv Kamay <yaniv@qumranet.com> 16 * Amit Shah <amit.shah@qumranet.com> 17 * Ben-Ami Yassour <benami@il.ibm.com> 18 * Andrey Smetanin <asmetanin@virtuozzo.com> 19 */ 20 21 #include "x86.h" 22 #include "lapic.h" 23 #include "ioapic.h" 24 #include "hyperv.h" 25 26 #include <linux/cpu.h> 27 #include <linux/kvm_host.h> 28 #include <linux/highmem.h> 29 #include <linux/sched/cputime.h> 30 #include <linux/eventfd.h> 31 32 #include <asm/apicdef.h> 33 #include <trace/events/kvm.h> 34 35 #include "trace.h" 36 #include "irq.h" 37 38 #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) 39 40 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 41 bool vcpu_kick); 42 43 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) 44 { 45 return atomic64_read(&synic->sint[sint]); 46 } 47 48 static inline int synic_get_sint_vector(u64 sint_value) 49 { 50 if (sint_value & HV_SYNIC_SINT_MASKED) 51 return -1; 52 return sint_value & HV_SYNIC_SINT_VECTOR_MASK; 53 } 54 55 static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, 56 int vector) 57 { 58 int i; 59 60 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 61 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 62 return true; 63 } 64 return false; 65 } 66 67 static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, 68 int vector) 69 { 70 int i; 71 u64 sint_value; 72 73 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 74 sint_value = synic_read_sint(synic, i); 75 if (synic_get_sint_vector(sint_value) == vector && 76 sint_value & HV_SYNIC_SINT_AUTO_EOI) 77 return true; 78 } 79 return false; 80 } 81 82 static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, 83 int vector) 84 { 85 if (vector < HV_SYNIC_FIRST_VALID_VECTOR) 86 return; 87 88 if (synic_has_vector_connected(synic, vector)) 89 __set_bit(vector, synic->vec_bitmap); 90 else 91 __clear_bit(vector, synic->vec_bitmap); 92 93 if (synic_has_vector_auto_eoi(synic, vector)) 94 __set_bit(vector, synic->auto_eoi_bitmap); 95 else 96 __clear_bit(vector, synic->auto_eoi_bitmap); 97 } 98 99 static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, 100 u64 data, bool host) 101 { 102 int vector, old_vector; 103 bool masked; 104 105 vector = data & HV_SYNIC_SINT_VECTOR_MASK; 106 masked = data & HV_SYNIC_SINT_MASKED; 107 108 /* 109 * Valid vectors are 16-255, however, nested Hyper-V attempts to write 110 * default '0x10000' value on boot and this should not #GP. We need to 111 * allow zero-initing the register from host as well. 112 */ 113 if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) 114 return 1; 115 /* 116 * Guest may configure multiple SINTs to use the same vector, so 117 * we maintain a bitmap of vectors handled by synic, and a 118 * bitmap of vectors with auto-eoi behavior. The bitmaps are 119 * updated here, and atomically queried on fast paths. 120 */ 121 old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; 122 123 atomic64_set(&synic->sint[sint], data); 124 125 synic_update_vector(synic, old_vector); 126 127 synic_update_vector(synic, vector); 128 129 /* Load SynIC vectors into EOI exit bitmap */ 130 kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); 131 return 0; 132 } 133 134 static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) 135 { 136 struct kvm_vcpu *vcpu = NULL; 137 int i; 138 139 if (vpidx >= KVM_MAX_VCPUS) 140 return NULL; 141 142 vcpu = kvm_get_vcpu(kvm, vpidx); 143 if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 144 return vcpu; 145 kvm_for_each_vcpu(i, vcpu, kvm) 146 if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 147 return vcpu; 148 return NULL; 149 } 150 151 static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) 152 { 153 struct kvm_vcpu *vcpu; 154 struct kvm_vcpu_hv_synic *synic; 155 156 vcpu = get_vcpu_by_vpidx(kvm, vpidx); 157 if (!vcpu) 158 return NULL; 159 synic = vcpu_to_synic(vcpu); 160 return (synic->active) ? synic : NULL; 161 } 162 163 static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) 164 { 165 struct kvm *kvm = vcpu->kvm; 166 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 167 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 168 struct kvm_vcpu_hv_stimer *stimer; 169 int gsi, idx; 170 171 trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); 172 173 /* Try to deliver pending Hyper-V SynIC timers messages */ 174 for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { 175 stimer = &hv_vcpu->stimer[idx]; 176 if (stimer->msg_pending && stimer->config.enable && 177 !stimer->config.direct_mode && 178 stimer->config.sintx == sint) 179 stimer_mark_pending(stimer, false); 180 } 181 182 idx = srcu_read_lock(&kvm->irq_srcu); 183 gsi = atomic_read(&synic->sint_to_gsi[sint]); 184 if (gsi != -1) 185 kvm_notify_acked_gsi(kvm, gsi); 186 srcu_read_unlock(&kvm->irq_srcu, idx); 187 } 188 189 static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) 190 { 191 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 192 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 193 194 hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; 195 hv_vcpu->exit.u.synic.msr = msr; 196 hv_vcpu->exit.u.synic.control = synic->control; 197 hv_vcpu->exit.u.synic.evt_page = synic->evt_page; 198 hv_vcpu->exit.u.synic.msg_page = synic->msg_page; 199 200 kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 201 } 202 203 static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, 204 u32 msr, u64 data, bool host) 205 { 206 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 207 int ret; 208 209 if (!synic->active && !host) 210 return 1; 211 212 trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); 213 214 ret = 0; 215 switch (msr) { 216 case HV_X64_MSR_SCONTROL: 217 synic->control = data; 218 if (!host) 219 synic_exit(synic, msr); 220 break; 221 case HV_X64_MSR_SVERSION: 222 if (!host) { 223 ret = 1; 224 break; 225 } 226 synic->version = data; 227 break; 228 case HV_X64_MSR_SIEFP: 229 if ((data & HV_SYNIC_SIEFP_ENABLE) && !host && 230 !synic->dont_zero_synic_pages) 231 if (kvm_clear_guest(vcpu->kvm, 232 data & PAGE_MASK, PAGE_SIZE)) { 233 ret = 1; 234 break; 235 } 236 synic->evt_page = data; 237 if (!host) 238 synic_exit(synic, msr); 239 break; 240 case HV_X64_MSR_SIMP: 241 if ((data & HV_SYNIC_SIMP_ENABLE) && !host && 242 !synic->dont_zero_synic_pages) 243 if (kvm_clear_guest(vcpu->kvm, 244 data & PAGE_MASK, PAGE_SIZE)) { 245 ret = 1; 246 break; 247 } 248 synic->msg_page = data; 249 if (!host) 250 synic_exit(synic, msr); 251 break; 252 case HV_X64_MSR_EOM: { 253 int i; 254 255 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 256 kvm_hv_notify_acked_sint(vcpu, i); 257 break; 258 } 259 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 260 ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); 261 break; 262 default: 263 ret = 1; 264 break; 265 } 266 return ret; 267 } 268 269 static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, 270 bool host) 271 { 272 int ret; 273 274 if (!synic->active && !host) 275 return 1; 276 277 ret = 0; 278 switch (msr) { 279 case HV_X64_MSR_SCONTROL: 280 *pdata = synic->control; 281 break; 282 case HV_X64_MSR_SVERSION: 283 *pdata = synic->version; 284 break; 285 case HV_X64_MSR_SIEFP: 286 *pdata = synic->evt_page; 287 break; 288 case HV_X64_MSR_SIMP: 289 *pdata = synic->msg_page; 290 break; 291 case HV_X64_MSR_EOM: 292 *pdata = 0; 293 break; 294 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 295 *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); 296 break; 297 default: 298 ret = 1; 299 break; 300 } 301 return ret; 302 } 303 304 static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) 305 { 306 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 307 struct kvm_lapic_irq irq; 308 int ret, vector; 309 310 if (sint >= ARRAY_SIZE(synic->sint)) 311 return -EINVAL; 312 313 vector = synic_get_sint_vector(synic_read_sint(synic, sint)); 314 if (vector < 0) 315 return -ENOENT; 316 317 memset(&irq, 0, sizeof(irq)); 318 irq.shorthand = APIC_DEST_SELF; 319 irq.dest_mode = APIC_DEST_PHYSICAL; 320 irq.delivery_mode = APIC_DM_FIXED; 321 irq.vector = vector; 322 irq.level = 1; 323 324 ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); 325 trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); 326 return ret; 327 } 328 329 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint) 330 { 331 struct kvm_vcpu_hv_synic *synic; 332 333 synic = synic_get(kvm, vpidx); 334 if (!synic) 335 return -EINVAL; 336 337 return synic_set_irq(synic, sint); 338 } 339 340 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) 341 { 342 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 343 int i; 344 345 trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); 346 347 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 348 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 349 kvm_hv_notify_acked_sint(vcpu, i); 350 } 351 352 static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi) 353 { 354 struct kvm_vcpu_hv_synic *synic; 355 356 synic = synic_get(kvm, vpidx); 357 if (!synic) 358 return -EINVAL; 359 360 if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) 361 return -EINVAL; 362 363 atomic_set(&synic->sint_to_gsi[sint], gsi); 364 return 0; 365 } 366 367 void kvm_hv_irq_routing_update(struct kvm *kvm) 368 { 369 struct kvm_irq_routing_table *irq_rt; 370 struct kvm_kernel_irq_routing_entry *e; 371 u32 gsi; 372 373 irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, 374 lockdep_is_held(&kvm->irq_lock)); 375 376 for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { 377 hlist_for_each_entry(e, &irq_rt->map[gsi], link) { 378 if (e->type == KVM_IRQ_ROUTING_HV_SINT) 379 kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, 380 e->hv_sint.sint, gsi); 381 } 382 } 383 } 384 385 static void synic_init(struct kvm_vcpu_hv_synic *synic) 386 { 387 int i; 388 389 memset(synic, 0, sizeof(*synic)); 390 synic->version = HV_SYNIC_VERSION_1; 391 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 392 atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); 393 atomic_set(&synic->sint_to_gsi[i], -1); 394 } 395 } 396 397 static u64 get_time_ref_counter(struct kvm *kvm) 398 { 399 struct kvm_hv *hv = &kvm->arch.hyperv; 400 struct kvm_vcpu *vcpu; 401 u64 tsc; 402 403 /* 404 * The guest has not set up the TSC page or the clock isn't 405 * stable, fall back to get_kvmclock_ns. 406 */ 407 if (!hv->tsc_ref.tsc_sequence) 408 return div_u64(get_kvmclock_ns(kvm), 100); 409 410 vcpu = kvm_get_vcpu(kvm, 0); 411 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 412 return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) 413 + hv->tsc_ref.tsc_offset; 414 } 415 416 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 417 bool vcpu_kick) 418 { 419 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 420 421 set_bit(stimer->index, 422 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 423 kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 424 if (vcpu_kick) 425 kvm_vcpu_kick(vcpu); 426 } 427 428 static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) 429 { 430 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 431 432 trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, 433 stimer->index); 434 435 hrtimer_cancel(&stimer->timer); 436 clear_bit(stimer->index, 437 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 438 stimer->msg_pending = false; 439 stimer->exp_time = 0; 440 } 441 442 static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) 443 { 444 struct kvm_vcpu_hv_stimer *stimer; 445 446 stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); 447 trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, 448 stimer->index); 449 stimer_mark_pending(stimer, true); 450 451 return HRTIMER_NORESTART; 452 } 453 454 /* 455 * stimer_start() assumptions: 456 * a) stimer->count is not equal to 0 457 * b) stimer->config has HV_STIMER_ENABLE flag 458 */ 459 static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) 460 { 461 u64 time_now; 462 ktime_t ktime_now; 463 464 time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); 465 ktime_now = ktime_get(); 466 467 if (stimer->config.periodic) { 468 if (stimer->exp_time) { 469 if (time_now >= stimer->exp_time) { 470 u64 remainder; 471 472 div64_u64_rem(time_now - stimer->exp_time, 473 stimer->count, &remainder); 474 stimer->exp_time = 475 time_now + (stimer->count - remainder); 476 } 477 } else 478 stimer->exp_time = time_now + stimer->count; 479 480 trace_kvm_hv_stimer_start_periodic( 481 stimer_to_vcpu(stimer)->vcpu_id, 482 stimer->index, 483 time_now, stimer->exp_time); 484 485 hrtimer_start(&stimer->timer, 486 ktime_add_ns(ktime_now, 487 100 * (stimer->exp_time - time_now)), 488 HRTIMER_MODE_ABS); 489 return 0; 490 } 491 stimer->exp_time = stimer->count; 492 if (time_now >= stimer->count) { 493 /* 494 * Expire timer according to Hypervisor Top-Level Functional 495 * specification v4(15.3.1): 496 * "If a one shot is enabled and the specified count is in 497 * the past, it will expire immediately." 498 */ 499 stimer_mark_pending(stimer, false); 500 return 0; 501 } 502 503 trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, 504 stimer->index, 505 time_now, stimer->count); 506 507 hrtimer_start(&stimer->timer, 508 ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), 509 HRTIMER_MODE_ABS); 510 return 0; 511 } 512 513 static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, 514 bool host) 515 { 516 union hv_stimer_config new_config = {.as_uint64 = config}, 517 old_config = {.as_uint64 = stimer->config.as_uint64}; 518 519 trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, 520 stimer->index, config, host); 521 522 stimer_cleanup(stimer); 523 if (old_config.enable && 524 !new_config.direct_mode && new_config.sintx == 0) 525 new_config.enable = 0; 526 stimer->config.as_uint64 = new_config.as_uint64; 527 528 if (stimer->config.enable) 529 stimer_mark_pending(stimer, false); 530 531 return 0; 532 } 533 534 static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, 535 bool host) 536 { 537 trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, 538 stimer->index, count, host); 539 540 stimer_cleanup(stimer); 541 stimer->count = count; 542 if (stimer->count == 0) 543 stimer->config.enable = 0; 544 else if (stimer->config.auto_enable) 545 stimer->config.enable = 1; 546 547 if (stimer->config.enable) 548 stimer_mark_pending(stimer, false); 549 550 return 0; 551 } 552 553 static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) 554 { 555 *pconfig = stimer->config.as_uint64; 556 return 0; 557 } 558 559 static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) 560 { 561 *pcount = stimer->count; 562 return 0; 563 } 564 565 static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, 566 struct hv_message *src_msg, bool no_retry) 567 { 568 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 569 int msg_off = offsetof(struct hv_message_page, sint_message[sint]); 570 gfn_t msg_page_gfn; 571 struct hv_message_header hv_hdr; 572 int r; 573 574 if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) 575 return -ENOENT; 576 577 msg_page_gfn = synic->msg_page >> PAGE_SHIFT; 578 579 /* 580 * Strictly following the spec-mandated ordering would assume setting 581 * .msg_pending before checking .message_type. However, this function 582 * is only called in vcpu context so the entire update is atomic from 583 * guest POV and thus the exact order here doesn't matter. 584 */ 585 r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type, 586 msg_off + offsetof(struct hv_message, 587 header.message_type), 588 sizeof(hv_hdr.message_type)); 589 if (r < 0) 590 return r; 591 592 if (hv_hdr.message_type != HVMSG_NONE) { 593 if (no_retry) 594 return 0; 595 596 hv_hdr.message_flags.msg_pending = 1; 597 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, 598 &hv_hdr.message_flags, 599 msg_off + 600 offsetof(struct hv_message, 601 header.message_flags), 602 sizeof(hv_hdr.message_flags)); 603 if (r < 0) 604 return r; 605 return -EAGAIN; 606 } 607 608 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off, 609 sizeof(src_msg->header) + 610 src_msg->header.payload_size); 611 if (r < 0) 612 return r; 613 614 r = synic_set_irq(synic, sint); 615 if (r < 0) 616 return r; 617 if (r == 0) 618 return -EFAULT; 619 return 0; 620 } 621 622 static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) 623 { 624 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 625 struct hv_message *msg = &stimer->msg; 626 struct hv_timer_message_payload *payload = 627 (struct hv_timer_message_payload *)&msg->u.payload; 628 629 /* 630 * To avoid piling up periodic ticks, don't retry message 631 * delivery for them (within "lazy" lost ticks policy). 632 */ 633 bool no_retry = stimer->config.periodic; 634 635 payload->expiration_time = stimer->exp_time; 636 payload->delivery_time = get_time_ref_counter(vcpu->kvm); 637 return synic_deliver_msg(vcpu_to_synic(vcpu), 638 stimer->config.sintx, msg, 639 no_retry); 640 } 641 642 static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) 643 { 644 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 645 struct kvm_lapic_irq irq = { 646 .delivery_mode = APIC_DM_FIXED, 647 .vector = stimer->config.apic_vector 648 }; 649 650 if (lapic_in_kernel(vcpu)) 651 return !kvm_apic_set_irq(vcpu, &irq, NULL); 652 return 0; 653 } 654 655 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) 656 { 657 int r, direct = stimer->config.direct_mode; 658 659 stimer->msg_pending = true; 660 if (!direct) 661 r = stimer_send_msg(stimer); 662 else 663 r = stimer_notify_direct(stimer); 664 trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, 665 stimer->index, direct, r); 666 if (!r) { 667 stimer->msg_pending = false; 668 if (!(stimer->config.periodic)) 669 stimer->config.enable = 0; 670 } 671 } 672 673 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) 674 { 675 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 676 struct kvm_vcpu_hv_stimer *stimer; 677 u64 time_now, exp_time; 678 int i; 679 680 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 681 if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { 682 stimer = &hv_vcpu->stimer[i]; 683 if (stimer->config.enable) { 684 exp_time = stimer->exp_time; 685 686 if (exp_time) { 687 time_now = 688 get_time_ref_counter(vcpu->kvm); 689 if (time_now >= exp_time) 690 stimer_expiration(stimer); 691 } 692 693 if ((stimer->config.enable) && 694 stimer->count) { 695 if (!stimer->msg_pending) 696 stimer_start(stimer); 697 } else 698 stimer_cleanup(stimer); 699 } 700 } 701 } 702 703 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) 704 { 705 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 706 int i; 707 708 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 709 stimer_cleanup(&hv_vcpu->stimer[i]); 710 } 711 712 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) 713 { 714 if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) 715 return false; 716 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 717 } 718 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); 719 720 bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, 721 struct hv_vp_assist_page *assist_page) 722 { 723 if (!kvm_hv_assist_page_enabled(vcpu)) 724 return false; 725 return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, 726 assist_page, sizeof(*assist_page)); 727 } 728 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); 729 730 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) 731 { 732 struct hv_message *msg = &stimer->msg; 733 struct hv_timer_message_payload *payload = 734 (struct hv_timer_message_payload *)&msg->u.payload; 735 736 memset(&msg->header, 0, sizeof(msg->header)); 737 msg->header.message_type = HVMSG_TIMER_EXPIRED; 738 msg->header.payload_size = sizeof(*payload); 739 740 payload->timer_index = stimer->index; 741 payload->expiration_time = 0; 742 payload->delivery_time = 0; 743 } 744 745 static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) 746 { 747 memset(stimer, 0, sizeof(*stimer)); 748 stimer->index = timer_index; 749 hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 750 stimer->timer.function = stimer_timer_callback; 751 stimer_prepare_msg(stimer); 752 } 753 754 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) 755 { 756 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 757 int i; 758 759 synic_init(&hv_vcpu->synic); 760 761 bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 762 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 763 stimer_init(&hv_vcpu->stimer[i], i); 764 } 765 766 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu) 767 { 768 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 769 770 hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); 771 } 772 773 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) 774 { 775 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 776 777 /* 778 * Hyper-V SynIC auto EOI SINT's are 779 * not compatible with APICV, so request 780 * to deactivate APICV permanently. 781 */ 782 kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); 783 synic->active = true; 784 synic->dont_zero_synic_pages = dont_zero_synic_pages; 785 return 0; 786 } 787 788 static bool kvm_hv_msr_partition_wide(u32 msr) 789 { 790 bool r = false; 791 792 switch (msr) { 793 case HV_X64_MSR_GUEST_OS_ID: 794 case HV_X64_MSR_HYPERCALL: 795 case HV_X64_MSR_REFERENCE_TSC: 796 case HV_X64_MSR_TIME_REF_COUNT: 797 case HV_X64_MSR_CRASH_CTL: 798 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 799 case HV_X64_MSR_RESET: 800 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 801 case HV_X64_MSR_TSC_EMULATION_CONTROL: 802 case HV_X64_MSR_TSC_EMULATION_STATUS: 803 r = true; 804 break; 805 } 806 807 return r; 808 } 809 810 static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, 811 u32 index, u64 *pdata) 812 { 813 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 814 size_t size = ARRAY_SIZE(hv->hv_crash_param); 815 816 if (WARN_ON_ONCE(index >= size)) 817 return -EINVAL; 818 819 *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; 820 return 0; 821 } 822 823 static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) 824 { 825 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 826 827 *pdata = hv->hv_crash_ctl; 828 return 0; 829 } 830 831 static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) 832 { 833 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 834 835 if (host) 836 hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY; 837 838 if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) { 839 840 vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", 841 hv->hv_crash_param[0], 842 hv->hv_crash_param[1], 843 hv->hv_crash_param[2], 844 hv->hv_crash_param[3], 845 hv->hv_crash_param[4]); 846 847 /* Send notification about crash to user space */ 848 kvm_make_request(KVM_REQ_HV_CRASH, vcpu); 849 } 850 851 return 0; 852 } 853 854 static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, 855 u32 index, u64 data) 856 { 857 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 858 size_t size = ARRAY_SIZE(hv->hv_crash_param); 859 860 if (WARN_ON_ONCE(index >= size)) 861 return -EINVAL; 862 863 hv->hv_crash_param[array_index_nospec(index, size)] = data; 864 return 0; 865 } 866 867 /* 868 * The kvmclock and Hyper-V TSC page use similar formulas, and converting 869 * between them is possible: 870 * 871 * kvmclock formula: 872 * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) 873 * + system_time 874 * 875 * Hyper-V formula: 876 * nsec/100 = ticks * scale / 2^64 + offset 877 * 878 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 879 * By dividing the kvmclock formula by 100 and equating what's left we get: 880 * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 881 * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 882 * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100 883 * 884 * Now expand the kvmclock formula and divide by 100: 885 * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) 886 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) 887 * + system_time 888 * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 889 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 890 * + system_time / 100 891 * 892 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: 893 * nsec/100 = ticks * scale / 2^64 894 * - tsc_timestamp * scale / 2^64 895 * + system_time / 100 896 * 897 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: 898 * offset = system_time / 100 - tsc_timestamp * scale / 2^64 899 * 900 * These two equivalencies are implemented in this function. 901 */ 902 static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, 903 HV_REFERENCE_TSC_PAGE *tsc_ref) 904 { 905 u64 max_mul; 906 907 if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) 908 return false; 909 910 /* 911 * check if scale would overflow, if so we use the time ref counter 912 * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 913 * tsc_to_system_mul / 100 >= 2^(32-tsc_shift) 914 * tsc_to_system_mul >= 100 * 2^(32-tsc_shift) 915 */ 916 max_mul = 100ull << (32 - hv_clock->tsc_shift); 917 if (hv_clock->tsc_to_system_mul >= max_mul) 918 return false; 919 920 /* 921 * Otherwise compute the scale and offset according to the formulas 922 * derived above. 923 */ 924 tsc_ref->tsc_scale = 925 mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), 926 hv_clock->tsc_to_system_mul, 927 100); 928 929 tsc_ref->tsc_offset = hv_clock->system_time; 930 do_div(tsc_ref->tsc_offset, 100); 931 tsc_ref->tsc_offset -= 932 mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); 933 return true; 934 } 935 936 void kvm_hv_setup_tsc_page(struct kvm *kvm, 937 struct pvclock_vcpu_time_info *hv_clock) 938 { 939 struct kvm_hv *hv = &kvm->arch.hyperv; 940 u32 tsc_seq; 941 u64 gfn; 942 943 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 944 BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); 945 946 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 947 return; 948 949 mutex_lock(&kvm->arch.hyperv.hv_lock); 950 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 951 goto out_unlock; 952 953 gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 954 /* 955 * Because the TSC parameters only vary when there is a 956 * change in the master clock, do not bother with caching. 957 */ 958 if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), 959 &tsc_seq, sizeof(tsc_seq)))) 960 goto out_unlock; 961 962 /* 963 * While we're computing and writing the parameters, force the 964 * guest to use the time reference count MSR. 965 */ 966 hv->tsc_ref.tsc_sequence = 0; 967 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 968 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 969 goto out_unlock; 970 971 if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) 972 goto out_unlock; 973 974 /* Ensure sequence is zero before writing the rest of the struct. */ 975 smp_wmb(); 976 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 977 goto out_unlock; 978 979 /* 980 * Now switch to the TSC page mechanism by writing the sequence. 981 */ 982 tsc_seq++; 983 if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0) 984 tsc_seq = 1; 985 986 /* Write the struct entirely before the non-zero sequence. */ 987 smp_wmb(); 988 989 hv->tsc_ref.tsc_sequence = tsc_seq; 990 kvm_write_guest(kvm, gfn_to_gpa(gfn), 991 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); 992 out_unlock: 993 mutex_unlock(&kvm->arch.hyperv.hv_lock); 994 } 995 996 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, 997 bool host) 998 { 999 struct kvm *kvm = vcpu->kvm; 1000 struct kvm_hv *hv = &kvm->arch.hyperv; 1001 1002 switch (msr) { 1003 case HV_X64_MSR_GUEST_OS_ID: 1004 hv->hv_guest_os_id = data; 1005 /* setting guest os id to zero disables hypercall page */ 1006 if (!hv->hv_guest_os_id) 1007 hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 1008 break; 1009 case HV_X64_MSR_HYPERCALL: { 1010 u64 gfn; 1011 unsigned long addr; 1012 u8 instructions[4]; 1013 1014 /* if guest os id is not set hypercall should remain disabled */ 1015 if (!hv->hv_guest_os_id) 1016 break; 1017 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1018 hv->hv_hypercall = data; 1019 break; 1020 } 1021 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1022 addr = gfn_to_hva(kvm, gfn); 1023 if (kvm_is_error_hva(addr)) 1024 return 1; 1025 kvm_x86_ops.patch_hypercall(vcpu, instructions); 1026 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1027 if (__copy_to_user((void __user *)addr, instructions, 4)) 1028 return 1; 1029 hv->hv_hypercall = data; 1030 mark_page_dirty(kvm, gfn); 1031 break; 1032 } 1033 case HV_X64_MSR_REFERENCE_TSC: 1034 hv->hv_tsc_page = data; 1035 if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) 1036 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1037 break; 1038 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1039 return kvm_hv_msr_set_crash_data(vcpu, 1040 msr - HV_X64_MSR_CRASH_P0, 1041 data); 1042 case HV_X64_MSR_CRASH_CTL: 1043 return kvm_hv_msr_set_crash_ctl(vcpu, data, host); 1044 case HV_X64_MSR_RESET: 1045 if (data == 1) { 1046 vcpu_debug(vcpu, "hyper-v reset requested\n"); 1047 kvm_make_request(KVM_REQ_HV_RESET, vcpu); 1048 } 1049 break; 1050 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1051 hv->hv_reenlightenment_control = data; 1052 break; 1053 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1054 hv->hv_tsc_emulation_control = data; 1055 break; 1056 case HV_X64_MSR_TSC_EMULATION_STATUS: 1057 hv->hv_tsc_emulation_status = data; 1058 break; 1059 case HV_X64_MSR_TIME_REF_COUNT: 1060 /* read-only, but still ignore it if host-initiated */ 1061 if (!host) 1062 return 1; 1063 break; 1064 default: 1065 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1066 msr, data); 1067 return 1; 1068 } 1069 return 0; 1070 } 1071 1072 /* Calculate cpu time spent by current task in 100ns units */ 1073 static u64 current_task_runtime_100ns(void) 1074 { 1075 u64 utime, stime; 1076 1077 task_cputime_adjusted(current, &utime, &stime); 1078 1079 return div_u64(utime + stime, 100); 1080 } 1081 1082 static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1083 { 1084 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1085 1086 switch (msr) { 1087 case HV_X64_MSR_VP_INDEX: { 1088 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 1089 int vcpu_idx = kvm_vcpu_get_idx(vcpu); 1090 u32 new_vp_index = (u32)data; 1091 1092 if (!host || new_vp_index >= KVM_MAX_VCPUS) 1093 return 1; 1094 1095 if (new_vp_index == hv_vcpu->vp_index) 1096 return 0; 1097 1098 /* 1099 * The VP index is initialized to vcpu_index by 1100 * kvm_hv_vcpu_postcreate so they initially match. Now the 1101 * VP index is changing, adjust num_mismatched_vp_indexes if 1102 * it now matches or no longer matches vcpu_idx. 1103 */ 1104 if (hv_vcpu->vp_index == vcpu_idx) 1105 atomic_inc(&hv->num_mismatched_vp_indexes); 1106 else if (new_vp_index == vcpu_idx) 1107 atomic_dec(&hv->num_mismatched_vp_indexes); 1108 1109 hv_vcpu->vp_index = new_vp_index; 1110 break; 1111 } 1112 case HV_X64_MSR_VP_ASSIST_PAGE: { 1113 u64 gfn; 1114 unsigned long addr; 1115 1116 if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { 1117 hv_vcpu->hv_vapic = data; 1118 if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) 1119 return 1; 1120 break; 1121 } 1122 gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; 1123 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); 1124 if (kvm_is_error_hva(addr)) 1125 return 1; 1126 1127 /* 1128 * Clear apic_assist portion of struct hv_vp_assist_page 1129 * only, there can be valuable data in the rest which needs 1130 * to be preserved e.g. on migration. 1131 */ 1132 if (__clear_user((void __user *)addr, sizeof(u32))) 1133 return 1; 1134 hv_vcpu->hv_vapic = data; 1135 kvm_vcpu_mark_page_dirty(vcpu, gfn); 1136 if (kvm_lapic_enable_pv_eoi(vcpu, 1137 gfn_to_gpa(gfn) | KVM_MSR_ENABLED, 1138 sizeof(struct hv_vp_assist_page))) 1139 return 1; 1140 break; 1141 } 1142 case HV_X64_MSR_EOI: 1143 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1144 case HV_X64_MSR_ICR: 1145 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1146 case HV_X64_MSR_TPR: 1147 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1148 case HV_X64_MSR_VP_RUNTIME: 1149 if (!host) 1150 return 1; 1151 hv_vcpu->runtime_offset = data - current_task_runtime_100ns(); 1152 break; 1153 case HV_X64_MSR_SCONTROL: 1154 case HV_X64_MSR_SVERSION: 1155 case HV_X64_MSR_SIEFP: 1156 case HV_X64_MSR_SIMP: 1157 case HV_X64_MSR_EOM: 1158 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1159 return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); 1160 case HV_X64_MSR_STIMER0_CONFIG: 1161 case HV_X64_MSR_STIMER1_CONFIG: 1162 case HV_X64_MSR_STIMER2_CONFIG: 1163 case HV_X64_MSR_STIMER3_CONFIG: { 1164 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1165 1166 return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), 1167 data, host); 1168 } 1169 case HV_X64_MSR_STIMER0_COUNT: 1170 case HV_X64_MSR_STIMER1_COUNT: 1171 case HV_X64_MSR_STIMER2_COUNT: 1172 case HV_X64_MSR_STIMER3_COUNT: { 1173 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1174 1175 return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), 1176 data, host); 1177 } 1178 case HV_X64_MSR_TSC_FREQUENCY: 1179 case HV_X64_MSR_APIC_FREQUENCY: 1180 /* read-only, but still ignore it if host-initiated */ 1181 if (!host) 1182 return 1; 1183 break; 1184 default: 1185 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1186 msr, data); 1187 return 1; 1188 } 1189 1190 return 0; 1191 } 1192 1193 static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1194 { 1195 u64 data = 0; 1196 struct kvm *kvm = vcpu->kvm; 1197 struct kvm_hv *hv = &kvm->arch.hyperv; 1198 1199 switch (msr) { 1200 case HV_X64_MSR_GUEST_OS_ID: 1201 data = hv->hv_guest_os_id; 1202 break; 1203 case HV_X64_MSR_HYPERCALL: 1204 data = hv->hv_hypercall; 1205 break; 1206 case HV_X64_MSR_TIME_REF_COUNT: 1207 data = get_time_ref_counter(kvm); 1208 break; 1209 case HV_X64_MSR_REFERENCE_TSC: 1210 data = hv->hv_tsc_page; 1211 break; 1212 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1213 return kvm_hv_msr_get_crash_data(vcpu, 1214 msr - HV_X64_MSR_CRASH_P0, 1215 pdata); 1216 case HV_X64_MSR_CRASH_CTL: 1217 return kvm_hv_msr_get_crash_ctl(vcpu, pdata); 1218 case HV_X64_MSR_RESET: 1219 data = 0; 1220 break; 1221 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1222 data = hv->hv_reenlightenment_control; 1223 break; 1224 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1225 data = hv->hv_tsc_emulation_control; 1226 break; 1227 case HV_X64_MSR_TSC_EMULATION_STATUS: 1228 data = hv->hv_tsc_emulation_status; 1229 break; 1230 default: 1231 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1232 return 1; 1233 } 1234 1235 *pdata = data; 1236 return 0; 1237 } 1238 1239 static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, 1240 bool host) 1241 { 1242 u64 data = 0; 1243 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1244 1245 switch (msr) { 1246 case HV_X64_MSR_VP_INDEX: 1247 data = hv_vcpu->vp_index; 1248 break; 1249 case HV_X64_MSR_EOI: 1250 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1251 case HV_X64_MSR_ICR: 1252 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1253 case HV_X64_MSR_TPR: 1254 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1255 case HV_X64_MSR_VP_ASSIST_PAGE: 1256 data = hv_vcpu->hv_vapic; 1257 break; 1258 case HV_X64_MSR_VP_RUNTIME: 1259 data = current_task_runtime_100ns() + hv_vcpu->runtime_offset; 1260 break; 1261 case HV_X64_MSR_SCONTROL: 1262 case HV_X64_MSR_SVERSION: 1263 case HV_X64_MSR_SIEFP: 1264 case HV_X64_MSR_SIMP: 1265 case HV_X64_MSR_EOM: 1266 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 1267 return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host); 1268 case HV_X64_MSR_STIMER0_CONFIG: 1269 case HV_X64_MSR_STIMER1_CONFIG: 1270 case HV_X64_MSR_STIMER2_CONFIG: 1271 case HV_X64_MSR_STIMER3_CONFIG: { 1272 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1273 1274 return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), 1275 pdata); 1276 } 1277 case HV_X64_MSR_STIMER0_COUNT: 1278 case HV_X64_MSR_STIMER1_COUNT: 1279 case HV_X64_MSR_STIMER2_COUNT: 1280 case HV_X64_MSR_STIMER3_COUNT: { 1281 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1282 1283 return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), 1284 pdata); 1285 } 1286 case HV_X64_MSR_TSC_FREQUENCY: 1287 data = (u64)vcpu->arch.virtual_tsc_khz * 1000; 1288 break; 1289 case HV_X64_MSR_APIC_FREQUENCY: 1290 data = APIC_BUS_FREQUENCY; 1291 break; 1292 default: 1293 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1294 return 1; 1295 } 1296 *pdata = data; 1297 return 0; 1298 } 1299 1300 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1301 { 1302 if (kvm_hv_msr_partition_wide(msr)) { 1303 int r; 1304 1305 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1306 r = kvm_hv_set_msr_pw(vcpu, msr, data, host); 1307 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1308 return r; 1309 } else 1310 return kvm_hv_set_msr(vcpu, msr, data, host); 1311 } 1312 1313 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) 1314 { 1315 if (kvm_hv_msr_partition_wide(msr)) { 1316 int r; 1317 1318 mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); 1319 r = kvm_hv_get_msr_pw(vcpu, msr, pdata); 1320 mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); 1321 return r; 1322 } else 1323 return kvm_hv_get_msr(vcpu, msr, pdata, host); 1324 } 1325 1326 static __always_inline unsigned long *sparse_set_to_vcpu_mask( 1327 struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask, 1328 u64 *vp_bitmap, unsigned long *vcpu_bitmap) 1329 { 1330 struct kvm_hv *hv = &kvm->arch.hyperv; 1331 struct kvm_vcpu *vcpu; 1332 int i, bank, sbank = 0; 1333 1334 memset(vp_bitmap, 0, 1335 KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap)); 1336 for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 1337 KVM_HV_MAX_SPARSE_VCPU_SET_BITS) 1338 vp_bitmap[bank] = sparse_banks[sbank++]; 1339 1340 if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) { 1341 /* for all vcpus vp_index == vcpu_idx */ 1342 return (unsigned long *)vp_bitmap; 1343 } 1344 1345 bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS); 1346 kvm_for_each_vcpu(i, vcpu, kvm) { 1347 if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index, 1348 (unsigned long *)vp_bitmap)) 1349 __set_bit(i, vcpu_bitmap); 1350 } 1351 return vcpu_bitmap; 1352 } 1353 1354 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, 1355 u16 rep_cnt, bool ex) 1356 { 1357 struct kvm *kvm = current_vcpu->kvm; 1358 struct kvm_vcpu_hv *hv_vcpu = ¤t_vcpu->arch.hyperv; 1359 struct hv_tlb_flush_ex flush_ex; 1360 struct hv_tlb_flush flush; 1361 u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; 1362 DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); 1363 unsigned long *vcpu_mask; 1364 u64 valid_bank_mask; 1365 u64 sparse_banks[64]; 1366 int sparse_banks_len; 1367 bool all_cpus; 1368 1369 if (!ex) { 1370 if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush)))) 1371 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1372 1373 trace_kvm_hv_flush_tlb(flush.processor_mask, 1374 flush.address_space, flush.flags); 1375 1376 valid_bank_mask = BIT_ULL(0); 1377 sparse_banks[0] = flush.processor_mask; 1378 1379 /* 1380 * Work around possible WS2012 bug: it sends hypercalls 1381 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear, 1382 * while also expecting us to flush something and crashing if 1383 * we don't. Let's treat processor_mask == 0 same as 1384 * HV_FLUSH_ALL_PROCESSORS. 1385 */ 1386 all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || 1387 flush.processor_mask == 0; 1388 } else { 1389 if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, 1390 sizeof(flush_ex)))) 1391 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1392 1393 trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, 1394 flush_ex.hv_vp_set.format, 1395 flush_ex.address_space, 1396 flush_ex.flags); 1397 1398 valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask; 1399 all_cpus = flush_ex.hv_vp_set.format != 1400 HV_GENERIC_SET_SPARSE_4K; 1401 1402 sparse_banks_len = 1403 bitmap_weight((unsigned long *)&valid_bank_mask, 64) * 1404 sizeof(sparse_banks[0]); 1405 1406 if (!sparse_banks_len && !all_cpus) 1407 goto ret_success; 1408 1409 if (!all_cpus && 1410 kvm_read_guest(kvm, 1411 ingpa + offsetof(struct hv_tlb_flush_ex, 1412 hv_vp_set.bank_contents), 1413 sparse_banks, 1414 sparse_banks_len)) 1415 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1416 } 1417 1418 cpumask_clear(&hv_vcpu->tlb_flush); 1419 1420 vcpu_mask = all_cpus ? NULL : 1421 sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, 1422 vp_bitmap, vcpu_bitmap); 1423 1424 /* 1425 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't 1426 * analyze it here, flush TLB regardless of the specified address space. 1427 */ 1428 kvm_make_vcpus_request_mask(kvm, 1429 KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, 1430 NULL, vcpu_mask, &hv_vcpu->tlb_flush); 1431 1432 ret_success: 1433 /* We always do full TLB flush, set rep_done = rep_cnt. */ 1434 return (u64)HV_STATUS_SUCCESS | 1435 ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); 1436 } 1437 1438 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, 1439 unsigned long *vcpu_bitmap) 1440 { 1441 struct kvm_lapic_irq irq = { 1442 .delivery_mode = APIC_DM_FIXED, 1443 .vector = vector 1444 }; 1445 struct kvm_vcpu *vcpu; 1446 int i; 1447 1448 kvm_for_each_vcpu(i, vcpu, kvm) { 1449 if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) 1450 continue; 1451 1452 /* We fail only when APIC is disabled */ 1453 kvm_apic_set_irq(vcpu, &irq, NULL); 1454 } 1455 } 1456 1457 static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, 1458 bool ex, bool fast) 1459 { 1460 struct kvm *kvm = current_vcpu->kvm; 1461 struct hv_send_ipi_ex send_ipi_ex; 1462 struct hv_send_ipi send_ipi; 1463 u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; 1464 DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); 1465 unsigned long *vcpu_mask; 1466 unsigned long valid_bank_mask; 1467 u64 sparse_banks[64]; 1468 int sparse_banks_len; 1469 u32 vector; 1470 bool all_cpus; 1471 1472 if (!ex) { 1473 if (!fast) { 1474 if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, 1475 sizeof(send_ipi)))) 1476 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1477 sparse_banks[0] = send_ipi.cpu_mask; 1478 vector = send_ipi.vector; 1479 } else { 1480 /* 'reserved' part of hv_send_ipi should be 0 */ 1481 if (unlikely(ingpa >> 32 != 0)) 1482 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1483 sparse_banks[0] = outgpa; 1484 vector = (u32)ingpa; 1485 } 1486 all_cpus = false; 1487 valid_bank_mask = BIT_ULL(0); 1488 1489 trace_kvm_hv_send_ipi(vector, sparse_banks[0]); 1490 } else { 1491 if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, 1492 sizeof(send_ipi_ex)))) 1493 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1494 1495 trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, 1496 send_ipi_ex.vp_set.format, 1497 send_ipi_ex.vp_set.valid_bank_mask); 1498 1499 vector = send_ipi_ex.vector; 1500 valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; 1501 sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * 1502 sizeof(sparse_banks[0]); 1503 1504 all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; 1505 1506 if (!sparse_banks_len) 1507 goto ret_success; 1508 1509 if (!all_cpus && 1510 kvm_read_guest(kvm, 1511 ingpa + offsetof(struct hv_send_ipi_ex, 1512 vp_set.bank_contents), 1513 sparse_banks, 1514 sparse_banks_len)) 1515 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1516 } 1517 1518 if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) 1519 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1520 1521 vcpu_mask = all_cpus ? NULL : 1522 sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, 1523 vp_bitmap, vcpu_bitmap); 1524 1525 kvm_send_ipi_to_many(kvm, vector, vcpu_mask); 1526 1527 ret_success: 1528 return HV_STATUS_SUCCESS; 1529 } 1530 1531 bool kvm_hv_hypercall_enabled(struct kvm *kvm) 1532 { 1533 return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; 1534 } 1535 1536 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) 1537 { 1538 bool longmode; 1539 1540 longmode = is_64_bit_mode(vcpu); 1541 if (longmode) 1542 kvm_rax_write(vcpu, result); 1543 else { 1544 kvm_rdx_write(vcpu, result >> 32); 1545 kvm_rax_write(vcpu, result & 0xffffffff); 1546 } 1547 } 1548 1549 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) 1550 { 1551 kvm_hv_hypercall_set_result(vcpu, result); 1552 ++vcpu->stat.hypercalls; 1553 return kvm_skip_emulated_instruction(vcpu); 1554 } 1555 1556 static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) 1557 { 1558 return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); 1559 } 1560 1561 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) 1562 { 1563 struct eventfd_ctx *eventfd; 1564 1565 if (unlikely(!fast)) { 1566 int ret; 1567 gpa_t gpa = param; 1568 1569 if ((gpa & (__alignof__(param) - 1)) || 1570 offset_in_page(gpa) + sizeof(param) > PAGE_SIZE) 1571 return HV_STATUS_INVALID_ALIGNMENT; 1572 1573 ret = kvm_vcpu_read_guest(vcpu, gpa, ¶m, sizeof(param)); 1574 if (ret < 0) 1575 return HV_STATUS_INVALID_ALIGNMENT; 1576 } 1577 1578 /* 1579 * Per spec, bits 32-47 contain the extra "flag number". However, we 1580 * have no use for it, and in all known usecases it is zero, so just 1581 * report lookup failure if it isn't. 1582 */ 1583 if (param & 0xffff00000000ULL) 1584 return HV_STATUS_INVALID_PORT_ID; 1585 /* remaining bits are reserved-zero */ 1586 if (param & ~KVM_HYPERV_CONN_ID_MASK) 1587 return HV_STATUS_INVALID_HYPERCALL_INPUT; 1588 1589 /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */ 1590 rcu_read_lock(); 1591 eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); 1592 rcu_read_unlock(); 1593 if (!eventfd) 1594 return HV_STATUS_INVALID_PORT_ID; 1595 1596 eventfd_signal(eventfd, 1); 1597 return HV_STATUS_SUCCESS; 1598 } 1599 1600 int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 1601 { 1602 u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; 1603 uint16_t code, rep_idx, rep_cnt; 1604 bool fast, rep; 1605 1606 /* 1607 * hypercall generates UD from non zero cpl and real mode 1608 * per HYPER-V spec 1609 */ 1610 if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 1611 kvm_queue_exception(vcpu, UD_VECTOR); 1612 return 1; 1613 } 1614 1615 #ifdef CONFIG_X86_64 1616 if (is_64_bit_mode(vcpu)) { 1617 param = kvm_rcx_read(vcpu); 1618 ingpa = kvm_rdx_read(vcpu); 1619 outgpa = kvm_r8_read(vcpu); 1620 } else 1621 #endif 1622 { 1623 param = ((u64)kvm_rdx_read(vcpu) << 32) | 1624 (kvm_rax_read(vcpu) & 0xffffffff); 1625 ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | 1626 (kvm_rcx_read(vcpu) & 0xffffffff); 1627 outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | 1628 (kvm_rsi_read(vcpu) & 0xffffffff); 1629 } 1630 1631 code = param & 0xffff; 1632 fast = !!(param & HV_HYPERCALL_FAST_BIT); 1633 rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff; 1634 rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; 1635 rep = !!(rep_cnt || rep_idx); 1636 1637 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 1638 1639 switch (code) { 1640 case HVCALL_NOTIFY_LONG_SPIN_WAIT: 1641 if (unlikely(rep)) { 1642 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1643 break; 1644 } 1645 kvm_vcpu_on_spin(vcpu, true); 1646 break; 1647 case HVCALL_SIGNAL_EVENT: 1648 if (unlikely(rep)) { 1649 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1650 break; 1651 } 1652 ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); 1653 if (ret != HV_STATUS_INVALID_PORT_ID) 1654 break; 1655 /* fall through - maybe userspace knows this conn_id. */ 1656 case HVCALL_POST_MESSAGE: 1657 /* don't bother userspace if it has no way to handle it */ 1658 if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { 1659 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1660 break; 1661 } 1662 vcpu->run->exit_reason = KVM_EXIT_HYPERV; 1663 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; 1664 vcpu->run->hyperv.u.hcall.input = param; 1665 vcpu->run->hyperv.u.hcall.params[0] = ingpa; 1666 vcpu->run->hyperv.u.hcall.params[1] = outgpa; 1667 vcpu->arch.complete_userspace_io = 1668 kvm_hv_hypercall_complete_userspace; 1669 return 0; 1670 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: 1671 if (unlikely(fast || !rep_cnt || rep_idx)) { 1672 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1673 break; 1674 } 1675 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1676 break; 1677 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: 1678 if (unlikely(fast || rep)) { 1679 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1680 break; 1681 } 1682 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1683 break; 1684 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: 1685 if (unlikely(fast || !rep_cnt || rep_idx)) { 1686 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1687 break; 1688 } 1689 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1690 break; 1691 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: 1692 if (unlikely(fast || rep)) { 1693 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1694 break; 1695 } 1696 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1697 break; 1698 case HVCALL_SEND_IPI: 1699 if (unlikely(rep)) { 1700 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1701 break; 1702 } 1703 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); 1704 break; 1705 case HVCALL_SEND_IPI_EX: 1706 if (unlikely(fast || rep)) { 1707 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1708 break; 1709 } 1710 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); 1711 break; 1712 default: 1713 ret = HV_STATUS_INVALID_HYPERCALL_CODE; 1714 break; 1715 } 1716 1717 return kvm_hv_hypercall_complete(vcpu, ret); 1718 } 1719 1720 void kvm_hv_init_vm(struct kvm *kvm) 1721 { 1722 mutex_init(&kvm->arch.hyperv.hv_lock); 1723 idr_init(&kvm->arch.hyperv.conn_to_evt); 1724 } 1725 1726 void kvm_hv_destroy_vm(struct kvm *kvm) 1727 { 1728 struct eventfd_ctx *eventfd; 1729 int i; 1730 1731 idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) 1732 eventfd_ctx_put(eventfd); 1733 idr_destroy(&kvm->arch.hyperv.conn_to_evt); 1734 } 1735 1736 static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) 1737 { 1738 struct kvm_hv *hv = &kvm->arch.hyperv; 1739 struct eventfd_ctx *eventfd; 1740 int ret; 1741 1742 eventfd = eventfd_ctx_fdget(fd); 1743 if (IS_ERR(eventfd)) 1744 return PTR_ERR(eventfd); 1745 1746 mutex_lock(&hv->hv_lock); 1747 ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, 1748 GFP_KERNEL_ACCOUNT); 1749 mutex_unlock(&hv->hv_lock); 1750 1751 if (ret >= 0) 1752 return 0; 1753 1754 if (ret == -ENOSPC) 1755 ret = -EEXIST; 1756 eventfd_ctx_put(eventfd); 1757 return ret; 1758 } 1759 1760 static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) 1761 { 1762 struct kvm_hv *hv = &kvm->arch.hyperv; 1763 struct eventfd_ctx *eventfd; 1764 1765 mutex_lock(&hv->hv_lock); 1766 eventfd = idr_remove(&hv->conn_to_evt, conn_id); 1767 mutex_unlock(&hv->hv_lock); 1768 1769 if (!eventfd) 1770 return -ENOENT; 1771 1772 synchronize_srcu(&kvm->srcu); 1773 eventfd_ctx_put(eventfd); 1774 return 0; 1775 } 1776 1777 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) 1778 { 1779 if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || 1780 (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) 1781 return -EINVAL; 1782 1783 if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) 1784 return kvm_hv_eventfd_deassign(kvm, args->conn_id); 1785 return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); 1786 } 1787 1788 int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, 1789 struct kvm_cpuid_entry2 __user *entries) 1790 { 1791 uint16_t evmcs_ver = 0; 1792 struct kvm_cpuid_entry2 cpuid_entries[] = { 1793 { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, 1794 { .function = HYPERV_CPUID_INTERFACE }, 1795 { .function = HYPERV_CPUID_VERSION }, 1796 { .function = HYPERV_CPUID_FEATURES }, 1797 { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, 1798 { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, 1799 { .function = HYPERV_CPUID_NESTED_FEATURES }, 1800 }; 1801 int i, nent = ARRAY_SIZE(cpuid_entries); 1802 1803 if (kvm_x86_ops.nested_get_evmcs_version) 1804 evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu); 1805 1806 /* Skip NESTED_FEATURES if eVMCS is not supported */ 1807 if (!evmcs_ver) 1808 --nent; 1809 1810 if (cpuid->nent < nent) 1811 return -E2BIG; 1812 1813 if (cpuid->nent > nent) 1814 cpuid->nent = nent; 1815 1816 for (i = 0; i < nent; i++) { 1817 struct kvm_cpuid_entry2 *ent = &cpuid_entries[i]; 1818 u32 signature[3]; 1819 1820 switch (ent->function) { 1821 case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: 1822 memcpy(signature, "Linux KVM Hv", 12); 1823 1824 ent->eax = HYPERV_CPUID_NESTED_FEATURES; 1825 ent->ebx = signature[0]; 1826 ent->ecx = signature[1]; 1827 ent->edx = signature[2]; 1828 break; 1829 1830 case HYPERV_CPUID_INTERFACE: 1831 memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); 1832 ent->eax = signature[0]; 1833 break; 1834 1835 case HYPERV_CPUID_VERSION: 1836 /* 1837 * We implement some Hyper-V 2016 functions so let's use 1838 * this version. 1839 */ 1840 ent->eax = 0x00003839; 1841 ent->ebx = 0x000A0000; 1842 break; 1843 1844 case HYPERV_CPUID_FEATURES: 1845 ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE; 1846 ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; 1847 ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE; 1848 ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; 1849 ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; 1850 ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; 1851 ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; 1852 ent->eax |= HV_X64_MSR_RESET_AVAILABLE; 1853 ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; 1854 ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS; 1855 ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT; 1856 1857 ent->ebx |= HV_X64_POST_MESSAGES; 1858 ent->ebx |= HV_X64_SIGNAL_EVENTS; 1859 1860 ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; 1861 ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; 1862 1863 /* 1864 * Direct Synthetic timers only make sense with in-kernel 1865 * LAPIC 1866 */ 1867 if (lapic_in_kernel(vcpu)) 1868 ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; 1869 1870 break; 1871 1872 case HYPERV_CPUID_ENLIGHTMENT_INFO: 1873 ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; 1874 ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; 1875 ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; 1876 ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; 1877 ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; 1878 if (evmcs_ver) 1879 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 1880 if (!cpu_smt_possible()) 1881 ent->eax |= HV_X64_NO_NONARCH_CORESHARING; 1882 /* 1883 * Default number of spinlock retry attempts, matches 1884 * HyperV 2016. 1885 */ 1886 ent->ebx = 0x00000FFF; 1887 1888 break; 1889 1890 case HYPERV_CPUID_IMPLEMENT_LIMITS: 1891 /* Maximum number of virtual processors */ 1892 ent->eax = KVM_MAX_VCPUS; 1893 /* 1894 * Maximum number of logical processors, matches 1895 * HyperV 2016. 1896 */ 1897 ent->ebx = 64; 1898 1899 break; 1900 1901 case HYPERV_CPUID_NESTED_FEATURES: 1902 ent->eax = evmcs_ver; 1903 1904 break; 1905 1906 default: 1907 break; 1908 } 1909 } 1910 1911 if (copy_to_user(entries, cpuid_entries, 1912 nent * sizeof(struct kvm_cpuid_entry2))) 1913 return -EFAULT; 1914 1915 return 0; 1916 } 1917