// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Amit Shah <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "cpuid.h"
#include "hyperv.h"

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"
#include "irq.h"

#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick);

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
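	 * For example, if SINT0 and SINT1 both target vector 0x40 and SINT1
	 * is then rewritten to use 0x41, both the old vector (still
	 * referenced by SINT0) and the new one are re-evaluated below so
	 * neither bitmap goes stale.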
121 */ 122 old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; 123 124 atomic64_set(&synic->sint[sint], data); 125 126 synic_update_vector(synic, old_vector); 127 128 synic_update_vector(synic, vector); 129 130 /* Load SynIC vectors into EOI exit bitmap */ 131 kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); 132 return 0; 133 } 134 135 static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) 136 { 137 struct kvm_vcpu *vcpu = NULL; 138 int i; 139 140 if (vpidx >= KVM_MAX_VCPUS) 141 return NULL; 142 143 vcpu = kvm_get_vcpu(kvm, vpidx); 144 if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 145 return vcpu; 146 kvm_for_each_vcpu(i, vcpu, kvm) 147 if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) 148 return vcpu; 149 return NULL; 150 } 151 152 static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) 153 { 154 struct kvm_vcpu *vcpu; 155 struct kvm_vcpu_hv_synic *synic; 156 157 vcpu = get_vcpu_by_vpidx(kvm, vpidx); 158 if (!vcpu) 159 return NULL; 160 synic = vcpu_to_synic(vcpu); 161 return (synic->active) ? synic : NULL; 162 } 163 164 static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) 165 { 166 struct kvm *kvm = vcpu->kvm; 167 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 168 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 169 struct kvm_vcpu_hv_stimer *stimer; 170 int gsi, idx; 171 172 trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); 173 174 /* Try to deliver pending Hyper-V SynIC timers messages */ 175 for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { 176 stimer = &hv_vcpu->stimer[idx]; 177 if (stimer->msg_pending && stimer->config.enable && 178 !stimer->config.direct_mode && 179 stimer->config.sintx == sint) 180 stimer_mark_pending(stimer, false); 181 } 182 183 idx = srcu_read_lock(&kvm->irq_srcu); 184 gsi = atomic_read(&synic->sint_to_gsi[sint]); 185 if (gsi != -1) 186 kvm_notify_acked_gsi(kvm, gsi); 187 srcu_read_unlock(&kvm->irq_srcu, idx); 188 } 189 190 static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) 191 { 192 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 193 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 194 195 hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; 196 hv_vcpu->exit.u.synic.msr = msr; 197 hv_vcpu->exit.u.synic.control = synic->control; 198 hv_vcpu->exit.u.synic.evt_page = synic->evt_page; 199 hv_vcpu->exit.u.synic.msg_page = synic->msg_page; 200 201 kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 202 } 203 204 static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, 205 u32 msr, u64 data, bool host) 206 { 207 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 208 int ret; 209 210 if (!synic->active && !host) 211 return 1; 212 213 trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); 214 215 ret = 0; 216 switch (msr) { 217 case HV_X64_MSR_SCONTROL: 218 synic->control = data; 219 if (!host) 220 synic_exit(synic, msr); 221 break; 222 case HV_X64_MSR_SVERSION: 223 if (!host) { 224 ret = 1; 225 break; 226 } 227 synic->version = data; 228 break; 229 case HV_X64_MSR_SIEFP: 230 if ((data & HV_SYNIC_SIEFP_ENABLE) && !host && 231 !synic->dont_zero_synic_pages) 232 if (kvm_clear_guest(vcpu->kvm, 233 data & PAGE_MASK, PAGE_SIZE)) { 234 ret = 1; 235 break; 236 } 237 synic->evt_page = data; 238 if (!host) 239 synic_exit(synic, msr); 240 break; 241 case HV_X64_MSR_SIMP: 242 if ((data & HV_SYNIC_SIMP_ENABLE) && !host && 243 !synic->dont_zero_synic_pages) 244 if (kvm_clear_guest(vcpu->kvm, 245 data & PAGE_MASK, PAGE_SIZE)) { 246 ret = 1; 247 break; 248 } 249 synic->msg_page = data; 
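		/*
		 * As with SIEFP above, a non-host write to SIMP is forwarded
		 * to user space via synic_exit() below so a VMM emulating
		 * SynIC devices can pick up the new message page address.
		 */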
250 if (!host) 251 synic_exit(synic, msr); 252 break; 253 case HV_X64_MSR_EOM: { 254 int i; 255 256 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 257 kvm_hv_notify_acked_sint(vcpu, i); 258 break; 259 } 260 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 261 ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); 262 break; 263 default: 264 ret = 1; 265 break; 266 } 267 return ret; 268 } 269 270 static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu) 271 { 272 struct kvm_cpuid_entry2 *entry; 273 274 entry = kvm_find_cpuid_entry(vcpu, 275 HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 276 0); 277 if (!entry) 278 return false; 279 280 return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; 281 } 282 283 static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu) 284 { 285 struct kvm *kvm = vcpu->kvm; 286 struct kvm_hv *hv = &kvm->arch.hyperv; 287 288 if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL) 289 hv->hv_syndbg.control.status = 290 vcpu->run->hyperv.u.syndbg.status; 291 return 1; 292 } 293 294 static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr) 295 { 296 struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); 297 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 298 299 hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG; 300 hv_vcpu->exit.u.syndbg.msr = msr; 301 hv_vcpu->exit.u.syndbg.control = syndbg->control.control; 302 hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page; 303 hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page; 304 hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page; 305 vcpu->arch.complete_userspace_io = 306 kvm_hv_syndbg_complete_userspace; 307 308 kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 309 } 310 311 static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 312 { 313 struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); 314 315 if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) 316 return 1; 317 318 trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id, 319 vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data); 320 switch (msr) { 321 case HV_X64_MSR_SYNDBG_CONTROL: 322 syndbg->control.control = data; 323 if (!host) 324 syndbg_exit(vcpu, msr); 325 break; 326 case HV_X64_MSR_SYNDBG_STATUS: 327 syndbg->control.status = data; 328 break; 329 case HV_X64_MSR_SYNDBG_SEND_BUFFER: 330 syndbg->control.send_page = data; 331 break; 332 case HV_X64_MSR_SYNDBG_RECV_BUFFER: 333 syndbg->control.recv_page = data; 334 break; 335 case HV_X64_MSR_SYNDBG_PENDING_BUFFER: 336 syndbg->control.pending_page = data; 337 if (!host) 338 syndbg_exit(vcpu, msr); 339 break; 340 case HV_X64_MSR_SYNDBG_OPTIONS: 341 syndbg->options = data; 342 break; 343 default: 344 break; 345 } 346 347 return 0; 348 } 349 350 static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) 351 { 352 struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); 353 354 if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) 355 return 1; 356 357 switch (msr) { 358 case HV_X64_MSR_SYNDBG_CONTROL: 359 *pdata = syndbg->control.control; 360 break; 361 case HV_X64_MSR_SYNDBG_STATUS: 362 *pdata = syndbg->control.status; 363 break; 364 case HV_X64_MSR_SYNDBG_SEND_BUFFER: 365 *pdata = syndbg->control.send_page; 366 break; 367 case HV_X64_MSR_SYNDBG_RECV_BUFFER: 368 *pdata = syndbg->control.recv_page; 369 break; 370 case HV_X64_MSR_SYNDBG_PENDING_BUFFER: 371 *pdata = syndbg->control.pending_page; 372 break; 373 case HV_X64_MSR_SYNDBG_OPTIONS: 374 *pdata = syndbg->options; 375 break; 376 default: 377 break; 378 } 379 380 trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, 
381 vcpu_to_hv_vcpu(vcpu)->vp_index, msr, 382 *pdata); 383 384 return 0; 385 } 386 387 static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, 388 bool host) 389 { 390 int ret; 391 392 if (!synic->active && !host) 393 return 1; 394 395 ret = 0; 396 switch (msr) { 397 case HV_X64_MSR_SCONTROL: 398 *pdata = synic->control; 399 break; 400 case HV_X64_MSR_SVERSION: 401 *pdata = synic->version; 402 break; 403 case HV_X64_MSR_SIEFP: 404 *pdata = synic->evt_page; 405 break; 406 case HV_X64_MSR_SIMP: 407 *pdata = synic->msg_page; 408 break; 409 case HV_X64_MSR_EOM: 410 *pdata = 0; 411 break; 412 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 413 *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); 414 break; 415 default: 416 ret = 1; 417 break; 418 } 419 return ret; 420 } 421 422 static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) 423 { 424 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 425 struct kvm_lapic_irq irq; 426 int ret, vector; 427 428 if (sint >= ARRAY_SIZE(synic->sint)) 429 return -EINVAL; 430 431 vector = synic_get_sint_vector(synic_read_sint(synic, sint)); 432 if (vector < 0) 433 return -ENOENT; 434 435 memset(&irq, 0, sizeof(irq)); 436 irq.shorthand = APIC_DEST_SELF; 437 irq.dest_mode = APIC_DEST_PHYSICAL; 438 irq.delivery_mode = APIC_DM_FIXED; 439 irq.vector = vector; 440 irq.level = 1; 441 442 ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); 443 trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); 444 return ret; 445 } 446 447 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint) 448 { 449 struct kvm_vcpu_hv_synic *synic; 450 451 synic = synic_get(kvm, vpidx); 452 if (!synic) 453 return -EINVAL; 454 455 return synic_set_irq(synic, sint); 456 } 457 458 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) 459 { 460 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 461 int i; 462 463 trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); 464 465 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 466 if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 467 kvm_hv_notify_acked_sint(vcpu, i); 468 } 469 470 static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi) 471 { 472 struct kvm_vcpu_hv_synic *synic; 473 474 synic = synic_get(kvm, vpidx); 475 if (!synic) 476 return -EINVAL; 477 478 if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) 479 return -EINVAL; 480 481 atomic_set(&synic->sint_to_gsi[sint], gsi); 482 return 0; 483 } 484 485 void kvm_hv_irq_routing_update(struct kvm *kvm) 486 { 487 struct kvm_irq_routing_table *irq_rt; 488 struct kvm_kernel_irq_routing_entry *e; 489 u32 gsi; 490 491 irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, 492 lockdep_is_held(&kvm->irq_lock)); 493 494 for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { 495 hlist_for_each_entry(e, &irq_rt->map[gsi], link) { 496 if (e->type == KVM_IRQ_ROUTING_HV_SINT) 497 kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, 498 e->hv_sint.sint, gsi); 499 } 500 } 501 } 502 503 static void synic_init(struct kvm_vcpu_hv_synic *synic) 504 { 505 int i; 506 507 memset(synic, 0, sizeof(*synic)); 508 synic->version = HV_SYNIC_VERSION_1; 509 for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 510 atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); 511 atomic_set(&synic->sint_to_gsi[i], -1); 512 } 513 } 514 515 static u64 get_time_ref_counter(struct kvm *kvm) 516 { 517 struct kvm_hv *hv = &kvm->arch.hyperv; 518 struct kvm_vcpu *vcpu; 519 u64 tsc; 520 521 /* 522 * The guest has not set up the TSC page or the 
clock isn't 523 * stable, fall back to get_kvmclock_ns. 524 */ 525 if (!hv->tsc_ref.tsc_sequence) 526 return div_u64(get_kvmclock_ns(kvm), 100); 527 528 vcpu = kvm_get_vcpu(kvm, 0); 529 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 530 return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) 531 + hv->tsc_ref.tsc_offset; 532 } 533 534 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 535 bool vcpu_kick) 536 { 537 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 538 539 set_bit(stimer->index, 540 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 541 kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 542 if (vcpu_kick) 543 kvm_vcpu_kick(vcpu); 544 } 545 546 static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) 547 { 548 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 549 550 trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, 551 stimer->index); 552 553 hrtimer_cancel(&stimer->timer); 554 clear_bit(stimer->index, 555 vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 556 stimer->msg_pending = false; 557 stimer->exp_time = 0; 558 } 559 560 static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) 561 { 562 struct kvm_vcpu_hv_stimer *stimer; 563 564 stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); 565 trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, 566 stimer->index); 567 stimer_mark_pending(stimer, true); 568 569 return HRTIMER_NORESTART; 570 } 571 572 /* 573 * stimer_start() assumptions: 574 * a) stimer->count is not equal to 0 575 * b) stimer->config has HV_STIMER_ENABLE flag 576 */ 577 static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) 578 { 579 u64 time_now; 580 ktime_t ktime_now; 581 582 time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); 583 ktime_now = ktime_get(); 584 585 if (stimer->config.periodic) { 586 if (stimer->exp_time) { 587 if (time_now >= stimer->exp_time) { 588 u64 remainder; 589 590 div64_u64_rem(time_now - stimer->exp_time, 591 stimer->count, &remainder); 592 stimer->exp_time = 593 time_now + (stimer->count - remainder); 594 } 595 } else 596 stimer->exp_time = time_now + stimer->count; 597 598 trace_kvm_hv_stimer_start_periodic( 599 stimer_to_vcpu(stimer)->vcpu_id, 600 stimer->index, 601 time_now, stimer->exp_time); 602 603 hrtimer_start(&stimer->timer, 604 ktime_add_ns(ktime_now, 605 100 * (stimer->exp_time - time_now)), 606 HRTIMER_MODE_ABS); 607 return 0; 608 } 609 stimer->exp_time = stimer->count; 610 if (time_now >= stimer->count) { 611 /* 612 * Expire timer according to Hypervisor Top-Level Functional 613 * specification v4(15.3.1): 614 * "If a one shot is enabled and the specified count is in 615 * the past, it will expire immediately." 
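		 *
		 * Note that both time_now and stimer->count are in 100ns
		 * units of the partition reference time, which is why the
		 * hrtimer below is armed with a 100 * (count - time_now)
		 * nanosecond delta.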
616 */ 617 stimer_mark_pending(stimer, false); 618 return 0; 619 } 620 621 trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, 622 stimer->index, 623 time_now, stimer->count); 624 625 hrtimer_start(&stimer->timer, 626 ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), 627 HRTIMER_MODE_ABS); 628 return 0; 629 } 630 631 static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, 632 bool host) 633 { 634 union hv_stimer_config new_config = {.as_uint64 = config}, 635 old_config = {.as_uint64 = stimer->config.as_uint64}; 636 637 trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, 638 stimer->index, config, host); 639 640 stimer_cleanup(stimer); 641 if (old_config.enable && 642 !new_config.direct_mode && new_config.sintx == 0) 643 new_config.enable = 0; 644 stimer->config.as_uint64 = new_config.as_uint64; 645 646 if (stimer->config.enable) 647 stimer_mark_pending(stimer, false); 648 649 return 0; 650 } 651 652 static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, 653 bool host) 654 { 655 trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, 656 stimer->index, count, host); 657 658 stimer_cleanup(stimer); 659 stimer->count = count; 660 if (stimer->count == 0) 661 stimer->config.enable = 0; 662 else if (stimer->config.auto_enable) 663 stimer->config.enable = 1; 664 665 if (stimer->config.enable) 666 stimer_mark_pending(stimer, false); 667 668 return 0; 669 } 670 671 static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) 672 { 673 *pconfig = stimer->config.as_uint64; 674 return 0; 675 } 676 677 static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) 678 { 679 *pcount = stimer->count; 680 return 0; 681 } 682 683 static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, 684 struct hv_message *src_msg, bool no_retry) 685 { 686 struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 687 int msg_off = offsetof(struct hv_message_page, sint_message[sint]); 688 gfn_t msg_page_gfn; 689 struct hv_message_header hv_hdr; 690 int r; 691 692 if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) 693 return -ENOENT; 694 695 msg_page_gfn = synic->msg_page >> PAGE_SHIFT; 696 697 /* 698 * Strictly following the spec-mandated ordering would assume setting 699 * .msg_pending before checking .message_type. However, this function 700 * is only called in vcpu context so the entire update is atomic from 701 * guest POV and thus the exact order here doesn't matter. 
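	 * Each SINT owns one struct hv_message slot in the message page;
	 * msg_off computed above is the offset of this SINT's slot.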
702 */ 703 r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type, 704 msg_off + offsetof(struct hv_message, 705 header.message_type), 706 sizeof(hv_hdr.message_type)); 707 if (r < 0) 708 return r; 709 710 if (hv_hdr.message_type != HVMSG_NONE) { 711 if (no_retry) 712 return 0; 713 714 hv_hdr.message_flags.msg_pending = 1; 715 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, 716 &hv_hdr.message_flags, 717 msg_off + 718 offsetof(struct hv_message, 719 header.message_flags), 720 sizeof(hv_hdr.message_flags)); 721 if (r < 0) 722 return r; 723 return -EAGAIN; 724 } 725 726 r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off, 727 sizeof(src_msg->header) + 728 src_msg->header.payload_size); 729 if (r < 0) 730 return r; 731 732 r = synic_set_irq(synic, sint); 733 if (r < 0) 734 return r; 735 if (r == 0) 736 return -EFAULT; 737 return 0; 738 } 739 740 static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) 741 { 742 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 743 struct hv_message *msg = &stimer->msg; 744 struct hv_timer_message_payload *payload = 745 (struct hv_timer_message_payload *)&msg->u.payload; 746 747 /* 748 * To avoid piling up periodic ticks, don't retry message 749 * delivery for them (within "lazy" lost ticks policy). 750 */ 751 bool no_retry = stimer->config.periodic; 752 753 payload->expiration_time = stimer->exp_time; 754 payload->delivery_time = get_time_ref_counter(vcpu->kvm); 755 return synic_deliver_msg(vcpu_to_synic(vcpu), 756 stimer->config.sintx, msg, 757 no_retry); 758 } 759 760 static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) 761 { 762 struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 763 struct kvm_lapic_irq irq = { 764 .delivery_mode = APIC_DM_FIXED, 765 .vector = stimer->config.apic_vector 766 }; 767 768 if (lapic_in_kernel(vcpu)) 769 return !kvm_apic_set_irq(vcpu, &irq, NULL); 770 return 0; 771 } 772 773 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) 774 { 775 int r, direct = stimer->config.direct_mode; 776 777 stimer->msg_pending = true; 778 if (!direct) 779 r = stimer_send_msg(stimer); 780 else 781 r = stimer_notify_direct(stimer); 782 trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, 783 stimer->index, direct, r); 784 if (!r) { 785 stimer->msg_pending = false; 786 if (!(stimer->config.periodic)) 787 stimer->config.enable = 0; 788 } 789 } 790 791 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) 792 { 793 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 794 struct kvm_vcpu_hv_stimer *stimer; 795 u64 time_now, exp_time; 796 int i; 797 798 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 799 if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { 800 stimer = &hv_vcpu->stimer[i]; 801 if (stimer->config.enable) { 802 exp_time = stimer->exp_time; 803 804 if (exp_time) { 805 time_now = 806 get_time_ref_counter(vcpu->kvm); 807 if (time_now >= exp_time) 808 stimer_expiration(stimer); 809 } 810 811 if ((stimer->config.enable) && 812 stimer->count) { 813 if (!stimer->msg_pending) 814 stimer_start(stimer); 815 } else 816 stimer_cleanup(stimer); 817 } 818 } 819 } 820 821 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) 822 { 823 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 824 int i; 825 826 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 827 stimer_cleanup(&hv_vcpu->stimer[i]); 828 } 829 830 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) 831 { 832 if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) 833 return false; 834 return 
vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 835 } 836 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); 837 838 bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, 839 struct hv_vp_assist_page *assist_page) 840 { 841 if (!kvm_hv_assist_page_enabled(vcpu)) 842 return false; 843 return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, 844 assist_page, sizeof(*assist_page)); 845 } 846 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); 847 848 static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) 849 { 850 struct hv_message *msg = &stimer->msg; 851 struct hv_timer_message_payload *payload = 852 (struct hv_timer_message_payload *)&msg->u.payload; 853 854 memset(&msg->header, 0, sizeof(msg->header)); 855 msg->header.message_type = HVMSG_TIMER_EXPIRED; 856 msg->header.payload_size = sizeof(*payload); 857 858 payload->timer_index = stimer->index; 859 payload->expiration_time = 0; 860 payload->delivery_time = 0; 861 } 862 863 static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) 864 { 865 memset(stimer, 0, sizeof(*stimer)); 866 stimer->index = timer_index; 867 hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 868 stimer->timer.function = stimer_timer_callback; 869 stimer_prepare_msg(stimer); 870 } 871 872 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) 873 { 874 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 875 int i; 876 877 synic_init(&hv_vcpu->synic); 878 879 bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 880 for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 881 stimer_init(&hv_vcpu->stimer[i], i); 882 } 883 884 void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu) 885 { 886 struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 887 888 hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); 889 } 890 891 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) 892 { 893 struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 894 895 /* 896 * Hyper-V SynIC auto EOI SINT's are 897 * not compatible with APICV, so request 898 * to deactivate APICV permanently. 899 */ 900 kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); 901 synic->active = true; 902 synic->dont_zero_synic_pages = dont_zero_synic_pages; 903 return 0; 904 } 905 906 static bool kvm_hv_msr_partition_wide(u32 msr) 907 { 908 bool r = false; 909 910 switch (msr) { 911 case HV_X64_MSR_GUEST_OS_ID: 912 case HV_X64_MSR_HYPERCALL: 913 case HV_X64_MSR_REFERENCE_TSC: 914 case HV_X64_MSR_TIME_REF_COUNT: 915 case HV_X64_MSR_CRASH_CTL: 916 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 917 case HV_X64_MSR_RESET: 918 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 919 case HV_X64_MSR_TSC_EMULATION_CONTROL: 920 case HV_X64_MSR_TSC_EMULATION_STATUS: 921 case HV_X64_MSR_SYNDBG_OPTIONS: 922 case HV_X64_MSR_SYNDBG_CONTROL ... 
HV_X64_MSR_SYNDBG_PENDING_BUFFER: 923 r = true; 924 break; 925 } 926 927 return r; 928 } 929 930 static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, 931 u32 index, u64 *pdata) 932 { 933 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 934 size_t size = ARRAY_SIZE(hv->hv_crash_param); 935 936 if (WARN_ON_ONCE(index >= size)) 937 return -EINVAL; 938 939 *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; 940 return 0; 941 } 942 943 static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) 944 { 945 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 946 947 *pdata = hv->hv_crash_ctl; 948 return 0; 949 } 950 951 static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) 952 { 953 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 954 955 if (host) 956 hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY; 957 958 if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) { 959 960 vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", 961 hv->hv_crash_param[0], 962 hv->hv_crash_param[1], 963 hv->hv_crash_param[2], 964 hv->hv_crash_param[3], 965 hv->hv_crash_param[4]); 966 967 /* Send notification about crash to user space */ 968 kvm_make_request(KVM_REQ_HV_CRASH, vcpu); 969 } 970 971 return 0; 972 } 973 974 static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, 975 u32 index, u64 data) 976 { 977 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 978 size_t size = ARRAY_SIZE(hv->hv_crash_param); 979 980 if (WARN_ON_ONCE(index >= size)) 981 return -EINVAL; 982 983 hv->hv_crash_param[array_index_nospec(index, size)] = data; 984 return 0; 985 } 986 987 /* 988 * The kvmclock and Hyper-V TSC page use similar formulas, and converting 989 * between them is possible: 990 * 991 * kvmclock formula: 992 * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) 993 * + system_time 994 * 995 * Hyper-V formula: 996 * nsec/100 = ticks * scale / 2^64 + offset 997 * 998 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 999 * By dividing the kvmclock formula by 100 and equating what's left we get: 1000 * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 1001 * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 1002 * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100 1003 * 1004 * Now expand the kvmclock formula and divide by 100: 1005 * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) 1006 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) 1007 * + system_time 1008 * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 1009 * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 1010 * + system_time / 100 1011 * 1012 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: 1013 * nsec/100 = ticks * scale / 2^64 1014 * - tsc_timestamp * scale / 2^64 1015 * + system_time / 100 1016 * 1017 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: 1018 * offset = system_time / 100 - tsc_timestamp * scale / 2^64 1019 * 1020 * These two equivalencies are implemented in this function. 
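 *
 * As a sanity check, take a hypothetical 1 GHz guest TSC described by
 * tsc_shift = 0 and tsc_to_system_mul = 2^32 (one tick per nanosecond):
 * the scale formula gives scale = 2^64 / 100, so ticks * scale / 2^64 is
 * ticks / 100, i.e. the elapsed time in the 100ns units Hyper-V expects.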
1021 */ 1022 static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, 1023 struct ms_hyperv_tsc_page *tsc_ref) 1024 { 1025 u64 max_mul; 1026 1027 if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) 1028 return false; 1029 1030 /* 1031 * check if scale would overflow, if so we use the time ref counter 1032 * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 1033 * tsc_to_system_mul / 100 >= 2^(32-tsc_shift) 1034 * tsc_to_system_mul >= 100 * 2^(32-tsc_shift) 1035 */ 1036 max_mul = 100ull << (32 - hv_clock->tsc_shift); 1037 if (hv_clock->tsc_to_system_mul >= max_mul) 1038 return false; 1039 1040 /* 1041 * Otherwise compute the scale and offset according to the formulas 1042 * derived above. 1043 */ 1044 tsc_ref->tsc_scale = 1045 mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), 1046 hv_clock->tsc_to_system_mul, 1047 100); 1048 1049 tsc_ref->tsc_offset = hv_clock->system_time; 1050 do_div(tsc_ref->tsc_offset, 100); 1051 tsc_ref->tsc_offset -= 1052 mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); 1053 return true; 1054 } 1055 1056 void kvm_hv_setup_tsc_page(struct kvm *kvm, 1057 struct pvclock_vcpu_time_info *hv_clock) 1058 { 1059 struct kvm_hv *hv = &kvm->arch.hyperv; 1060 u32 tsc_seq; 1061 u64 gfn; 1062 1063 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 1064 BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); 1065 1066 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1067 return; 1068 1069 mutex_lock(&kvm->arch.hyperv.hv_lock); 1070 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1071 goto out_unlock; 1072 1073 gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 1074 /* 1075 * Because the TSC parameters only vary when there is a 1076 * change in the master clock, do not bother with caching. 1077 */ 1078 if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), 1079 &tsc_seq, sizeof(tsc_seq)))) 1080 goto out_unlock; 1081 1082 /* 1083 * While we're computing and writing the parameters, force the 1084 * guest to use the time reference count MSR. 1085 */ 1086 hv->tsc_ref.tsc_sequence = 0; 1087 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 1088 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 1089 goto out_unlock; 1090 1091 if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) 1092 goto out_unlock; 1093 1094 /* Ensure sequence is zero before writing the rest of the struct. */ 1095 smp_wmb(); 1096 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 1097 goto out_unlock; 1098 1099 /* 1100 * Now switch to the TSC page mechanism by writing the sequence. 1101 */ 1102 tsc_seq++; 1103 if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0) 1104 tsc_seq = 1; 1105 1106 /* Write the struct entirely before the non-zero sequence. 
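	 * Guests treat a sequence of 0 (and, historically, 0xFFFFFFFF) as
	 * "TSC page invalid", which is why both values were skipped when the
	 * sequence was advanced above.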
*/ 1107 smp_wmb(); 1108 1109 hv->tsc_ref.tsc_sequence = tsc_seq; 1110 kvm_write_guest(kvm, gfn_to_gpa(gfn), 1111 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); 1112 out_unlock: 1113 mutex_unlock(&kvm->arch.hyperv.hv_lock); 1114 } 1115 1116 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, 1117 bool host) 1118 { 1119 struct kvm *kvm = vcpu->kvm; 1120 struct kvm_hv *hv = &kvm->arch.hyperv; 1121 1122 switch (msr) { 1123 case HV_X64_MSR_GUEST_OS_ID: 1124 hv->hv_guest_os_id = data; 1125 /* setting guest os id to zero disables hypercall page */ 1126 if (!hv->hv_guest_os_id) 1127 hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 1128 break; 1129 case HV_X64_MSR_HYPERCALL: { 1130 u64 gfn; 1131 unsigned long addr; 1132 u8 instructions[4]; 1133 1134 /* if guest os id is not set hypercall should remain disabled */ 1135 if (!hv->hv_guest_os_id) 1136 break; 1137 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1138 hv->hv_hypercall = data; 1139 break; 1140 } 1141 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1142 addr = gfn_to_hva(kvm, gfn); 1143 if (kvm_is_error_hva(addr)) 1144 return 1; 1145 kvm_x86_ops.patch_hypercall(vcpu, instructions); 1146 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1147 if (__copy_to_user((void __user *)addr, instructions, 4)) 1148 return 1; 1149 hv->hv_hypercall = data; 1150 mark_page_dirty(kvm, gfn); 1151 break; 1152 } 1153 case HV_X64_MSR_REFERENCE_TSC: 1154 hv->hv_tsc_page = data; 1155 if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) 1156 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1157 break; 1158 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1159 return kvm_hv_msr_set_crash_data(vcpu, 1160 msr - HV_X64_MSR_CRASH_P0, 1161 data); 1162 case HV_X64_MSR_CRASH_CTL: 1163 return kvm_hv_msr_set_crash_ctl(vcpu, data, host); 1164 case HV_X64_MSR_RESET: 1165 if (data == 1) { 1166 vcpu_debug(vcpu, "hyper-v reset requested\n"); 1167 kvm_make_request(KVM_REQ_HV_RESET, vcpu); 1168 } 1169 break; 1170 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1171 hv->hv_reenlightenment_control = data; 1172 break; 1173 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1174 hv->hv_tsc_emulation_control = data; 1175 break; 1176 case HV_X64_MSR_TSC_EMULATION_STATUS: 1177 hv->hv_tsc_emulation_status = data; 1178 break; 1179 case HV_X64_MSR_TIME_REF_COUNT: 1180 /* read-only, but still ignore it if host-initiated */ 1181 if (!host) 1182 return 1; 1183 break; 1184 case HV_X64_MSR_SYNDBG_OPTIONS: 1185 case HV_X64_MSR_SYNDBG_CONTROL ... 
HV_X64_MSR_SYNDBG_PENDING_BUFFER: 1186 return syndbg_set_msr(vcpu, msr, data, host); 1187 default: 1188 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1189 msr, data); 1190 return 1; 1191 } 1192 return 0; 1193 } 1194 1195 /* Calculate cpu time spent by current task in 100ns units */ 1196 static u64 current_task_runtime_100ns(void) 1197 { 1198 u64 utime, stime; 1199 1200 task_cputime_adjusted(current, &utime, &stime); 1201 1202 return div_u64(utime + stime, 100); 1203 } 1204 1205 static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) 1206 { 1207 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1208 1209 switch (msr) { 1210 case HV_X64_MSR_VP_INDEX: { 1211 struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; 1212 int vcpu_idx = kvm_vcpu_get_idx(vcpu); 1213 u32 new_vp_index = (u32)data; 1214 1215 if (!host || new_vp_index >= KVM_MAX_VCPUS) 1216 return 1; 1217 1218 if (new_vp_index == hv_vcpu->vp_index) 1219 return 0; 1220 1221 /* 1222 * The VP index is initialized to vcpu_index by 1223 * kvm_hv_vcpu_postcreate so they initially match. Now the 1224 * VP index is changing, adjust num_mismatched_vp_indexes if 1225 * it now matches or no longer matches vcpu_idx. 1226 */ 1227 if (hv_vcpu->vp_index == vcpu_idx) 1228 atomic_inc(&hv->num_mismatched_vp_indexes); 1229 else if (new_vp_index == vcpu_idx) 1230 atomic_dec(&hv->num_mismatched_vp_indexes); 1231 1232 hv_vcpu->vp_index = new_vp_index; 1233 break; 1234 } 1235 case HV_X64_MSR_VP_ASSIST_PAGE: { 1236 u64 gfn; 1237 unsigned long addr; 1238 1239 if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { 1240 hv_vcpu->hv_vapic = data; 1241 if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) 1242 return 1; 1243 break; 1244 } 1245 gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; 1246 addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); 1247 if (kvm_is_error_hva(addr)) 1248 return 1; 1249 1250 /* 1251 * Clear apic_assist portion of struct hv_vp_assist_page 1252 * only, there can be valuable data in the rest which needs 1253 * to be preserved e.g. on migration. 1254 */ 1255 if (__put_user(0, (u32 __user *)addr)) 1256 return 1; 1257 hv_vcpu->hv_vapic = data; 1258 kvm_vcpu_mark_page_dirty(vcpu, gfn); 1259 if (kvm_lapic_enable_pv_eoi(vcpu, 1260 gfn_to_gpa(gfn) | KVM_MSR_ENABLED, 1261 sizeof(struct hv_vp_assist_page))) 1262 return 1; 1263 break; 1264 } 1265 case HV_X64_MSR_EOI: 1266 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1267 case HV_X64_MSR_ICR: 1268 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1269 case HV_X64_MSR_TPR: 1270 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1271 case HV_X64_MSR_VP_RUNTIME: 1272 if (!host) 1273 return 1; 1274 hv_vcpu->runtime_offset = data - current_task_runtime_100ns(); 1275 break; 1276 case HV_X64_MSR_SCONTROL: 1277 case HV_X64_MSR_SVERSION: 1278 case HV_X64_MSR_SIEFP: 1279 case HV_X64_MSR_SIMP: 1280 case HV_X64_MSR_EOM: 1281 case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: 1282 return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); 1283 case HV_X64_MSR_STIMER0_CONFIG: 1284 case HV_X64_MSR_STIMER1_CONFIG: 1285 case HV_X64_MSR_STIMER2_CONFIG: 1286 case HV_X64_MSR_STIMER3_CONFIG: { 1287 int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 1288 1289 return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), 1290 data, host); 1291 } 1292 case HV_X64_MSR_STIMER0_COUNT: 1293 case HV_X64_MSR_STIMER1_COUNT: 1294 case HV_X64_MSR_STIMER2_COUNT: 1295 case HV_X64_MSR_STIMER3_COUNT: { 1296 int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 1297 1298 return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), 1299 data, host); 1300 } 1301 case HV_X64_MSR_TSC_FREQUENCY: 1302 case HV_X64_MSR_APIC_FREQUENCY: 1303 /* read-only, but still ignore it if host-initiated */ 1304 if (!host) 1305 return 1; 1306 break; 1307 default: 1308 vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", 1309 msr, data); 1310 return 1; 1311 } 1312 1313 return 0; 1314 } 1315 1316 static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, 1317 bool host) 1318 { 1319 u64 data = 0; 1320 struct kvm *kvm = vcpu->kvm; 1321 struct kvm_hv *hv = &kvm->arch.hyperv; 1322 1323 switch (msr) { 1324 case HV_X64_MSR_GUEST_OS_ID: 1325 data = hv->hv_guest_os_id; 1326 break; 1327 case HV_X64_MSR_HYPERCALL: 1328 data = hv->hv_hypercall; 1329 break; 1330 case HV_X64_MSR_TIME_REF_COUNT: 1331 data = get_time_ref_counter(kvm); 1332 break; 1333 case HV_X64_MSR_REFERENCE_TSC: 1334 data = hv->hv_tsc_page; 1335 break; 1336 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1337 return kvm_hv_msr_get_crash_data(vcpu, 1338 msr - HV_X64_MSR_CRASH_P0, 1339 pdata); 1340 case HV_X64_MSR_CRASH_CTL: 1341 return kvm_hv_msr_get_crash_ctl(vcpu, pdata); 1342 case HV_X64_MSR_RESET: 1343 data = 0; 1344 break; 1345 case HV_X64_MSR_REENLIGHTENMENT_CONTROL: 1346 data = hv->hv_reenlightenment_control; 1347 break; 1348 case HV_X64_MSR_TSC_EMULATION_CONTROL: 1349 data = hv->hv_tsc_emulation_control; 1350 break; 1351 case HV_X64_MSR_TSC_EMULATION_STATUS: 1352 data = hv->hv_tsc_emulation_status; 1353 break; 1354 case HV_X64_MSR_SYNDBG_OPTIONS: 1355 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: 1356 return syndbg_get_msr(vcpu, msr, pdata, host); 1357 default: 1358 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1359 return 1; 1360 } 1361 1362 *pdata = data; 1363 return 0; 1364 } 1365 1366 static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, 1367 bool host) 1368 { 1369 u64 data = 0; 1370 struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 1371 1372 switch (msr) { 1373 case HV_X64_MSR_VP_INDEX: 1374 data = hv_vcpu->vp_index; 1375 break; 1376 case HV_X64_MSR_EOI: 1377 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1378 case HV_X64_MSR_ICR: 1379 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1380 case HV_X64_MSR_TPR: 1381 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1382 case HV_X64_MSR_VP_ASSIST_PAGE: 1383 data = hv_vcpu->hv_vapic; 1384 break; 1385 case HV_X64_MSR_VP_RUNTIME: 1386 data = current_task_runtime_100ns() + hv_vcpu->runtime_offset; 1387 break; 1388 case HV_X64_MSR_SCONTROL: 1389 case HV_X64_MSR_SVERSION: 1390 case HV_X64_MSR_SIEFP: 1391 case HV_X64_MSR_SIMP: 1392 case HV_X64_MSR_EOM: 1393 case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

static __always_inline unsigned long *sparse_set_to_vcpu_mask(
	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
			     (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len =
			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_vcpu->tlb_flush);

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt.
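	 * In the hypercall result, the status occupies bits 15:0 and the
	 * "reps completed" count bits 43:32, hence the shift by
	 * HV_HYPERCALL_REP_COMP_OFFSET below.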
	 */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = vector
	};
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
			continue;

		/* We fail only when APIC is disabled */
		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
			   bool ex, bool fast)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	unsigned long valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!fast) {
			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = outgpa;
			vector = (u32)ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		if (!sparse_banks_len)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_send_ipi_ex,
						    vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_rax_write(vcpu, result);
	else {
		kvm_rdx_write(vcpu, result >> 32);
		kvm_rax_write(vcpu, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, rep;

	/*
	 * Hypercalls generate a #UD from non-zero CPL and in real mode,
	 * per the Hyper-V spec.
	 */
	if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_mode(vcpu)) {
		param = kvm_rcx_read(vcpu);
		ingpa = kvm_rdx_read(vcpu);
		outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		param = ((u64)kvm_rdx_read(vcpu) << 32) |
			(kvm_rax_read(vcpu) & 0xffffffff);
		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			(kvm_rcx_read(vcpu) & 0xffffffff);
		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			 (kvm_rsi_read(vcpu) & 0xffffffff);
	}

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

	switch (code) {
	case
HVCALL_NOTIFY_LONG_SPIN_WAIT: 1767 if (unlikely(rep)) { 1768 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1769 break; 1770 } 1771 kvm_vcpu_on_spin(vcpu, true); 1772 break; 1773 case HVCALL_SIGNAL_EVENT: 1774 if (unlikely(rep)) { 1775 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1776 break; 1777 } 1778 ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); 1779 if (ret != HV_STATUS_INVALID_PORT_ID) 1780 break; 1781 /* fall through - maybe userspace knows this conn_id. */ 1782 case HVCALL_POST_MESSAGE: 1783 /* don't bother userspace if it has no way to handle it */ 1784 if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { 1785 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1786 break; 1787 } 1788 vcpu->run->exit_reason = KVM_EXIT_HYPERV; 1789 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; 1790 vcpu->run->hyperv.u.hcall.input = param; 1791 vcpu->run->hyperv.u.hcall.params[0] = ingpa; 1792 vcpu->run->hyperv.u.hcall.params[1] = outgpa; 1793 vcpu->arch.complete_userspace_io = 1794 kvm_hv_hypercall_complete_userspace; 1795 return 0; 1796 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: 1797 if (unlikely(fast || !rep_cnt || rep_idx)) { 1798 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1799 break; 1800 } 1801 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1802 break; 1803 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: 1804 if (unlikely(fast || rep)) { 1805 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1806 break; 1807 } 1808 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); 1809 break; 1810 case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: 1811 if (unlikely(fast || !rep_cnt || rep_idx)) { 1812 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1813 break; 1814 } 1815 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1816 break; 1817 case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: 1818 if (unlikely(fast || rep)) { 1819 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1820 break; 1821 } 1822 ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); 1823 break; 1824 case HVCALL_SEND_IPI: 1825 if (unlikely(rep)) { 1826 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1827 break; 1828 } 1829 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); 1830 break; 1831 case HVCALL_SEND_IPI_EX: 1832 if (unlikely(fast || rep)) { 1833 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 1834 break; 1835 } 1836 ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); 1837 break; 1838 case HVCALL_POST_DEBUG_DATA: 1839 case HVCALL_RETRIEVE_DEBUG_DATA: 1840 if (unlikely(fast)) { 1841 ret = HV_STATUS_INVALID_PARAMETER; 1842 break; 1843 } 1844 fallthrough; 1845 case HVCALL_RESET_DEBUG_SESSION: { 1846 struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); 1847 1848 if (!kvm_hv_is_syndbg_enabled(vcpu)) { 1849 ret = HV_STATUS_INVALID_HYPERCALL_CODE; 1850 break; 1851 } 1852 1853 if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) { 1854 ret = HV_STATUS_OPERATION_DENIED; 1855 break; 1856 } 1857 vcpu->run->exit_reason = KVM_EXIT_HYPERV; 1858 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; 1859 vcpu->run->hyperv.u.hcall.input = param; 1860 vcpu->run->hyperv.u.hcall.params[0] = ingpa; 1861 vcpu->run->hyperv.u.hcall.params[1] = outgpa; 1862 vcpu->arch.complete_userspace_io = 1863 kvm_hv_hypercall_complete_userspace; 1864 return 0; 1865 } 1866 default: 1867 ret = HV_STATUS_INVALID_HYPERCALL_CODE; 1868 break; 1869 } 1870 1871 return kvm_hv_hypercall_complete(vcpu, ret); 1872 } 1873 1874 void kvm_hv_init_vm(struct kvm *kvm) 1875 { 1876 mutex_init(&kvm->arch.hyperv.hv_lock); 1877 idr_init(&kvm->arch.hyperv.conn_to_evt); 1878 } 1879 1880 void kvm_hv_destroy_vm(struct kvm *kvm) 1881 { 1882 struct eventfd_ctx 
*eventfd; 1883 int i; 1884 1885 idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) 1886 eventfd_ctx_put(eventfd); 1887 idr_destroy(&kvm->arch.hyperv.conn_to_evt); 1888 } 1889 1890 static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) 1891 { 1892 struct kvm_hv *hv = &kvm->arch.hyperv; 1893 struct eventfd_ctx *eventfd; 1894 int ret; 1895 1896 eventfd = eventfd_ctx_fdget(fd); 1897 if (IS_ERR(eventfd)) 1898 return PTR_ERR(eventfd); 1899 1900 mutex_lock(&hv->hv_lock); 1901 ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, 1902 GFP_KERNEL_ACCOUNT); 1903 mutex_unlock(&hv->hv_lock); 1904 1905 if (ret >= 0) 1906 return 0; 1907 1908 if (ret == -ENOSPC) 1909 ret = -EEXIST; 1910 eventfd_ctx_put(eventfd); 1911 return ret; 1912 } 1913 1914 static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) 1915 { 1916 struct kvm_hv *hv = &kvm->arch.hyperv; 1917 struct eventfd_ctx *eventfd; 1918 1919 mutex_lock(&hv->hv_lock); 1920 eventfd = idr_remove(&hv->conn_to_evt, conn_id); 1921 mutex_unlock(&hv->hv_lock); 1922 1923 if (!eventfd) 1924 return -ENOENT; 1925 1926 synchronize_srcu(&kvm->srcu); 1927 eventfd_ctx_put(eventfd); 1928 return 0; 1929 } 1930 1931 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) 1932 { 1933 if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || 1934 (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) 1935 return -EINVAL; 1936 1937 if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) 1938 return kvm_hv_eventfd_deassign(kvm, args->conn_id); 1939 return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); 1940 } 1941 1942 int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, 1943 struct kvm_cpuid_entry2 __user *entries) 1944 { 1945 uint16_t evmcs_ver = 0; 1946 struct kvm_cpuid_entry2 cpuid_entries[] = { 1947 { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, 1948 { .function = HYPERV_CPUID_INTERFACE }, 1949 { .function = HYPERV_CPUID_VERSION }, 1950 { .function = HYPERV_CPUID_FEATURES }, 1951 { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, 1952 { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, 1953 { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS }, 1954 { .function = HYPERV_CPUID_SYNDBG_INTERFACE }, 1955 { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }, 1956 { .function = HYPERV_CPUID_NESTED_FEATURES }, 1957 }; 1958 int i, nent = ARRAY_SIZE(cpuid_entries); 1959 1960 if (kvm_x86_ops.nested_ops->get_evmcs_version) 1961 evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu); 1962 1963 /* Skip NESTED_FEATURES if eVMCS is not supported */ 1964 if (!evmcs_ver) 1965 --nent; 1966 1967 if (cpuid->nent < nent) 1968 return -E2BIG; 1969 1970 if (cpuid->nent > nent) 1971 cpuid->nent = nent; 1972 1973 for (i = 0; i < nent; i++) { 1974 struct kvm_cpuid_entry2 *ent = &cpuid_entries[i]; 1975 u32 signature[3]; 1976 1977 switch (ent->function) { 1978 case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: 1979 memcpy(signature, "Linux KVM Hv", 12); 1980 1981 ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES; 1982 ent->ebx = signature[0]; 1983 ent->ecx = signature[1]; 1984 ent->edx = signature[2]; 1985 break; 1986 1987 case HYPERV_CPUID_INTERFACE: 1988 memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); 1989 ent->eax = signature[0]; 1990 break; 1991 1992 case HYPERV_CPUID_VERSION: 1993 /* 1994 * We implement some Hyper-V 2016 functions so let's use 1995 * this version. 
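			 * (0x3839 below is build 14393 and 0x000A0000 encodes
			 * version 10.0, i.e. Windows Server 2016.)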
1996 */ 1997 ent->eax = 0x00003839; 1998 ent->ebx = 0x000A0000; 1999 break; 2000 2001 case HYPERV_CPUID_FEATURES: 2002 ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE; 2003 ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; 2004 ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE; 2005 ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; 2006 ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; 2007 ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE; 2008 ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; 2009 ent->eax |= HV_X64_MSR_RESET_AVAILABLE; 2010 ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; 2011 ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS; 2012 ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT; 2013 2014 ent->ebx |= HV_X64_POST_MESSAGES; 2015 ent->ebx |= HV_X64_SIGNAL_EVENTS; 2016 2017 ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; 2018 ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; 2019 2020 ent->ebx |= HV_DEBUGGING; 2021 ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; 2022 ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; 2023 2024 /* 2025 * Direct Synthetic timers only make sense with in-kernel 2026 * LAPIC 2027 */ 2028 if (lapic_in_kernel(vcpu)) 2029 ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; 2030 2031 break; 2032 2033 case HYPERV_CPUID_ENLIGHTMENT_INFO: 2034 ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; 2035 ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; 2036 ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; 2037 ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; 2038 ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; 2039 if (evmcs_ver) 2040 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 2041 if (!cpu_smt_possible()) 2042 ent->eax |= HV_X64_NO_NONARCH_CORESHARING; 2043 /* 2044 * Default number of spinlock retry attempts, matches 2045 * HyperV 2016. 2046 */ 2047 ent->ebx = 0x00000FFF; 2048 2049 break; 2050 2051 case HYPERV_CPUID_IMPLEMENT_LIMITS: 2052 /* Maximum number of virtual processors */ 2053 ent->eax = KVM_MAX_VCPUS; 2054 /* 2055 * Maximum number of logical processors, matches 2056 * HyperV 2016. 2057 */ 2058 ent->ebx = 64; 2059 2060 break; 2061 2062 case HYPERV_CPUID_NESTED_FEATURES: 2063 ent->eax = evmcs_ver; 2064 2065 break; 2066 2067 case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: 2068 memcpy(signature, "Linux KVM Hv", 12); 2069 2070 ent->eax = 0; 2071 ent->ebx = signature[0]; 2072 ent->ecx = signature[1]; 2073 ent->edx = signature[2]; 2074 break; 2075 2076 case HYPERV_CPUID_SYNDBG_INTERFACE: 2077 memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); 2078 ent->eax = signature[0]; 2079 break; 2080 2081 case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES: 2082 ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; 2083 break; 2084 2085 default: 2086 break; 2087 } 2088 } 2089 2090 if (copy_to_user(entries, cpuid_entries, 2091 nent * sizeof(struct kvm_cpuid_entry2))) 2092 return -EFAULT; 2093 2094 return 0; 2095 } 2096