1 /* 2 * VMware Detection code. 3 * 4 * Copyright (C) 2008, VMware, Inc. 5 * Author : Alok N Kataria <akataria@vmware.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 15 * NON INFRINGEMENT. See the GNU General Public License for more 16 * details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 */ 23 24 #include <linux/dmi.h> 25 #include <linux/init.h> 26 #include <linux/export.h> 27 #include <linux/clocksource.h> 28 #include <linux/cpu.h> 29 #include <linux/efi.h> 30 #include <linux/reboot.h> 31 #include <linux/static_call.h> 32 #include <linux/sched/cputime.h> 33 #include <asm/div64.h> 34 #include <asm/x86_init.h> 35 #include <asm/hypervisor.h> 36 #include <asm/cpuid/api.h> 37 #include <asm/timer.h> 38 #include <asm/apic.h> 39 #include <asm/vmware.h> 40 #include <asm/svm.h> 41 42 #undef pr_fmt 43 #define pr_fmt(fmt) "vmware: " fmt 44 45 #define CPUID_VMWARE_INFO_LEAF 0x40000000 46 #define CPUID_VMWARE_FEATURES_LEAF 0x40000010 47 48 #define GETVCPU_INFO_LEGACY_X2APIC BIT(3) 49 #define GETVCPU_INFO_VCPU_RESERVED BIT(31) 50 51 #define STEALCLOCK_NOT_AVAILABLE (-1) 52 #define STEALCLOCK_DISABLED 0 53 #define STEALCLOCK_ENABLED 1 54 55 struct vmware_steal_time { 56 union { 57 u64 clock; /* stolen time counter in units of vtsc */ 58 struct { 59 /* only for little-endian */ 60 u32 clock_low; 61 u32 clock_high; 62 }; 63 }; 64 u64 reserved[7]; 65 }; 66 67 static unsigned long vmware_tsc_khz __ro_after_init; 68 static u8 vmware_hypercall_mode __ro_after_init; 69 70 unsigned long vmware_hypercall_slow(unsigned long cmd, 71 unsigned long in1, unsigned long in3, 72 unsigned long in4, unsigned long in5, 73 u32 *out1, u32 *out2, u32 *out3, 74 u32 *out4, u32 *out5) 75 { 76 unsigned long out0, rbx, rcx, rdx, rsi, rdi; 77 78 switch (vmware_hypercall_mode) { 79 case CPUID_VMWARE_FEATURES_ECX_VMCALL: 80 asm_inline volatile ("vmcall" 81 : "=a" (out0), "=b" (rbx), "=c" (rcx), 82 "=d" (rdx), "=S" (rsi), "=D" (rdi) 83 : "a" (VMWARE_HYPERVISOR_MAGIC), 84 "b" (in1), 85 "c" (cmd), 86 "d" (in3), 87 "S" (in4), 88 "D" (in5) 89 : "cc", "memory"); 90 break; 91 case CPUID_VMWARE_FEATURES_ECX_VMMCALL: 92 asm_inline volatile ("vmmcall" 93 : "=a" (out0), "=b" (rbx), "=c" (rcx), 94 "=d" (rdx), "=S" (rsi), "=D" (rdi) 95 : "a" (VMWARE_HYPERVISOR_MAGIC), 96 "b" (in1), 97 "c" (cmd), 98 "d" (in3), 99 "S" (in4), 100 "D" (in5) 101 : "cc", "memory"); 102 break; 103 default: 104 asm_inline volatile ("movw %[port], %%dx; inl (%%dx), %%eax" 105 : "=a" (out0), "=b" (rbx), "=c" (rcx), 106 "=d" (rdx), "=S" (rsi), "=D" (rdi) 107 : [port] "i" (VMWARE_HYPERVISOR_PORT), 108 "a" (VMWARE_HYPERVISOR_MAGIC), 109 "b" (in1), 110 "c" (cmd), 111 "d" (in3), 112 "S" (in4), 113 "D" (in5) 114 : "cc", "memory"); 115 break; 116 } 117 118 if (out1) 119 *out1 = rbx; 120 if (out2) 121 *out2 = rcx; 122 if (out3) 123 *out3 = rdx; 124 if (out4) 125 *out4 = rsi; 126 if (out5) 127 *out5 = rdi; 128 129 return out0; 130 } 131 132 static inline int __vmware_platform(void) 133 { 134 u32 eax, ebx, ecx; 135 136 eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx); 137 return eax != UINT_MAX && ebx == VMWARE_HYPERVISOR_MAGIC; 138 } 139 140 static unsigned long vmware_get_tsc_khz(void) 141 { 142 return vmware_tsc_khz; 143 } 144 145 #ifdef CONFIG_PARAVIRT 146 static struct cyc2ns_data vmware_cyc2ns __ro_after_init; 147 static bool vmw_sched_clock __initdata = true; 148 static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64); 149 static bool has_steal_clock; 150 static bool steal_acc __initdata = true; /* steal time accounting */ 151 152 static __init int setup_vmw_sched_clock(char *s) 153 { 154 vmw_sched_clock = false; 155 return 0; 156 } 157 early_param("no-vmw-sched-clock", setup_vmw_sched_clock); 158 159 static __init int parse_no_stealacc(char *arg) 160 { 161 steal_acc = false; 162 return 0; 163 } 164 early_param("no-steal-acc", parse_no_stealacc); 165 166 static noinstr u64 vmware_sched_clock(void) 167 { 168 unsigned long long ns; 169 170 ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul, 171 vmware_cyc2ns.cyc2ns_shift); 172 ns -= vmware_cyc2ns.cyc2ns_offset; 173 return ns; 174 } 175 176 static void __init vmware_cyc2ns_setup(void) 177 { 178 struct cyc2ns_data *d = &vmware_cyc2ns; 179 unsigned long long tsc_now = rdtsc(); 180 181 clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift, 182 vmware_tsc_khz, NSEC_PER_MSEC, 0); 183 d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, 184 d->cyc2ns_shift); 185 186 pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset); 187 } 188 189 static int vmware_cmd_stealclock(u32 addr_hi, u32 addr_lo) 190 { 191 u32 info; 192 193 return vmware_hypercall5(VMWARE_CMD_STEALCLOCK, 0, 0, addr_hi, addr_lo, 194 &info); 195 } 196 197 static bool stealclock_enable(phys_addr_t pa) 198 { 199 return vmware_cmd_stealclock(upper_32_bits(pa), 200 lower_32_bits(pa)) == STEALCLOCK_ENABLED; 201 } 202 203 static int __stealclock_disable(void) 204 { 205 return vmware_cmd_stealclock(0, 1); 206 } 207 208 static void stealclock_disable(void) 209 { 210 __stealclock_disable(); 211 } 212 213 static bool vmware_is_stealclock_available(void) 214 { 215 return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE; 216 } 217 218 /** 219 * vmware_steal_clock() - read the per-cpu steal clock 220 * @cpu: the cpu number whose steal clock we want to read 221 * 222 * The function reads the steal clock if we are on a 64-bit system, otherwise 223 * reads it in parts, checking that the high part didn't change in the 224 * meantime. 225 * 226 * Return: 227 * The steal clock reading in ns. 228 */ 229 static u64 vmware_steal_clock(int cpu) 230 { 231 struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu); 232 u64 clock; 233 234 if (IS_ENABLED(CONFIG_64BIT)) 235 clock = READ_ONCE(steal->clock); 236 else { 237 u32 initial_high, low, high; 238 239 do { 240 initial_high = READ_ONCE(steal->clock_high); 241 /* Do not reorder initial_high and high readings */ 242 virt_rmb(); 243 low = READ_ONCE(steal->clock_low); 244 /* Keep low reading in between */ 245 virt_rmb(); 246 high = READ_ONCE(steal->clock_high); 247 } while (initial_high != high); 248 249 clock = ((u64)high << 32) | low; 250 } 251 252 return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul, 253 vmware_cyc2ns.cyc2ns_shift); 254 } 255 256 static void vmware_register_steal_time(void) 257 { 258 int cpu = smp_processor_id(); 259 struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu); 260 261 if (!has_steal_clock) 262 return; 263 264 if (!stealclock_enable(slow_virt_to_phys(st))) { 265 has_steal_clock = false; 266 return; 267 } 268 269 pr_info("vmware-stealtime: cpu %d, pa %llx\n", 270 cpu, (unsigned long long) slow_virt_to_phys(st)); 271 } 272 273 static void vmware_disable_steal_time(void) 274 { 275 if (!has_steal_clock) 276 return; 277 278 stealclock_disable(); 279 } 280 281 static void vmware_guest_cpu_init(void) 282 { 283 if (has_steal_clock) 284 vmware_register_steal_time(); 285 } 286 287 static void vmware_pv_guest_cpu_reboot(void *unused) 288 { 289 vmware_disable_steal_time(); 290 } 291 292 static int vmware_pv_reboot_notify(struct notifier_block *nb, 293 unsigned long code, void *unused) 294 { 295 if (code == SYS_RESTART) 296 on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1); 297 return NOTIFY_DONE; 298 } 299 300 static struct notifier_block vmware_pv_reboot_nb = { 301 .notifier_call = vmware_pv_reboot_notify, 302 }; 303 304 #ifdef CONFIG_SMP 305 static void __init vmware_smp_prepare_boot_cpu(void) 306 { 307 vmware_guest_cpu_init(); 308 native_smp_prepare_boot_cpu(); 309 } 310 311 static int vmware_cpu_online(unsigned int cpu) 312 { 313 local_irq_disable(); 314 vmware_guest_cpu_init(); 315 local_irq_enable(); 316 return 0; 317 } 318 319 static int vmware_cpu_down_prepare(unsigned int cpu) 320 { 321 local_irq_disable(); 322 vmware_disable_steal_time(); 323 local_irq_enable(); 324 return 0; 325 } 326 #endif 327 328 static __init int activate_jump_labels(void) 329 { 330 if (has_steal_clock) { 331 static_key_slow_inc(¶virt_steal_enabled); 332 if (steal_acc) 333 static_key_slow_inc(¶virt_steal_rq_enabled); 334 } 335 336 return 0; 337 } 338 arch_initcall(activate_jump_labels); 339 340 static void __init vmware_paravirt_ops_setup(void) 341 { 342 pv_info.name = "VMware hypervisor"; 343 pv_info.io_delay = false; 344 345 if (vmware_tsc_khz == 0) 346 return; 347 348 vmware_cyc2ns_setup(); 349 350 if (vmw_sched_clock) 351 paravirt_set_sched_clock(vmware_sched_clock); 352 353 if (vmware_is_stealclock_available()) { 354 has_steal_clock = true; 355 static_call_update(pv_steal_clock, vmware_steal_clock); 356 357 /* We use reboot notifier only to disable steal clock */ 358 register_reboot_notifier(&vmware_pv_reboot_nb); 359 360 #ifdef CONFIG_SMP 361 smp_ops.smp_prepare_boot_cpu = 362 vmware_smp_prepare_boot_cpu; 363 if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, 364 "x86/vmware:online", 365 vmware_cpu_online, 366 vmware_cpu_down_prepare) < 0) 367 pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n"); 368 #else 369 vmware_guest_cpu_init(); 370 #endif 371 } 372 } 373 #else 374 #define vmware_paravirt_ops_setup() do {} while (0) 375 #endif 376 377 /* 378 * VMware hypervisor takes care of exporting a reliable TSC to the guest. 379 * Still, due to timing difference when running on virtual cpus, the TSC can 380 * be marked as unstable in some cases. For example, the TSC sync check at 381 * bootup can fail due to a marginal offset between vcpus' TSCs (though the 382 * TSCs do not drift from each other). Also, the ACPI PM timer clocksource 383 * is not suitable as a watchdog when running on a hypervisor because the 384 * kernel may miss a wrap of the counter if the vcpu is descheduled for a 385 * long time. To skip these checks at runtime we set these capability bits, 386 * so that the kernel could just trust the hypervisor with providing a 387 * reliable virtual TSC that is suitable for timekeeping. 388 */ 389 static void __init vmware_set_capabilities(void) 390 { 391 setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC); 392 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); 393 if (vmware_tsc_khz) 394 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 395 if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL) 396 setup_force_cpu_cap(X86_FEATURE_VMCALL); 397 else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL) 398 setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL); 399 } 400 401 static void __init vmware_platform_setup(void) 402 { 403 u32 eax, ebx, ecx; 404 u64 lpj, tsc_khz; 405 406 eax = vmware_hypercall3(VMWARE_CMD_GETHZ, UINT_MAX, &ebx, &ecx); 407 408 if (ebx != UINT_MAX) { 409 lpj = tsc_khz = eax | (((u64)ebx) << 32); 410 do_div(tsc_khz, 1000); 411 WARN_ON(tsc_khz >> 32); 412 pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", 413 (unsigned long) tsc_khz / 1000, 414 (unsigned long) tsc_khz % 1000); 415 416 if (!preset_lpj) { 417 do_div(lpj, HZ); 418 preset_lpj = lpj; 419 } 420 421 vmware_tsc_khz = tsc_khz; 422 x86_platform.calibrate_tsc = vmware_get_tsc_khz; 423 x86_platform.calibrate_cpu = vmware_get_tsc_khz; 424 425 #ifdef CONFIG_X86_LOCAL_APIC 426 /* Skip lapic calibration since we know the bus frequency. */ 427 lapic_timer_period = ecx / HZ; 428 pr_info("Host bus clock speed read from hypervisor : %u Hz\n", 429 ecx); 430 #endif 431 } else { 432 pr_warn("Failed to get TSC freq from the hypervisor\n"); 433 } 434 435 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !efi_enabled(EFI_BOOT)) 436 x86_init.mpparse.find_mptable = mpparse_find_mptable; 437 438 vmware_paravirt_ops_setup(); 439 440 #ifdef CONFIG_X86_IO_APIC 441 no_timer_check = 1; 442 #endif 443 444 vmware_set_capabilities(); 445 } 446 447 static u8 __init vmware_select_hypercall(void) 448 { 449 int eax, ebx, ecx, edx; 450 451 cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx); 452 return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL | 453 CPUID_VMWARE_FEATURES_ECX_VMCALL)); 454 } 455 456 /* 457 * While checking the dmi string information, just checking the product 458 * serial key should be enough, as this will always have a VMware 459 * specific string when running under VMware hypervisor. 460 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode 461 * intentionally defaults to 0. 462 */ 463 static u32 __init vmware_platform(void) 464 { 465 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { 466 unsigned int eax; 467 unsigned int hyper_vendor_id[3]; 468 469 cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], 470 &hyper_vendor_id[1], &hyper_vendor_id[2]); 471 if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) { 472 if (eax >= CPUID_VMWARE_FEATURES_LEAF) 473 vmware_hypercall_mode = 474 vmware_select_hypercall(); 475 476 pr_info("hypercall mode: 0x%02x\n", 477 (unsigned int) vmware_hypercall_mode); 478 479 return CPUID_VMWARE_INFO_LEAF; 480 } 481 } else if (dmi_available && dmi_name_in_serial("VMware") && 482 __vmware_platform()) 483 return 1; 484 485 return 0; 486 } 487 488 /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ 489 static bool __init vmware_legacy_x2apic_available(void) 490 { 491 u32 eax; 492 493 eax = vmware_hypercall1(VMWARE_CMD_GETVCPU_INFO, 0); 494 return !(eax & GETVCPU_INFO_VCPU_RESERVED) && 495 (eax & GETVCPU_INFO_LEGACY_X2APIC); 496 } 497 498 #ifdef CONFIG_INTEL_TDX_GUEST 499 /* 500 * TDCALL[TDG.VP.VMCALL] uses %rax (arg0) and %rcx (arg2). Therefore, 501 * we remap those registers to %r12 and %r13, respectively. 502 */ 503 unsigned long vmware_tdx_hypercall(unsigned long cmd, 504 unsigned long in1, unsigned long in3, 505 unsigned long in4, unsigned long in5, 506 u32 *out1, u32 *out2, u32 *out3, 507 u32 *out4, u32 *out5) 508 { 509 struct tdx_module_args args = {}; 510 511 if (!hypervisor_is_type(X86_HYPER_VMWARE)) { 512 pr_warn_once("Incorrect usage\n"); 513 return ULONG_MAX; 514 } 515 516 if (cmd & ~VMWARE_CMD_MASK) { 517 pr_warn_once("Out of range command %lx\n", cmd); 518 return ULONG_MAX; 519 } 520 521 args.rbx = in1; 522 args.rdx = in3; 523 args.rsi = in4; 524 args.rdi = in5; 525 args.r10 = VMWARE_TDX_VENDOR_LEAF; 526 args.r11 = VMWARE_TDX_HCALL_FUNC; 527 args.r12 = VMWARE_HYPERVISOR_MAGIC; 528 args.r13 = cmd; 529 /* CPL */ 530 args.r15 = 0; 531 532 __tdx_hypercall(&args); 533 534 if (out1) 535 *out1 = args.rbx; 536 if (out2) 537 *out2 = args.r13; 538 if (out3) 539 *out3 = args.rdx; 540 if (out4) 541 *out4 = args.rsi; 542 if (out5) 543 *out5 = args.rdi; 544 545 return args.r12; 546 } 547 EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); 548 #endif 549 550 #ifdef CONFIG_AMD_MEM_ENCRYPT 551 static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb, 552 struct pt_regs *regs) 553 { 554 /* Copy VMWARE specific Hypercall parameters to the GHCB */ 555 ghcb_set_rip(ghcb, regs->ip); 556 ghcb_set_rbx(ghcb, regs->bx); 557 ghcb_set_rcx(ghcb, regs->cx); 558 ghcb_set_rdx(ghcb, regs->dx); 559 ghcb_set_rsi(ghcb, regs->si); 560 ghcb_set_rdi(ghcb, regs->di); 561 ghcb_set_rbp(ghcb, regs->bp); 562 } 563 564 static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) 565 { 566 if (!(ghcb_rbx_is_valid(ghcb) && 567 ghcb_rcx_is_valid(ghcb) && 568 ghcb_rdx_is_valid(ghcb) && 569 ghcb_rsi_is_valid(ghcb) && 570 ghcb_rdi_is_valid(ghcb) && 571 ghcb_rbp_is_valid(ghcb))) 572 return false; 573 574 regs->bx = ghcb_get_rbx(ghcb); 575 regs->cx = ghcb_get_rcx(ghcb); 576 regs->dx = ghcb_get_rdx(ghcb); 577 regs->si = ghcb_get_rsi(ghcb); 578 regs->di = ghcb_get_rdi(ghcb); 579 regs->bp = ghcb_get_rbp(ghcb); 580 581 return true; 582 } 583 #endif 584 585 const __initconst struct hypervisor_x86 x86_hyper_vmware = { 586 .name = "VMware", 587 .detect = vmware_platform, 588 .type = X86_HYPER_VMWARE, 589 .init.init_platform = vmware_platform_setup, 590 .init.x2apic_available = vmware_legacy_x2apic_available, 591 #ifdef CONFIG_AMD_MEM_ENCRYPT 592 .runtime.sev_es_hcall_prepare = vmware_sev_es_hcall_prepare, 593 .runtime.sev_es_hcall_finish = vmware_sev_es_hcall_finish, 594 #endif 595 }; 596