/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>

#undef pr_fmt
#define pr_fmt(fmt)	"vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF			0x40000000
#define CPUID_VMWARE_FEATURES_LEAF		0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL	BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL	BIT(1)

#define VMWARE_HYPERVISOR_MAGIC			0x564D5868

#define VMWARE_CMD_GETVERSION			10
#define VMWARE_CMD_GETHZ			45
#define VMWARE_CMD_GETVCPU_INFO			68
#define VMWARE_CMD_LEGACY_X2APIC		3
#define VMWARE_CMD_VCPU_RESERVED		31
#define VMWARE_CMD_STEALCLOCK			91

#define STEALCLOCK_NOT_AVAILABLE	(-1)
#define STEALCLOCK_DISABLED		0
#define STEALCLOCK_ENABLED		1

#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
	__asm__("inl (%%dx), %%eax" :					\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :		\
		"memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {		\
	switch (vmware_hypercall_mode) {			\
	case CPUID_VMWARE_FEATURES_ECX_VMCALL:			\
		VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);		\
		break;						\
	case CPUID_VMWARE_FEATURES_ECX_VMMCALL:			\
		VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);	\
		break;						\
	default:						\
		VMWARE_PORT(cmd, eax, ebx, ecx, edx);		\
		break;						\
	}							\
} while (0)

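/*
 * Note on the calling convention shared by the three hypercall flavours
 * above: EAX carries VMWARE_HYPERVISOR_MAGIC, ECX the command number,
 * EBX the first argument (UINT_MAX when unused), and the hypervisor
 * returns its results in EAX/EBX/ECX/EDX. Only the trap mechanism
 * differs: the legacy backdoor issues an IN from VMWARE_HYPERVISOR_PORT,
 * while the VMCALL/VMMCALL variants use the hardware-assisted
 * instructions advertised via CPUID_VMWARE_FEATURES_LEAF (see
 * vmware_select_hypercall() below).
 */
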
struct vmware_steal_time {
	union {
		uint64_t clock;	/* stolen time counter in units of vtsc */
		struct {
			/* only for little-endian */
			uint32_t clock_low;
			uint32_t clock_high;
		};
	};
	uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode __ro_after_init;

static inline int __vmware_platform(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
	return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
	vmw_sched_clock = false;
	return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
	unsigned long long ns;

	ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
	ns -= vmware_cyc2ns.cyc2ns_offset;
	return ns;
}

static void __init vmware_cyc2ns_setup(void)
{
	struct cyc2ns_data *d = &vmware_cyc2ns;
	unsigned long long tsc_now = rdtsc();

	clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
			       vmware_tsc_khz, NSEC_PER_MSEC, 0);
	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
					   d->cyc2ns_shift);

	pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}

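/*
 * The mult/shift pair computed above converts TSC ticks to nanoseconds
 * as ns = (cycles * cyc2ns_mul) >> cyc2ns_shift, and cyc2ns_offset
 * records the converted TSC value sampled at setup so that
 * vmware_sched_clock() starts counting from roughly zero. As a purely
 * illustrative example: with a hypothetical 2 GHz TSC
 * (vmware_tsc_khz == 2000000), the pair is chosen so that each tick
 * contributes about 0.5 ns.
 */
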
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	asm volatile (VMWARE_HYPERCALL :
		"=a"(result),
		"=c"(info) :
		"a"(VMWARE_HYPERVISOR_MAGIC),
		"b"(0),
		"c"(VMWARE_CMD_STEALCLOCK),
		"d"(0),
		"S"(arg1),
		"D"(arg2) :
		"memory");
	return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
	return vmware_cmd_stealclock(upper_32_bits(pa),
				     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
	return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
	__stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
	return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu: the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *	The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		do {
			initial_high = READ_ONCE(steal->clock_high);
			/* Do not reorder initial_high and high readings */
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
			/* Keep low reading in between */
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			       vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

	if (!has_steal_clock)
		return;

	if (!stealclock_enable(slow_virt_to_phys(st))) {
		has_steal_clock = false;
		return;
	}

	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
	if (has_steal_clock)
		vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
				   unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
#endif

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
	pv_info.name = "VMware hypervisor";
	pv_ops.cpu.io_delay = paravirt_nop;

	if (vmware_tsc_khz == 0)
		return;

	vmware_cyc2ns_setup();

	if (vmw_sched_clock)
		pv_ops.time.sched_clock = vmware_sched_clock;

	if (vmware_is_stealclock_available()) {
		has_steal_clock = true;
		pv_ops.time.steal_clock = vmware_steal_clock;

		/* We use reboot notifier only to disable steal clock */
		register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
		smp_ops.smp_prepare_boot_cpu =
			vmware_smp_prepare_boot_cpu;
		if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					      "x86/vmware:online",
					      vmware_cpu_online,
					      vmware_cpu_down_prepare) < 0)
			pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
		vmware_guest_cpu_init();
#endif
	}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing difference when running on virtual cpus, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
 * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel could just trust the hypervisor with providing a
 * reliable virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMCALL);
	else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

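/*
 * The GETHZ hypercall used below reports the TSC frequency in Hz as a
 * 64-bit value split across EBX:EAX (EBX == UINT_MAX means the query
 * failed), and the host bus clock in Hz in ECX, which is used to derive
 * the local APIC timer period. For instance, on a hypothetical 2 GHz
 * host EBX:EAX would read back 2,000,000,000, which the code below
 * converts to a tsc_khz of 2,000,000 and prints as "2000.000 MHz".
 */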
static void __init vmware_platform_setup(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint64_t lpj, tsc_khz;

	VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

	if (ebx != UINT_MAX) {
		lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
		do_div(tsc_khz, 1000);
		WARN_ON(tsc_khz >> 32);
		pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
			(unsigned long) tsc_khz / 1000,
			(unsigned long) tsc_khz % 1000);

		if (!preset_lpj) {
			do_div(lpj, HZ);
			preset_lpj = lpj;
		}

		vmware_tsc_khz = tsc_khz;
		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
		x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
		/* Skip lapic calibration since we know the bus frequency. */
		lapic_timer_period = ecx / HZ;
		pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
			ecx);
#endif
	} else {
		pr_warn("Failed to get TSC freq from the hypervisor\n");
	}

	vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

	vmware_set_capabilities();
}

static u8 __init vmware_select_hypercall(void)
{
	int eax, ebx, ecx, edx;

	cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
	return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
		       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * While checking the dmi string information, just checking the product
 * serial key should be enough, as this will always have a VMware
 * specific string when running under VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		unsigned int eax;
		unsigned int hyper_vendor_id[3];

		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
		      &hyper_vendor_id[1], &hyper_vendor_id[2]);
		if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
			if (eax >= CPUID_VMWARE_FEATURES_LEAF)
				vmware_hypercall_mode =
					vmware_select_hypercall();

			pr_info("hypercall mode: 0x%02x\n",
				(unsigned int) vmware_hypercall_mode);

			return CPUID_VMWARE_INFO_LEAF;
		}
	} else if (dmi_available && dmi_name_in_serial("VMware") &&
		   __vmware_platform())
		return 1;

	return 0;
}

/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
	return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
	       (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
}

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
	.name			= "VMware",
	.detect			= vmware_platform,
	.type			= X86_HYPER_VMWARE,
	.init.init_platform	= vmware_platform_setup,
	.init.x2apic_available	= vmware_legacy_x2apic_available,
};