1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Architecture neutral utility routines for interacting with 5 * Hyper-V. This file is specifically for code that must be 6 * built-in to the kernel image when CONFIG_HYPERV is set 7 * (vs. being in a module) because it is called from architecture 8 * specific code under arch/. 9 * 10 * Copyright (C) 2021, Microsoft, Inc. 11 * 12 * Author : Michael Kelley <mikelley@microsoft.com> 13 */ 14 15 #include <linux/types.h> 16 #include <linux/acpi.h> 17 #include <linux/export.h> 18 #include <linux/bitfield.h> 19 #include <linux/cpumask.h> 20 #include <linux/sched/task_stack.h> 21 #include <linux/panic_notifier.h> 22 #include <linux/ptrace.h> 23 #include <linux/kdebug.h> 24 #include <linux/kmsg_dump.h> 25 #include <linux/slab.h> 26 #include <linux/dma-map-ops.h> 27 #include <asm/hyperv-tlfs.h> 28 #include <asm/mshyperv.h> 29 30 /* 31 * hv_root_partition, ms_hyperv and hv_nested are defined here with other 32 * Hyper-V specific globals so they are shared across all architectures and are 33 * built only when CONFIG_HYPERV is defined. But on x86, 34 * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not 35 * defined, and it uses these three variables. So mark them as __weak 36 * here, allowing for an overriding definition in the module containing 37 * ms_hyperv_init_platform(). 38 */ 39 bool __weak hv_root_partition; 40 EXPORT_SYMBOL_GPL(hv_root_partition); 41 42 bool __weak hv_nested; 43 EXPORT_SYMBOL_GPL(hv_nested); 44 45 struct ms_hyperv_info __weak ms_hyperv; 46 EXPORT_SYMBOL_GPL(ms_hyperv); 47 48 u32 *hv_vp_index; 49 EXPORT_SYMBOL_GPL(hv_vp_index); 50 51 u32 hv_max_vp_index; 52 EXPORT_SYMBOL_GPL(hv_max_vp_index); 53 54 void * __percpu *hyperv_pcpu_input_arg; 55 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); 56 57 void * __percpu *hyperv_pcpu_output_arg; 58 EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); 59 60 static void hv_kmsg_dump_unregister(void); 61 62 static struct ctl_table_header *hv_ctl_table_hdr; 63 64 /* 65 * Hyper-V specific initialization and shutdown code that is 66 * common across all architectures. Called from architecture 67 * specific initialization functions. 68 */ 69 70 void __init hv_common_free(void) 71 { 72 unregister_sysctl_table(hv_ctl_table_hdr); 73 hv_ctl_table_hdr = NULL; 74 75 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) 76 hv_kmsg_dump_unregister(); 77 78 kfree(hv_vp_index); 79 hv_vp_index = NULL; 80 81 free_percpu(hyperv_pcpu_output_arg); 82 hyperv_pcpu_output_arg = NULL; 83 84 free_percpu(hyperv_pcpu_input_arg); 85 hyperv_pcpu_input_arg = NULL; 86 } 87 88 /* 89 * Functions for allocating and freeing memory with size and 90 * alignment HV_HYP_PAGE_SIZE. These functions are needed because 91 * the guest page size may not be the same as the Hyper-V page 92 * size. We depend upon kmalloc() aligning power-of-two size 93 * allocations to the allocation size boundary, so that the 94 * allocated memory appears to Hyper-V as a page of the size 95 * it expects. 96 */ 97 98 void *hv_alloc_hyperv_page(void) 99 { 100 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); 101 102 if (PAGE_SIZE == HV_HYP_PAGE_SIZE) 103 return (void *)__get_free_page(GFP_KERNEL); 104 else 105 return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); 106 } 107 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); 108 109 void *hv_alloc_hyperv_zeroed_page(void) 110 { 111 if (PAGE_SIZE == HV_HYP_PAGE_SIZE) 112 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 113 else 114 return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); 115 } 116 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); 117 118 void hv_free_hyperv_page(unsigned long addr) 119 { 120 if (PAGE_SIZE == HV_HYP_PAGE_SIZE) 121 free_page(addr); 122 else 123 kfree((void *)addr); 124 } 125 EXPORT_SYMBOL_GPL(hv_free_hyperv_page); 126 127 static void *hv_panic_page; 128 129 /* 130 * Boolean to control whether to report panic messages over Hyper-V. 131 * 132 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg 133 */ 134 static int sysctl_record_panic_msg = 1; 135 136 /* 137 * sysctl option to allow the user to control whether kmsg data should be 138 * reported to Hyper-V on panic. 139 */ 140 static struct ctl_table hv_ctl_table[] = { 141 { 142 .procname = "hyperv_record_panic_msg", 143 .data = &sysctl_record_panic_msg, 144 .maxlen = sizeof(int), 145 .mode = 0644, 146 .proc_handler = proc_dointvec_minmax, 147 .extra1 = SYSCTL_ZERO, 148 .extra2 = SYSCTL_ONE 149 }, 150 {} 151 }; 152 153 static int hv_die_panic_notify_crash(struct notifier_block *self, 154 unsigned long val, void *args); 155 156 static struct notifier_block hyperv_die_report_block = { 157 .notifier_call = hv_die_panic_notify_crash, 158 }; 159 160 static struct notifier_block hyperv_panic_report_block = { 161 .notifier_call = hv_die_panic_notify_crash, 162 }; 163 164 /* 165 * The following callback works both as die and panic notifier; its 166 * goal is to provide panic information to the hypervisor unless the 167 * kmsg dumper is used [see hv_kmsg_dump()], which provides more 168 * information but isn't always available. 169 * 170 * Notice that both the panic/die report notifiers are registered only 171 * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. 172 */ 173 static int hv_die_panic_notify_crash(struct notifier_block *self, 174 unsigned long val, void *args) 175 { 176 struct pt_regs *regs; 177 bool is_die; 178 179 /* Don't notify Hyper-V unless we have a die oops event or panic. */ 180 if (self == &hyperv_panic_report_block) { 181 is_die = false; 182 regs = current_pt_regs(); 183 } else { /* die event */ 184 if (val != DIE_OOPS) 185 return NOTIFY_DONE; 186 187 is_die = true; 188 regs = ((struct die_args *)args)->regs; 189 } 190 191 /* 192 * Hyper-V should be notified only once about a panic/die. If we will 193 * be calling hv_kmsg_dump() later with kmsg data, don't do the 194 * notification here. 195 */ 196 if (!sysctl_record_panic_msg || !hv_panic_page) 197 hyperv_report_panic(regs, val, is_die); 198 199 return NOTIFY_DONE; 200 } 201 202 /* 203 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg 204 * buffer and call into Hyper-V to transfer the data. 205 */ 206 static void hv_kmsg_dump(struct kmsg_dumper *dumper, 207 enum kmsg_dump_reason reason) 208 { 209 struct kmsg_dump_iter iter; 210 size_t bytes_written; 211 212 /* We are only interested in panics. */ 213 if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg) 214 return; 215 216 /* 217 * Write dump contents to the page. No need to synchronize; panic should 218 * be single-threaded. 219 */ 220 kmsg_dump_rewind(&iter); 221 kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, 222 &bytes_written); 223 if (!bytes_written) 224 return; 225 /* 226 * P3 to contain the physical address of the panic page & P4 to 227 * contain the size of the panic data in that page. Rest of the 228 * registers are no-op when the NOTIFY_MSG flag is set. 229 */ 230 hv_set_register(HV_REGISTER_CRASH_P0, 0); 231 hv_set_register(HV_REGISTER_CRASH_P1, 0); 232 hv_set_register(HV_REGISTER_CRASH_P2, 0); 233 hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); 234 hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); 235 236 /* 237 * Let Hyper-V know there is crash data available along with 238 * the panic message. 239 */ 240 hv_set_register(HV_REGISTER_CRASH_CTL, 241 (HV_CRASH_CTL_CRASH_NOTIFY | 242 HV_CRASH_CTL_CRASH_NOTIFY_MSG)); 243 } 244 245 static struct kmsg_dumper hv_kmsg_dumper = { 246 .dump = hv_kmsg_dump, 247 }; 248 249 static void hv_kmsg_dump_unregister(void) 250 { 251 kmsg_dump_unregister(&hv_kmsg_dumper); 252 unregister_die_notifier(&hyperv_die_report_block); 253 atomic_notifier_chain_unregister(&panic_notifier_list, 254 &hyperv_panic_report_block); 255 256 hv_free_hyperv_page((unsigned long)hv_panic_page); 257 hv_panic_page = NULL; 258 } 259 260 static void hv_kmsg_dump_register(void) 261 { 262 int ret; 263 264 hv_panic_page = hv_alloc_hyperv_zeroed_page(); 265 if (!hv_panic_page) { 266 pr_err("Hyper-V: panic message page memory allocation failed\n"); 267 return; 268 } 269 270 ret = kmsg_dump_register(&hv_kmsg_dumper); 271 if (ret) { 272 pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); 273 hv_free_hyperv_page((unsigned long)hv_panic_page); 274 hv_panic_page = NULL; 275 } 276 } 277 278 int __init hv_common_init(void) 279 { 280 int i; 281 282 if (hv_is_isolation_supported()) 283 sysctl_record_panic_msg = 0; 284 285 /* 286 * Hyper-V expects to get crash register data or kmsg when 287 * crash enlightment is available and system crashes. Set 288 * crash_kexec_post_notifiers to be true to make sure that 289 * calling crash enlightment interface before running kdump 290 * kernel. 291 */ 292 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 293 u64 hyperv_crash_ctl; 294 295 crash_kexec_post_notifiers = true; 296 pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n"); 297 298 /* 299 * Panic message recording (sysctl_record_panic_msg) 300 * is enabled by default in non-isolated guests and 301 * disabled by default in isolated guests; the panic 302 * message recording won't be available in isolated 303 * guests should the following registration fail. 304 */ 305 hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table); 306 if (!hv_ctl_table_hdr) 307 pr_err("Hyper-V: sysctl table register error"); 308 309 /* 310 * Register for panic kmsg callback only if the right 311 * capability is supported by the hypervisor. 312 */ 313 hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); 314 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) 315 hv_kmsg_dump_register(); 316 317 register_die_notifier(&hyperv_die_report_block); 318 atomic_notifier_chain_register(&panic_notifier_list, 319 &hyperv_panic_report_block); 320 } 321 322 /* 323 * Allocate the per-CPU state for the hypercall input arg. 324 * If this allocation fails, we will not be able to setup 325 * (per-CPU) hypercall input page and thus this failure is 326 * fatal on Hyper-V. 327 */ 328 hyperv_pcpu_input_arg = alloc_percpu(void *); 329 BUG_ON(!hyperv_pcpu_input_arg); 330 331 /* Allocate the per-CPU state for output arg for root */ 332 if (hv_root_partition) { 333 hyperv_pcpu_output_arg = alloc_percpu(void *); 334 BUG_ON(!hyperv_pcpu_output_arg); 335 } 336 337 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), 338 GFP_KERNEL); 339 if (!hv_vp_index) { 340 hv_common_free(); 341 return -ENOMEM; 342 } 343 344 for (i = 0; i < num_possible_cpus(); i++) 345 hv_vp_index[i] = VP_INVAL; 346 347 return 0; 348 } 349 350 /* 351 * Hyper-V specific initialization and die code for 352 * individual CPUs that is common across all architectures. 353 * Called by the CPU hotplug mechanism. 354 */ 355 356 int hv_common_cpu_init(unsigned int cpu) 357 { 358 void **inputarg, **outputarg; 359 u64 msr_vp_index; 360 gfp_t flags; 361 int pgcount = hv_root_partition ? 2 : 1; 362 363 /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ 364 flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; 365 366 inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 367 *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); 368 if (!(*inputarg)) 369 return -ENOMEM; 370 371 if (hv_root_partition) { 372 outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); 373 *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; 374 } 375 376 msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); 377 378 hv_vp_index[cpu] = msr_vp_index; 379 380 if (msr_vp_index > hv_max_vp_index) 381 hv_max_vp_index = msr_vp_index; 382 383 return 0; 384 } 385 386 int hv_common_cpu_die(unsigned int cpu) 387 { 388 unsigned long flags; 389 void **inputarg, **outputarg; 390 void *mem; 391 392 local_irq_save(flags); 393 394 inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 395 mem = *inputarg; 396 *inputarg = NULL; 397 398 if (hv_root_partition) { 399 outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); 400 *outputarg = NULL; 401 } 402 403 local_irq_restore(flags); 404 405 kfree(mem); 406 407 return 0; 408 } 409 410 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ 411 bool hv_query_ext_cap(u64 cap_query) 412 { 413 /* 414 * The address of the 'hv_extended_cap' variable will be used as an 415 * output parameter to the hypercall below and so it should be 416 * compatible with 'virt_to_phys'. Which means, it's address should be 417 * directly mapped. Use 'static' to keep it compatible; stack variables 418 * can be virtually mapped, making them incompatible with 419 * 'virt_to_phys'. 420 * Hypercall input/output addresses should also be 8-byte aligned. 421 */ 422 static u64 hv_extended_cap __aligned(8); 423 static bool hv_extended_cap_queried; 424 u64 status; 425 426 /* 427 * Querying extended capabilities is an extended hypercall. Check if the 428 * partition supports extended hypercall, first. 429 */ 430 if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS)) 431 return false; 432 433 /* Extended capabilities do not change at runtime. */ 434 if (hv_extended_cap_queried) 435 return hv_extended_cap & cap_query; 436 437 status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL, 438 &hv_extended_cap); 439 440 /* 441 * The query extended capabilities hypercall should not fail under 442 * any normal circumstances. Avoid repeatedly making the hypercall, on 443 * error. 444 */ 445 hv_extended_cap_queried = true; 446 if (!hv_result_success(status)) { 447 pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n", 448 status); 449 return false; 450 } 451 452 return hv_extended_cap & cap_query; 453 } 454 EXPORT_SYMBOL_GPL(hv_query_ext_cap); 455 456 void hv_setup_dma_ops(struct device *dev, bool coherent) 457 { 458 /* 459 * Hyper-V does not offer a vIOMMU in the guest 460 * VM, so pass 0/NULL for the IOMMU settings 461 */ 462 arch_setup_dma_ops(dev, 0, 0, NULL, coherent); 463 } 464 EXPORT_SYMBOL_GPL(hv_setup_dma_ops); 465 466 bool hv_is_hibernation_supported(void) 467 { 468 return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); 469 } 470 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); 471 472 /* 473 * Default function to read the Hyper-V reference counter, independent 474 * of whether Hyper-V enlightened clocks/timers are being used. But on 475 * architectures where it is used, Hyper-V enlightenment code in 476 * hyperv_timer.c may override this function. 477 */ 478 static u64 __hv_read_ref_counter(void) 479 { 480 return hv_get_register(HV_REGISTER_TIME_REF_COUNT); 481 } 482 483 u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter; 484 EXPORT_SYMBOL_GPL(hv_read_reference_counter); 485 486 /* These __weak functions provide default "no-op" behavior and 487 * may be overridden by architecture specific versions. Architectures 488 * for which the default "no-op" behavior is sufficient can leave 489 * them unimplemented and not be cluttered with a bunch of stub 490 * functions in arch-specific code. 491 */ 492 493 bool __weak hv_is_isolation_supported(void) 494 { 495 return false; 496 } 497 EXPORT_SYMBOL_GPL(hv_is_isolation_supported); 498 499 bool __weak hv_isolation_type_snp(void) 500 { 501 return false; 502 } 503 EXPORT_SYMBOL_GPL(hv_isolation_type_snp); 504 505 void __weak hv_setup_vmbus_handler(void (*handler)(void)) 506 { 507 } 508 EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); 509 510 void __weak hv_remove_vmbus_handler(void) 511 { 512 } 513 EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); 514 515 void __weak hv_setup_kexec_handler(void (*handler)(void)) 516 { 517 } 518 EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); 519 520 void __weak hv_remove_kexec_handler(void) 521 { 522 } 523 EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); 524 525 void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) 526 { 527 } 528 EXPORT_SYMBOL_GPL(hv_setup_crash_handler); 529 530 void __weak hv_remove_crash_handler(void) 531 { 532 } 533 EXPORT_SYMBOL_GPL(hv_remove_crash_handler); 534 535 void __weak hyperv_cleanup(void) 536 { 537 } 538 EXPORT_SYMBOL_GPL(hyperv_cleanup); 539 540 u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) 541 { 542 return HV_STATUS_INVALID_PARAMETER; 543 } 544 EXPORT_SYMBOL_GPL(hv_ghcb_hypercall); 545