xref: /linux/drivers/hv/hv_common.c (revision e7e6902fbd19b25630cf6a258c44cb385f16b1c8)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Architecture neutral utility routines for interacting with
5  * Hyper-V. This file is specifically for code that must be
6  * built-in to the kernel image when CONFIG_HYPERV is set
7  * (vs. being in a module) because it is called from architecture
8  * specific code under arch/.
9  *
10  * Copyright (C) 2021, Microsoft, Inc.
11  *
12  * Author : Michael Kelley <mikelley@microsoft.com>
13  */
14 
15 #include <linux/types.h>
16 #include <linux/acpi.h>
17 #include <linux/export.h>
18 #include <linux/bitfield.h>
19 #include <linux/cpumask.h>
20 #include <linux/sched/task_stack.h>
21 #include <linux/panic_notifier.h>
22 #include <linux/ptrace.h>
23 #include <linux/random.h>
24 #include <linux/efi.h>
25 #include <linux/kdebug.h>
26 #include <linux/kmsg_dump.h>
27 #include <linux/sizes.h>
28 #include <linux/slab.h>
29 #include <linux/dma-map-ops.h>
30 #include <linux/set_memory.h>
31 #include <hyperv/hvhdk.h>
32 #include <asm/mshyperv.h>
33 
/*
 * ID of the partition this kernel runs in. Starts out as
 * HV_PARTITION_ID_SELF and is overwritten by hv_get_partition_id() when
 * the hypercall succeeds.
 */
u64 hv_current_partition_id = HV_PARTITION_ID_SELF;
EXPORT_SYMBOL_GPL(hv_current_partition_id);

/* Partition role; assigned by hv_identify_partition_type(). */
enum hv_partition_type hv_curr_partition_type;
EXPORT_SYMBOL_GPL(hv_curr_partition_type);
39 
/*
 * ms_hyperv and hv_nested are defined here with other Hyper-V specific
 * globals so they are shared across all architectures and are built only
 * when CONFIG_HYPERV is defined.  But on x86, ms_hyperv_init_platform()
 * is built even when CONFIG_HYPERV is not defined, and it uses both of
 * these variables.  So mark them as __weak here, allowing for an
 * overriding definition in the module containing
 * ms_hyperv_init_platform().
 */
bool __weak hv_nested;
EXPORT_SYMBOL_GPL(hv_nested);

struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);

/*
 * Hyper-V VP index for each Linux CPU number. Allocated with nr_cpu_ids
 * entries in hv_common_init(), each preset to VP_INVAL, and filled in by
 * hv_common_cpu_init().
 */
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);

/* Largest VP index recorded so far by hv_common_cpu_init(). */
u32 hv_max_vp_index;
EXPORT_SYMBOL_GPL(hv_max_vp_index);

/*
 * Per-CPU pointer to the hypercall input memory of that CPU. The pointer
 * area is allocated in hv_common_init(); the memory it points at is
 * allocated in hv_common_cpu_init().
 */
void * __percpu *hyperv_pcpu_input_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);

/*
 * Per-CPU pointer to the hypercall output memory of that CPU. Only
 * allocated when hv_output_page_exists() is true.
 */
void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
66 
/* Defined further down; declared early because hv_common_free() calls it. */
static void hv_kmsg_dump_unregister(void);

/* Handle returned by register_sysctl() in hv_common_init(); NULL if unset. */
static struct ctl_table_header *hv_ctl_table_hdr;
70 
/*
 * Per-cpu array holding the tail pointer for the SynIC event ring buffer
 * for each SINT.
 *
 * We cannot maintain this in mshv driver because the tail pointer should
 * persist even if the mshv driver is unloaded.
 *
 * The per-cpu pointer area is allocated in hv_common_init() when running
 * as the root partition; the per-CPU array itself is allocated in
 * hv_common_cpu_init() and released in hv_common_cpu_die().
 */
u8 * __percpu *hv_synic_eventring_tail;
EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
80 
81 /*
82  * Hyper-V specific initialization and shutdown code that is
83  * common across all architectures.  Called from architecture
84  * specific initialization functions.
85  */
86 
87 void __init hv_common_free(void)
88 {
89 	unregister_sysctl_table(hv_ctl_table_hdr);
90 	hv_ctl_table_hdr = NULL;
91 
92 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
93 		hv_kmsg_dump_unregister();
94 
95 	kfree(hv_vp_index);
96 	hv_vp_index = NULL;
97 
98 	free_percpu(hyperv_pcpu_output_arg);
99 	hyperv_pcpu_output_arg = NULL;
100 
101 	free_percpu(hyperv_pcpu_input_arg);
102 	hyperv_pcpu_input_arg = NULL;
103 
104 	free_percpu(hv_synic_eventring_tail);
105 	hv_synic_eventring_tail = NULL;
106 }
107 
108 /*
109  * Functions for allocating and freeing memory with size and
110  * alignment HV_HYP_PAGE_SIZE. These functions are needed because
111  * the guest page size may not be the same as the Hyper-V page
112  * size. We depend upon kmalloc() aligning power-of-two size
113  * allocations to the allocation size boundary, so that the
114  * allocated memory appears to Hyper-V as a page of the size
115  * it expects.
116  */
117 
118 void *hv_alloc_hyperv_page(void)
119 {
120 	BUILD_BUG_ON(PAGE_SIZE <  HV_HYP_PAGE_SIZE);
121 
122 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
123 		return (void *)__get_free_page(GFP_KERNEL);
124 	else
125 		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
126 }
127 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
128 
129 void *hv_alloc_hyperv_zeroed_page(void)
130 {
131 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
132 		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
133 	else
134 		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
135 }
136 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
137 
138 void hv_free_hyperv_page(void *addr)
139 {
140 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
141 		free_page((unsigned long)addr);
142 	else
143 		kfree(addr);
144 }
145 EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
146 
/*
 * Buffer that hv_kmsg_dump() fills with kmsg data. Allocated in
 * hv_kmsg_dump_register(); stays NULL when kmsg reporting is not set up.
 */
static void *hv_panic_page;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
 *
 * Defaults to 1; hv_common_init() clears it when
 * hv_is_isolation_supported() is true.
 */
static int sysctl_record_panic_msg = 1;

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 */
static const struct ctl_table hv_ctl_table[] = {
	{
		.procname	= "hyperv_record_panic_msg",
		.data		= &sysctl_record_panic_msg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* Accepted values are limited to the range 0..1. */
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
};
171 
/* Forward declaration: both notifier blocks below use this callback. */
static int hv_die_panic_notify_crash(struct notifier_block *self,
				     unsigned long val, void *args);

/* Registered on the die notifier chain by hv_common_init(). */
static struct notifier_block hyperv_die_report_block = {
	.notifier_call = hv_die_panic_notify_crash,
};

/*
 * Registered on panic_notifier_list by hv_common_init(). The callback
 * compares its 'self' argument against this block to tell a panic from
 * a die event.
 */
static struct notifier_block hyperv_panic_report_block = {
	.notifier_call = hv_die_panic_notify_crash,
};
182 
183 /*
184  * The following callback works both as die and panic notifier; its
185  * goal is to provide panic information to the hypervisor unless the
186  * kmsg dumper is used [see hv_kmsg_dump()], which provides more
187  * information but isn't always available.
188  *
189  * Notice that both the panic/die report notifiers are registered only
190  * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
191  */
192 static int hv_die_panic_notify_crash(struct notifier_block *self,
193 				     unsigned long val, void *args)
194 {
195 	struct pt_regs *regs;
196 	bool is_die;
197 
198 	/* Don't notify Hyper-V unless we have a die oops event or panic. */
199 	if (self == &hyperv_panic_report_block) {
200 		is_die = false;
201 		regs = current_pt_regs();
202 	} else { /* die event */
203 		if (val != DIE_OOPS)
204 			return NOTIFY_DONE;
205 
206 		is_die = true;
207 		regs = ((struct die_args *)args)->regs;
208 	}
209 
210 	/*
211 	 * Hyper-V should be notified only once about a panic/die. If we will
212 	 * be calling hv_kmsg_dump() later with kmsg data, don't do the
213 	 * notification here.
214 	 */
215 	if (!sysctl_record_panic_msg || !hv_panic_page)
216 		hyperv_report_panic(regs, val, is_die);
217 
218 	return NOTIFY_DONE;
219 }
220 
221 /*
222  * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
223  * buffer and call into Hyper-V to transfer the data.
224  */
225 static void hv_kmsg_dump(struct kmsg_dumper *dumper,
226 			 struct kmsg_dump_detail *detail)
227 {
228 	struct kmsg_dump_iter iter;
229 	size_t bytes_written;
230 
231 	/* We are only interested in panics. */
232 	if (detail->reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
233 		return;
234 
235 	/*
236 	 * Write dump contents to the page. No need to synchronize; panic should
237 	 * be single-threaded.
238 	 */
239 	kmsg_dump_rewind(&iter);
240 	kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
241 			     &bytes_written);
242 	if (!bytes_written)
243 		return;
244 	/*
245 	 * P3 to contain the physical address of the panic page & P4 to
246 	 * contain the size of the panic data in that page. Rest of the
247 	 * registers are no-op when the NOTIFY_MSG flag is set.
248 	 */
249 	hv_set_msr(HV_MSR_CRASH_P0, 0);
250 	hv_set_msr(HV_MSR_CRASH_P1, 0);
251 	hv_set_msr(HV_MSR_CRASH_P2, 0);
252 	hv_set_msr(HV_MSR_CRASH_P3, virt_to_phys(hv_panic_page));
253 	hv_set_msr(HV_MSR_CRASH_P4, bytes_written);
254 
255 	/*
256 	 * Let Hyper-V know there is crash data available along with
257 	 * the panic message.
258 	 */
259 	hv_set_msr(HV_MSR_CRASH_CTL,
260 		   (HV_CRASH_CTL_CRASH_NOTIFY |
261 		    HV_CRASH_CTL_CRASH_NOTIFY_MSG));
262 }
263 
/* Dumper descriptor handed to kmsg_dump_register()/kmsg_dump_unregister(). */
static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};
267 
268 static void hv_kmsg_dump_unregister(void)
269 {
270 	kmsg_dump_unregister(&hv_kmsg_dumper);
271 	unregister_die_notifier(&hyperv_die_report_block);
272 	atomic_notifier_chain_unregister(&panic_notifier_list,
273 					 &hyperv_panic_report_block);
274 
275 	hv_free_hyperv_page(hv_panic_page);
276 	hv_panic_page = NULL;
277 }
278 
279 static void hv_kmsg_dump_register(void)
280 {
281 	int ret;
282 
283 	hv_panic_page = hv_alloc_hyperv_zeroed_page();
284 	if (!hv_panic_page) {
285 		pr_err("Hyper-V: panic message page memory allocation failed\n");
286 		return;
287 	}
288 
289 	ret = kmsg_dump_register(&hv_kmsg_dumper);
290 	if (ret) {
291 		pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
292 		hv_free_hyperv_page(hv_panic_page);
293 		hv_panic_page = NULL;
294 	}
295 }
296 
297 static inline bool hv_output_page_exists(void)
298 {
299 	return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
300 }
301 
302 void __init hv_get_partition_id(void)
303 {
304 	struct hv_output_get_partition_id *output;
305 	unsigned long flags;
306 	u64 status, pt_id;
307 
308 	local_irq_save(flags);
309 	output = *this_cpu_ptr(hyperv_pcpu_input_arg);
310 	status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output);
311 	pt_id = output->partition_id;
312 	local_irq_restore(flags);
313 
314 	if (hv_result_success(status))
315 		hv_current_partition_id = pt_id;
316 	else
317 		pr_err("Hyper-V: failed to get partition ID: %#x\n",
318 		       hv_result(status));
319 }
320 #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
321 u8 __init get_vtl(void)
322 {
323 	u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
324 	struct hv_input_get_vp_registers *input;
325 	struct hv_output_get_vp_registers *output;
326 	unsigned long flags;
327 	u64 ret;
328 
329 	local_irq_save(flags);
330 	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
331 	output = *this_cpu_ptr(hyperv_pcpu_output_arg);
332 
333 	memset(input, 0, struct_size(input, names, 1));
334 	input->partition_id = HV_PARTITION_ID_SELF;
335 	input->vp_index = HV_VP_INDEX_SELF;
336 	input->input_vtl.as_uint8 = 0;
337 	input->names[0] = HV_REGISTER_VSM_VP_STATUS;
338 
339 	ret = hv_do_hypercall(control, input, output);
340 	if (hv_result_success(ret)) {
341 		ret = output->values[0].reg8 & HV_VTL_MASK;
342 	} else {
343 		pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
344 		BUG();
345 	}
346 
347 	local_irq_restore(flags);
348 	return ret;
349 }
350 #endif
351 
352 int __init hv_common_init(void)
353 {
354 	int i;
355 	union hv_hypervisor_version_info version;
356 
357 	/* Get information about the Hyper-V host version */
358 	if (!hv_get_hypervisor_version(&version))
359 		pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
360 			version.major_version, version.minor_version,
361 			version.build_number, version.service_number,
362 			version.service_pack, version.service_branch);
363 
364 	if (hv_is_isolation_supported())
365 		sysctl_record_panic_msg = 0;
366 
367 	/*
368 	 * Hyper-V expects to get crash register data or kmsg when
369 	 * crash enlightment is available and system crashes. Set
370 	 * crash_kexec_post_notifiers to be true to make sure that
371 	 * calling crash enlightment interface before running kdump
372 	 * kernel.
373 	 */
374 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
375 		u64 hyperv_crash_ctl;
376 
377 		crash_kexec_post_notifiers = true;
378 		pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
379 
380 		/*
381 		 * Panic message recording (sysctl_record_panic_msg)
382 		 * is enabled by default in non-isolated guests and
383 		 * disabled by default in isolated guests; the panic
384 		 * message recording won't be available in isolated
385 		 * guests should the following registration fail.
386 		 */
387 		hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
388 		if (!hv_ctl_table_hdr)
389 			pr_err("Hyper-V: sysctl table register error");
390 
391 		/*
392 		 * Register for panic kmsg callback only if the right
393 		 * capability is supported by the hypervisor.
394 		 */
395 		hyperv_crash_ctl = hv_get_msr(HV_MSR_CRASH_CTL);
396 		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
397 			hv_kmsg_dump_register();
398 
399 		register_die_notifier(&hyperv_die_report_block);
400 		atomic_notifier_chain_register(&panic_notifier_list,
401 					       &hyperv_panic_report_block);
402 	}
403 
404 	/*
405 	 * Allocate the per-CPU state for the hypercall input arg.
406 	 * If this allocation fails, we will not be able to setup
407 	 * (per-CPU) hypercall input page and thus this failure is
408 	 * fatal on Hyper-V.
409 	 */
410 	hyperv_pcpu_input_arg = alloc_percpu(void  *);
411 	BUG_ON(!hyperv_pcpu_input_arg);
412 
413 	/* Allocate the per-CPU state for output arg for root */
414 	if (hv_output_page_exists()) {
415 		hyperv_pcpu_output_arg = alloc_percpu(void *);
416 		BUG_ON(!hyperv_pcpu_output_arg);
417 	}
418 
419 	if (hv_root_partition()) {
420 		hv_synic_eventring_tail = alloc_percpu(u8 *);
421 		BUG_ON(!hv_synic_eventring_tail);
422 	}
423 
424 	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
425 				    GFP_KERNEL);
426 	if (!hv_vp_index) {
427 		hv_common_free();
428 		return -ENOMEM;
429 	}
430 
431 	for (i = 0; i < nr_cpu_ids; i++)
432 		hv_vp_index[i] = VP_INVAL;
433 
434 	return 0;
435 }
436 
437 void __init ms_hyperv_late_init(void)
438 {
439 	struct acpi_table_header *header;
440 	acpi_status status;
441 	u8 *randomdata;
442 	u32 length, i;
443 
444 	/*
445 	 * Seed the Linux random number generator with entropy provided by
446 	 * the Hyper-V host in ACPI table OEM0.
447 	 */
448 	if (!IS_ENABLED(CONFIG_ACPI))
449 		return;
450 
451 	status = acpi_get_table("OEM0", 0, &header);
452 	if (ACPI_FAILURE(status) || !header)
453 		return;
454 
455 	/*
456 	 * Since the "OEM0" table name is for OEM specific usage, verify
457 	 * that what we're seeing purports to be from Microsoft.
458 	 */
459 	if (strncmp(header->oem_table_id, "MICROSFT", 8))
460 		goto error;
461 
462 	/*
463 	 * Ensure the length is reasonable. Requiring at least 8 bytes and
464 	 * no more than 4K bytes is somewhat arbitrary and just protects
465 	 * against a malformed table. Hyper-V currently provides 64 bytes,
466 	 * but allow for a change in a later version.
467 	 */
468 	if (header->length < sizeof(*header) + 8 ||
469 	    header->length > sizeof(*header) + SZ_4K)
470 		goto error;
471 
472 	length = header->length - sizeof(*header);
473 	randomdata = (u8 *)(header + 1);
474 
475 	pr_debug("Hyper-V: Seeding rng with %d random bytes from ACPI table OEM0\n",
476 			length);
477 
478 	add_bootloader_randomness(randomdata, length);
479 
480 	/*
481 	 * To prevent the seed data from being visible in /sys/firmware/acpi,
482 	 * zero out the random data in the ACPI table and fixup the checksum.
483 	 * The zero'ing is done out of an abundance of caution in avoiding
484 	 * potential security risks to the rng. Similarly, reset the table
485 	 * length to just the header size so that a subsequent kexec doesn't
486 	 * try to use the zero'ed out random data.
487 	 */
488 	for (i = 0; i < length; i++) {
489 		header->checksum += randomdata[i];
490 		randomdata[i] = 0;
491 	}
492 
493 	for (i = 0; i < sizeof(header->length); i++)
494 		header->checksum += ((u8 *)&header->length)[i];
495 	header->length = sizeof(*header);
496 	for (i = 0; i < sizeof(header->length); i++)
497 		header->checksum -= ((u8 *)&header->length)[i];
498 
499 error:
500 	acpi_put_table(header);
501 }
502 
503 /*
504  * Hyper-V specific initialization and die code for
505  * individual CPUs that is common across all architectures.
506  * Called by the CPU hotplug mechanism.
507  */
508 
509 int hv_common_cpu_init(unsigned int cpu)
510 {
511 	void **inputarg, **outputarg;
512 	u8 **synic_eventring_tail;
513 	u64 msr_vp_index;
514 	gfp_t flags;
515 	const int pgcount = hv_output_page_exists() ? 2 : 1;
516 	void *mem;
517 	int ret = 0;
518 
519 	/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
520 	flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
521 
522 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
523 
524 	/*
525 	 * The per-cpu memory is already allocated if this CPU was previously
526 	 * online and then taken offline
527 	 */
528 	if (!*inputarg) {
529 		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
530 		if (!mem)
531 			return -ENOMEM;
532 
533 		if (hv_output_page_exists()) {
534 			outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
535 			*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
536 		}
537 
538 		if (!ms_hyperv.paravisor_present &&
539 		    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
540 			ret = set_memory_decrypted((unsigned long)mem, pgcount);
541 			if (ret) {
542 				/* It may be unsafe to free 'mem' */
543 				return ret;
544 			}
545 
546 			memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
547 		}
548 
549 		/*
550 		 * In a fully enlightened TDX/SNP VM with more than 64 VPs, if
551 		 * hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() ->
552 		 * ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to
553 		 * use hyperv_pcpu_input_arg as the hypercall input page, which
554 		 * must be a decrypted page in such a VM, but the page is still
555 		 * encrypted before set_memory_decrypted() returns. Fix this by
556 		 * setting *inputarg after the above set_memory_decrypted(): if
557 		 * hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns
558 		 * HV_STATUS_INVALID_PARAMETER immediately, and the function
559 		 * hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(),
560 		 * which may be slightly slower than the hypercall, but still
561 		 * works correctly in such a VM.
562 		 */
563 		*inputarg = mem;
564 	}
565 
566 	msr_vp_index = hv_get_msr(HV_MSR_VP_INDEX);
567 
568 	hv_vp_index[cpu] = msr_vp_index;
569 
570 	if (msr_vp_index > hv_max_vp_index)
571 		hv_max_vp_index = msr_vp_index;
572 
573 	if (hv_root_partition()) {
574 		synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
575 		*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
576 						sizeof(u8), flags);
577 		/* No need to unwind any of the above on failure here */
578 		if (unlikely(!*synic_eventring_tail))
579 			ret = -ENOMEM;
580 	}
581 
582 	return ret;
583 }
584 
585 int hv_common_cpu_die(unsigned int cpu)
586 {
587 	u8 **synic_eventring_tail;
588 	/*
589 	 * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
590 	 * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
591 	 * may be used by the Hyper-V vPCI driver in reassigning interrupts
592 	 * as part of the offlining process.  The interrupt reassignment
593 	 * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
594 	 * called this function.
595 	 *
596 	 * If a previously offlined CPU is brought back online again, the
597 	 * originally allocated memory is reused in hv_common_cpu_init().
598 	 */
599 
600 	if (hv_root_partition()) {
601 		synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail);
602 		kfree(*synic_eventring_tail);
603 		*synic_eventring_tail = NULL;
604 	}
605 
606 	return 0;
607 }
608 
609 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
610 bool hv_query_ext_cap(u64 cap_query)
611 {
612 	/*
613 	 * The address of the 'hv_extended_cap' variable will be used as an
614 	 * output parameter to the hypercall below and so it should be
615 	 * compatible with 'virt_to_phys'. Which means, it's address should be
616 	 * directly mapped. Use 'static' to keep it compatible; stack variables
617 	 * can be virtually mapped, making them incompatible with
618 	 * 'virt_to_phys'.
619 	 * Hypercall input/output addresses should also be 8-byte aligned.
620 	 */
621 	static u64 hv_extended_cap __aligned(8);
622 	static bool hv_extended_cap_queried;
623 	u64 status;
624 
625 	/*
626 	 * Querying extended capabilities is an extended hypercall. Check if the
627 	 * partition supports extended hypercall, first.
628 	 */
629 	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
630 		return false;
631 
632 	/* Extended capabilities do not change at runtime. */
633 	if (hv_extended_cap_queried)
634 		return hv_extended_cap & cap_query;
635 
636 	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
637 				 &hv_extended_cap);
638 
639 	/*
640 	 * The query extended capabilities hypercall should not fail under
641 	 * any normal circumstances. Avoid repeatedly making the hypercall, on
642 	 * error.
643 	 */
644 	hv_extended_cap_queried = true;
645 	if (!hv_result_success(status)) {
646 		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
647 		       status);
648 		return false;
649 	}
650 
651 	return hv_extended_cap & cap_query;
652 }
653 EXPORT_SYMBOL_GPL(hv_query_ext_cap);
654 
/*
 * Exported wrapper that forwards @dev and @coherent unchanged to
 * arch_setup_dma_ops().
 */
void hv_setup_dma_ops(struct device *dev, bool coherent)
{
	arch_setup_dma_ops(dev, coherent);
}
EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
660 
661 bool hv_is_hibernation_supported(void)
662 {
663 	return !hv_root_partition() && acpi_sleep_state_supported(ACPI_STATE_S4);
664 }
665 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
666 
/*
 * Default function to read the Hyper-V reference counter, independent
 * of whether Hyper-V enlightened clocks/timers are being used. But on
 * architectures where it is used, Hyper-V enlightenment code in
 * hyperv_timer.c may override this function.
 *
 * Returns the value read from the HV_MSR_TIME_REF_COUNT MSR.
 */
static u64 __hv_read_ref_counter(void)
{
	return hv_get_msr(HV_MSR_TIME_REF_COUNT);
}

/*
 * Exported hook through which the reference counter is read; initially
 * points at the MSR based default above.
 */
u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
EXPORT_SYMBOL_GPL(hv_read_reference_counter);
680 
681 /* These __weak functions provide default "no-op" behavior and
682  * may be overridden by architecture specific versions. Architectures
683  * for which the default "no-op" behavior is sufficient can leave
684  * them unimplemented and not be cluttered with a bunch of stub
685  * functions in arch-specific code.
686  */
687 
/* Weak default: returns false unless an architecture overrides it. */
bool __weak hv_is_isolation_supported(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
693 
/* Weak default: returns false unless an architecture overrides it. */
bool __weak hv_isolation_type_snp(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
699 
/* Weak default: returns false unless an architecture overrides it. */
bool __weak hv_isolation_type_tdx(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
705 
/* Weak default: no-op, @handler is ignored. */
void __weak hv_setup_vmbus_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
710 
/* Weak default: no-op. */
void __weak hv_remove_vmbus_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
715 
/* Weak default: no-op, @handler is ignored. */
void __weak hv_setup_mshv_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_mshv_handler);
720 
/* Weak default: no-op, @handler is ignored. */
void __weak hv_setup_kexec_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
725 
/* Weak default: no-op. */
void __weak hv_remove_kexec_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
730 
/* Weak default: no-op, @handler is ignored. */
void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
{
}
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
735 
/* Weak default: no-op. */
void __weak hv_remove_crash_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
740 
/* Weak default: no-op. */
void __weak hyperv_cleanup(void)
{
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
745 
/*
 * Weak default: ignores all arguments and returns
 * HV_STATUS_INVALID_PARAMETER.
 */
u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
	return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
751 
/*
 * Weak default: ignores all arguments and returns
 * HV_STATUS_INVALID_PARAMETER.
 */
u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
	return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
757 
758 void hv_identify_partition_type(void)
759 {
760 	/* Assume guest role */
761 	hv_curr_partition_type = HV_PARTITION_TYPE_GUEST;
762 	/*
763 	 * Check partition creation and cpu management privileges
764 	 *
765 	 * Hyper-V should never specify running as root and as a Confidential
766 	 * VM. But to protect against a compromised/malicious Hyper-V trying
767 	 * to exploit root behavior to expose Confidential VM memory, ignore
768 	 * the root partition setting if also a Confidential VM.
769 	 */
770 	if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
771 	    (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
772 	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
773 		pr_info("Hyper-V: running as root partition\n");
774 		if (IS_ENABLED(CONFIG_MSHV_ROOT))
775 			hv_curr_partition_type = HV_PARTITION_TYPE_ROOT;
776 		else
777 			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
778 	}
779 }
780 
781 struct hv_status_info {
782 	char *string;
783 	int errno;
784 	u16 code;
785 };
786 
787 /*
788  * Note on the errno mappings:
789  * A failed hypercall is usually only recoverable (or loggable) near
790  * the call site where the HV_STATUS_* code is known. So the errno
791  * it gets converted to is not too useful further up the stack.
792  * Provide a few mappings that could be useful, and revert to -EIO
793  * as a fallback.
794  */
795 static const struct hv_status_info hv_status_infos[] = {
796 #define _STATUS_INFO(status, errno) { #status, (errno), (status) }
797 	_STATUS_INFO(HV_STATUS_SUCCESS,				0),
798 	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE,		-EINVAL),
799 	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT,		-EINVAL),
800 	_STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT,		-EIO),
801 	_STATUS_INFO(HV_STATUS_INVALID_PARAMETER,		-EINVAL),
802 	_STATUS_INFO(HV_STATUS_ACCESS_DENIED,			-EIO),
803 	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE,		-EIO),
804 	_STATUS_INFO(HV_STATUS_OPERATION_DENIED,		-EIO),
805 	_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY,		-EIO),
806 	_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE,	-EIO),
807 	_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY,		-ENOMEM),
808 	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID,		-EINVAL),
809 	_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX,		-EINVAL),
810 	_STATUS_INFO(HV_STATUS_NOT_FOUND,			-EIO),
811 	_STATUS_INFO(HV_STATUS_INVALID_PORT_ID,			-EINVAL),
812 	_STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID,		-EINVAL),
813 	_STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS,		-EIO),
814 	_STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED,		-EIO),
815 	_STATUS_INFO(HV_STATUS_INVALID_VP_STATE,		-EIO),
816 	_STATUS_INFO(HV_STATUS_NO_RESOURCES,			-EIO),
817 	_STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED,	-EIO),
818 	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EINVAL),
819 	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EINVAL),
820 	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EIO),
821 	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EIO),
822 	_STATUS_INFO(HV_STATUS_OPERATION_FAILED,		-EIO),
823 	_STATUS_INFO(HV_STATUS_TIME_OUT,			-EIO),
824 	_STATUS_INFO(HV_STATUS_CALL_PENDING,			-EIO),
825 	_STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED,		-EIO),
826 #undef _STATUS_INFO
827 };
828 
829 static inline const struct hv_status_info *find_hv_status_info(u64 hv_status)
830 {
831 	int i;
832 	u16 code = hv_result(hv_status);
833 
834 	for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) {
835 		const struct hv_status_info *info = &hv_status_infos[i];
836 
837 		if (info->code == code)
838 			return info;
839 	}
840 
841 	return NULL;
842 }
843 
844 /* Convert a hypercall result into a linux-friendly error code. */
845 int hv_result_to_errno(u64 status)
846 {
847 	const struct hv_status_info *info;
848 
849 	/* hv_do_hypercall() may return U64_MAX, hypercalls aren't possible */
850 	if (unlikely(status == U64_MAX))
851 		return -EOPNOTSUPP;
852 
853 	info = find_hv_status_info(status);
854 	if (info)
855 		return info->errno;
856 
857 	return -EIO;
858 }
859 EXPORT_SYMBOL_GPL(hv_result_to_errno);
860 
861 const char *hv_result_to_string(u64 status)
862 {
863 	const struct hv_status_info *info;
864 
865 	if (unlikely(status == U64_MAX))
866 		return "Hypercall page missing!";
867 
868 	info = find_hv_status_info(status);
869 	if (info)
870 		return info->string;
871 
872 	return "Unknown";
873 }
874 EXPORT_SYMBOL_GPL(hv_result_to_string);
875