xref: /linux/drivers/hv/hv_common.c (revision 0526b56cbc3c489642bd6a5fe4b718dea7ef0ee8)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Architecture neutral utility routines for interacting with
5  * Hyper-V. This file is specifically for code that must be
6  * built-in to the kernel image when CONFIG_HYPERV is set
7  * (vs. being in a module) because it is called from architecture
8  * specific code under arch/.
9  *
10  * Copyright (C) 2021, Microsoft, Inc.
11  *
12  * Author : Michael Kelley <mikelley@microsoft.com>
13  */
14 
15 #include <linux/types.h>
16 #include <linux/acpi.h>
17 #include <linux/export.h>
18 #include <linux/bitfield.h>
19 #include <linux/cpumask.h>
20 #include <linux/sched/task_stack.h>
21 #include <linux/panic_notifier.h>
22 #include <linux/ptrace.h>
23 #include <linux/kdebug.h>
24 #include <linux/kmsg_dump.h>
25 #include <linux/slab.h>
26 #include <linux/dma-map-ops.h>
27 #include <asm/hyperv-tlfs.h>
28 #include <asm/mshyperv.h>
29 
30 /*
31  * hv_root_partition, ms_hyperv and hv_nested are defined here with other
32  * Hyper-V specific globals so they are shared across all architectures and are
33  * built only when CONFIG_HYPERV is defined.  But on x86,
34  * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
35  * defined, and it uses these three variables.  So mark them as __weak
36  * here, allowing for an overriding definition in the module containing
37  * ms_hyperv_init_platform().
38  */
39 bool __weak hv_root_partition;
40 EXPORT_SYMBOL_GPL(hv_root_partition);
41 
42 bool __weak hv_nested;
43 EXPORT_SYMBOL_GPL(hv_nested);
44 
45 struct ms_hyperv_info __weak ms_hyperv;
46 EXPORT_SYMBOL_GPL(ms_hyperv);
47 
48 u32 *hv_vp_index;
49 EXPORT_SYMBOL_GPL(hv_vp_index);
50 
51 u32 hv_max_vp_index;
52 EXPORT_SYMBOL_GPL(hv_max_vp_index);
53 
54 void * __percpu *hyperv_pcpu_input_arg;
55 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
56 
57 void * __percpu *hyperv_pcpu_output_arg;
58 EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
59 
60 static void hv_kmsg_dump_unregister(void);
61 
62 static struct ctl_table_header *hv_ctl_table_hdr;
63 
64 /*
65  * Hyper-V specific initialization and shutdown code that is
66  * common across all architectures.  Called from architecture
67  * specific initialization functions.
68  */
69 
70 void __init hv_common_free(void)
71 {
72 	unregister_sysctl_table(hv_ctl_table_hdr);
73 	hv_ctl_table_hdr = NULL;
74 
75 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
76 		hv_kmsg_dump_unregister();
77 
78 	kfree(hv_vp_index);
79 	hv_vp_index = NULL;
80 
81 	free_percpu(hyperv_pcpu_output_arg);
82 	hyperv_pcpu_output_arg = NULL;
83 
84 	free_percpu(hyperv_pcpu_input_arg);
85 	hyperv_pcpu_input_arg = NULL;
86 }
87 
88 /*
89  * Functions for allocating and freeing memory with size and
90  * alignment HV_HYP_PAGE_SIZE. These functions are needed because
91  * the guest page size may not be the same as the Hyper-V page
92  * size. We depend upon kmalloc() aligning power-of-two size
93  * allocations to the allocation size boundary, so that the
94  * allocated memory appears to Hyper-V as a page of the size
95  * it expects.
96  */
97 
98 void *hv_alloc_hyperv_page(void)
99 {
100 	BUILD_BUG_ON(PAGE_SIZE <  HV_HYP_PAGE_SIZE);
101 
102 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
103 		return (void *)__get_free_page(GFP_KERNEL);
104 	else
105 		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
106 }
107 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
108 
109 void *hv_alloc_hyperv_zeroed_page(void)
110 {
111 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
112 		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
113 	else
114 		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
115 }
116 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
117 
118 void hv_free_hyperv_page(unsigned long addr)
119 {
120 	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
121 		free_page(addr);
122 	else
123 		kfree((void *)addr);
124 }
125 EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
126 
127 static void *hv_panic_page;
128 
129 /*
130  * Boolean to control whether to report panic messages over Hyper-V.
131  *
132  * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
133  */
134 static int sysctl_record_panic_msg = 1;
135 
136 /*
137  * sysctl option to allow the user to control whether kmsg data should be
138  * reported to Hyper-V on panic.
139  */
140 static struct ctl_table hv_ctl_table[] = {
141 	{
142 		.procname	= "hyperv_record_panic_msg",
143 		.data		= &sysctl_record_panic_msg,
144 		.maxlen		= sizeof(int),
145 		.mode		= 0644,
146 		.proc_handler	= proc_dointvec_minmax,
147 		.extra1		= SYSCTL_ZERO,
148 		.extra2		= SYSCTL_ONE
149 	},
150 	{}
151 };
152 
153 static int hv_die_panic_notify_crash(struct notifier_block *self,
154 				     unsigned long val, void *args);
155 
156 static struct notifier_block hyperv_die_report_block = {
157 	.notifier_call = hv_die_panic_notify_crash,
158 };
159 
160 static struct notifier_block hyperv_panic_report_block = {
161 	.notifier_call = hv_die_panic_notify_crash,
162 };
163 
164 /*
165  * The following callback works both as die and panic notifier; its
166  * goal is to provide panic information to the hypervisor unless the
167  * kmsg dumper is used [see hv_kmsg_dump()], which provides more
168  * information but isn't always available.
169  *
170  * Notice that both the panic/die report notifiers are registered only
171  * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
172  */
173 static int hv_die_panic_notify_crash(struct notifier_block *self,
174 				     unsigned long val, void *args)
175 {
176 	struct pt_regs *regs;
177 	bool is_die;
178 
179 	/* Don't notify Hyper-V unless we have a die oops event or panic. */
180 	if (self == &hyperv_panic_report_block) {
181 		is_die = false;
182 		regs = current_pt_regs();
183 	} else { /* die event */
184 		if (val != DIE_OOPS)
185 			return NOTIFY_DONE;
186 
187 		is_die = true;
188 		regs = ((struct die_args *)args)->regs;
189 	}
190 
191 	/*
192 	 * Hyper-V should be notified only once about a panic/die. If we will
193 	 * be calling hv_kmsg_dump() later with kmsg data, don't do the
194 	 * notification here.
195 	 */
196 	if (!sysctl_record_panic_msg || !hv_panic_page)
197 		hyperv_report_panic(regs, val, is_die);
198 
199 	return NOTIFY_DONE;
200 }
201 
202 /*
203  * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
204  * buffer and call into Hyper-V to transfer the data.
205  */
206 static void hv_kmsg_dump(struct kmsg_dumper *dumper,
207 			 enum kmsg_dump_reason reason)
208 {
209 	struct kmsg_dump_iter iter;
210 	size_t bytes_written;
211 
212 	/* We are only interested in panics. */
213 	if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
214 		return;
215 
216 	/*
217 	 * Write dump contents to the page. No need to synchronize; panic should
218 	 * be single-threaded.
219 	 */
220 	kmsg_dump_rewind(&iter);
221 	kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
222 			     &bytes_written);
223 	if (!bytes_written)
224 		return;
225 	/*
226 	 * P3 to contain the physical address of the panic page & P4 to
227 	 * contain the size of the panic data in that page. Rest of the
228 	 * registers are no-op when the NOTIFY_MSG flag is set.
229 	 */
230 	hv_set_register(HV_REGISTER_CRASH_P0, 0);
231 	hv_set_register(HV_REGISTER_CRASH_P1, 0);
232 	hv_set_register(HV_REGISTER_CRASH_P2, 0);
233 	hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
234 	hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
235 
236 	/*
237 	 * Let Hyper-V know there is crash data available along with
238 	 * the panic message.
239 	 */
240 	hv_set_register(HV_REGISTER_CRASH_CTL,
241 			(HV_CRASH_CTL_CRASH_NOTIFY |
242 			 HV_CRASH_CTL_CRASH_NOTIFY_MSG));
243 }
244 
245 static struct kmsg_dumper hv_kmsg_dumper = {
246 	.dump = hv_kmsg_dump,
247 };
248 
249 static void hv_kmsg_dump_unregister(void)
250 {
251 	kmsg_dump_unregister(&hv_kmsg_dumper);
252 	unregister_die_notifier(&hyperv_die_report_block);
253 	atomic_notifier_chain_unregister(&panic_notifier_list,
254 					 &hyperv_panic_report_block);
255 
256 	hv_free_hyperv_page((unsigned long)hv_panic_page);
257 	hv_panic_page = NULL;
258 }
259 
260 static void hv_kmsg_dump_register(void)
261 {
262 	int ret;
263 
264 	hv_panic_page = hv_alloc_hyperv_zeroed_page();
265 	if (!hv_panic_page) {
266 		pr_err("Hyper-V: panic message page memory allocation failed\n");
267 		return;
268 	}
269 
270 	ret = kmsg_dump_register(&hv_kmsg_dumper);
271 	if (ret) {
272 		pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
273 		hv_free_hyperv_page((unsigned long)hv_panic_page);
274 		hv_panic_page = NULL;
275 	}
276 }
277 
278 int __init hv_common_init(void)
279 {
280 	int i;
281 
282 	if (hv_is_isolation_supported())
283 		sysctl_record_panic_msg = 0;
284 
285 	/*
286 	 * Hyper-V expects to get crash register data or kmsg when
287 	 * crash enlightment is available and system crashes. Set
288 	 * crash_kexec_post_notifiers to be true to make sure that
289 	 * calling crash enlightment interface before running kdump
290 	 * kernel.
291 	 */
292 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
293 		u64 hyperv_crash_ctl;
294 
295 		crash_kexec_post_notifiers = true;
296 		pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
297 
298 		/*
299 		 * Panic message recording (sysctl_record_panic_msg)
300 		 * is enabled by default in non-isolated guests and
301 		 * disabled by default in isolated guests; the panic
302 		 * message recording won't be available in isolated
303 		 * guests should the following registration fail.
304 		 */
305 		hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
306 		if (!hv_ctl_table_hdr)
307 			pr_err("Hyper-V: sysctl table register error");
308 
309 		/*
310 		 * Register for panic kmsg callback only if the right
311 		 * capability is supported by the hypervisor.
312 		 */
313 		hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
314 		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
315 			hv_kmsg_dump_register();
316 
317 		register_die_notifier(&hyperv_die_report_block);
318 		atomic_notifier_chain_register(&panic_notifier_list,
319 					       &hyperv_panic_report_block);
320 	}
321 
322 	/*
323 	 * Allocate the per-CPU state for the hypercall input arg.
324 	 * If this allocation fails, we will not be able to setup
325 	 * (per-CPU) hypercall input page and thus this failure is
326 	 * fatal on Hyper-V.
327 	 */
328 	hyperv_pcpu_input_arg = alloc_percpu(void  *);
329 	BUG_ON(!hyperv_pcpu_input_arg);
330 
331 	/* Allocate the per-CPU state for output arg for root */
332 	if (hv_root_partition) {
333 		hyperv_pcpu_output_arg = alloc_percpu(void *);
334 		BUG_ON(!hyperv_pcpu_output_arg);
335 	}
336 
337 	hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
338 				    GFP_KERNEL);
339 	if (!hv_vp_index) {
340 		hv_common_free();
341 		return -ENOMEM;
342 	}
343 
344 	for (i = 0; i < num_possible_cpus(); i++)
345 		hv_vp_index[i] = VP_INVAL;
346 
347 	return 0;
348 }
349 
350 /*
351  * Hyper-V specific initialization and die code for
352  * individual CPUs that is common across all architectures.
353  * Called by the CPU hotplug mechanism.
354  */
355 
356 int hv_common_cpu_init(unsigned int cpu)
357 {
358 	void **inputarg, **outputarg;
359 	u64 msr_vp_index;
360 	gfp_t flags;
361 	int pgcount = hv_root_partition ? 2 : 1;
362 
363 	/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
364 	flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
365 
366 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
367 	*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
368 	if (!(*inputarg))
369 		return -ENOMEM;
370 
371 	if (hv_root_partition) {
372 		outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
373 		*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
374 	}
375 
376 	msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
377 
378 	hv_vp_index[cpu] = msr_vp_index;
379 
380 	if (msr_vp_index > hv_max_vp_index)
381 		hv_max_vp_index = msr_vp_index;
382 
383 	return 0;
384 }
385 
386 int hv_common_cpu_die(unsigned int cpu)
387 {
388 	unsigned long flags;
389 	void **inputarg, **outputarg;
390 	void *mem;
391 
392 	local_irq_save(flags);
393 
394 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
395 	mem = *inputarg;
396 	*inputarg = NULL;
397 
398 	if (hv_root_partition) {
399 		outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
400 		*outputarg = NULL;
401 	}
402 
403 	local_irq_restore(flags);
404 
405 	kfree(mem);
406 
407 	return 0;
408 }
409 
410 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
411 bool hv_query_ext_cap(u64 cap_query)
412 {
413 	/*
414 	 * The address of the 'hv_extended_cap' variable will be used as an
415 	 * output parameter to the hypercall below and so it should be
416 	 * compatible with 'virt_to_phys'. Which means, it's address should be
417 	 * directly mapped. Use 'static' to keep it compatible; stack variables
418 	 * can be virtually mapped, making them incompatible with
419 	 * 'virt_to_phys'.
420 	 * Hypercall input/output addresses should also be 8-byte aligned.
421 	 */
422 	static u64 hv_extended_cap __aligned(8);
423 	static bool hv_extended_cap_queried;
424 	u64 status;
425 
426 	/*
427 	 * Querying extended capabilities is an extended hypercall. Check if the
428 	 * partition supports extended hypercall, first.
429 	 */
430 	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
431 		return false;
432 
433 	/* Extended capabilities do not change at runtime. */
434 	if (hv_extended_cap_queried)
435 		return hv_extended_cap & cap_query;
436 
437 	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
438 				 &hv_extended_cap);
439 
440 	/*
441 	 * The query extended capabilities hypercall should not fail under
442 	 * any normal circumstances. Avoid repeatedly making the hypercall, on
443 	 * error.
444 	 */
445 	hv_extended_cap_queried = true;
446 	if (!hv_result_success(status)) {
447 		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
448 		       status);
449 		return false;
450 	}
451 
452 	return hv_extended_cap & cap_query;
453 }
454 EXPORT_SYMBOL_GPL(hv_query_ext_cap);
455 
456 void hv_setup_dma_ops(struct device *dev, bool coherent)
457 {
458 	/*
459 	 * Hyper-V does not offer a vIOMMU in the guest
460 	 * VM, so pass 0/NULL for the IOMMU settings
461 	 */
462 	arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
463 }
464 EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
465 
466 bool hv_is_hibernation_supported(void)
467 {
468 	return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
469 }
470 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
471 
472 /*
473  * Default function to read the Hyper-V reference counter, independent
474  * of whether Hyper-V enlightened clocks/timers are being used. But on
475  * architectures where it is used, Hyper-V enlightenment code in
476  * hyperv_timer.c may override this function.
477  */
478 static u64 __hv_read_ref_counter(void)
479 {
480 	return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
481 }
482 
483 u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
484 EXPORT_SYMBOL_GPL(hv_read_reference_counter);
485 
486 /* These __weak functions provide default "no-op" behavior and
487  * may be overridden by architecture specific versions. Architectures
488  * for which the default "no-op" behavior is sufficient can leave
489  * them unimplemented and not be cluttered with a bunch of stub
490  * functions in arch-specific code.
491  */
492 
493 bool __weak hv_is_isolation_supported(void)
494 {
495 	return false;
496 }
497 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
498 
499 bool __weak hv_isolation_type_snp(void)
500 {
501 	return false;
502 }
503 EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
504 
505 void __weak hv_setup_vmbus_handler(void (*handler)(void))
506 {
507 }
508 EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
509 
510 void __weak hv_remove_vmbus_handler(void)
511 {
512 }
513 EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
514 
515 void __weak hv_setup_kexec_handler(void (*handler)(void))
516 {
517 }
518 EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
519 
520 void __weak hv_remove_kexec_handler(void)
521 {
522 }
523 EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
524 
525 void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
526 {
527 }
528 EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
529 
530 void __weak hv_remove_crash_handler(void)
531 {
532 }
533 EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
534 
535 void __weak hyperv_cleanup(void)
536 {
537 }
538 EXPORT_SYMBOL_GPL(hyperv_cleanup);
539 
540 u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
541 {
542 	return HV_STATUS_INVALID_PARAMETER;
543 }
544 EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
545