xref: /freebsd/sys/x86/xen/hvm.c (revision aa1a8ff2d6dbc51ef058f46f3db5a8bb77967145)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008, 2013 Citrix Systems, Inc.
5  * Copyright (c) 2012 Spectra Logic Corporation
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/bus.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/malloc.h>
35 #include <sys/proc.h>
36 #include <sys/smp.h>
37 #include <sys/systm.h>
38 
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 #include <vm/vm_param.h>
42 
43 #include <dev/pci/pcivar.h>
44 
45 #include <machine/_inttypes.h>
46 #include <machine/cpufunc.h>
47 #include <machine/cpu.h>
48 #include <machine/md_var.h>
49 #include <machine/metadata.h>
50 #include <machine/smp.h>
51 
52 #include <x86/apicreg.h>
53 
54 #include <xen/xen-os.h>
55 #include <xen/error.h>
56 #include <xen/features.h>
57 #include <xen/gnttab.h>
58 #include <xen/hypervisor.h>
59 #include <xen/hvm.h>
60 #include <xen/xen_intr.h>
61 
62 #include <contrib/xen/arch-x86/cpuid.h>
63 #include <contrib/xen/hvm/params.h>
64 #include <contrib/xen/vcpu.h>
65 
66 /*--------------------------- Forward Declarations ---------------------------*/
67 static void xen_hvm_cpu_init(void);
68 
69 /*-------------------------------- Global Data -------------------------------*/
70 #ifdef SMP
71 struct cpu_ops xen_hvm_cpu_ops = {
72 	.cpu_init	= xen_hvm_cpu_init,
73 	.cpu_resume	= xen_hvm_cpu_init
74 };
75 #endif
76 
77 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
78 
79 /**
80  * If non-zero, the hypervisor has been configured to use a direct
81  * IDT event callback for interrupt injection.
82  */
83 int xen_vector_callback_enabled;
84 
85 /**
86  * Signal whether the vector injected for the event channel upcall requires to
87  * be EOI'ed on the local APIC.
88  */
89 bool xen_evtchn_needs_ack;
90 
91 /*------------------------------- Per-CPU Data -------------------------------*/
92 DPCPU_DECLARE(struct vcpu_info *, vcpu_info);
93 
94 /*------------------------------ Sysctl tunables -----------------------------*/
95 int xen_disable_pv_disks = 0;
96 int xen_disable_pv_nics = 0;
97 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
98 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
99 
100 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
101 
102 void xen_emergency_print(const char *str, size_t size)
103 {
104 	outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size);
105 }
106 
/*
 * Apply workarounds that depend on the hypervisor version (major.minor).
 * Only has an effect on SMP kernels.
 */
static void
hypervisor_quirks(unsigned int major, unsigned int minor)
{
#ifdef SMP
	bool older_than_4_6;

	older_than_4_6 = major < 4 || (major == 4 && minor <= 5);

	/*
	 * Leave things alone on fixed hypervisors, or when the setting is
	 * no longer at its -1 "undecided" value.
	 */
	if (!older_than_4_6 || msix_disable_migration != -1)
		return;

	/*
	 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
	 * enabled MSI-X table entries, so MSI-X interrupt migration is
	 * turned off for them.
	 */
	if (bootverbose) {
		printf(
"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
"Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
	}
	msix_disable_migration = 1;
#endif
}
127 
128 static void
129 hypervisor_version(void)
130 {
131 	uint32_t regs[4];
132 	int major, minor;
133 
134 	do_cpuid(hv_base + 1, regs);
135 
136 	major = regs[0] >> 16;
137 	minor = regs[0] & 0xffff;
138 	printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
139 
140 	hypervisor_quirks(major, minor);
141 }
142 
143 /*
144  * Translate linear to physical address when still running on the bootloader
145  * created page-tables.
146  */
147 static vm_paddr_t
148 early_init_vtop(void *addr)
149 {
150 
151 	/*
152 	 * Using a KASSERT won't print anything, as this is before console
153 	 * initialization.
154 	 */
155 	if (__predict_false((uintptr_t)addr < KERNBASE)) {
156 		xc_printf("invalid linear address: %p\n", addr);
157 		halt();
158 	}
159 
160 	return ((uintptr_t)addr - KERNBASE
161 #ifdef __amd64__
162 	    + kernphys - KERNLOAD
163 #endif
164 	    );
165 }
166 
167 static int
168 map_shared_info(void)
169 {
170 	/*
171 	 * TODO shared info page should be mapped in an unpopulated (IOW:
172 	 * non-RAM) address.  But finding one at this point in boot is
173 	 * complicated, hence re-use a RAM address for the time being.  This
174 	 * sadly causes super-page shattering in the second stage translation
175 	 * page tables.
176 	 */
177 	static union {
178 		shared_info_t shared_info;
179 		uint8_t raw[PAGE_SIZE];
180 	} shared_page __attribute__((aligned(PAGE_SIZE)));
181 	static struct xen_add_to_physmap xatp = {
182 	    .domid = DOMID_SELF,
183 	    .space = XENMAPSPACE_shared_info,
184 	};
185 	int rc;
186 
187 	_Static_assert(sizeof(shared_page) == PAGE_SIZE,
188 	    "invalid Xen shared_info struct size");
189 
190 	if (xatp.gpfn == 0)
191 		xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info));
192 
193 	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
194 	if (rc != 0) {
195 		xc_printf("cannot map shared info page: %d\n", rc);
196 		HYPERVISOR_shared_info = NULL;
197 	} else if (HYPERVISOR_shared_info == NULL)
198 		HYPERVISOR_shared_info = &shared_page.shared_info;
199 
200 	return (rc);
201 }
202 
203 static void
204 fixup_console(void)
205 {
206 	struct xen_platform_op op = {
207 		.cmd = XENPF_get_dom0_console,
208 	};
209 	xenpf_dom0_console_t *console = &op.u.dom0_console;
210 	union {
211 		struct efi_fb efi;
212 		struct vbe_fb vbe;
213 	} *fb = NULL;
214 	int size;
215 	caddr_t kmdp;
216 
217 	kmdp = preload_search_by_type("elf kernel");
218 	if (kmdp == NULL)
219 		kmdp = preload_search_by_type("elf64 kernel");
220 	if (kmdp == NULL) {
221 		xc_printf("Unable to find kernel metadata\n");
222 		return;
223 	}
224 
225 	size = HYPERVISOR_platform_op(&op);
226 	if (size < 0) {
227 		xc_printf("Failed to get video console info: %d\n", size);
228 		return;
229 	}
230 
231 	switch (console->video_type) {
232 	case XEN_VGATYPE_VESA_LFB:
233 		fb = (__typeof__ (fb))preload_search_info(kmdp,
234 		    MODINFO_METADATA | MODINFOMD_VBE_FB);
235 
236 		if (fb == NULL) {
237 			xc_printf("No VBE FB in kernel metadata\n");
238 			return;
239 		}
240 
241 		_Static_assert(offsetof(struct vbe_fb, fb_bpp) ==
242 		    offsetof(struct efi_fb, fb_mask_reserved) +
243 		    sizeof(fb->efi.fb_mask_reserved),
244 		    "Bad structure overlay\n");
245 		fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel;
246 		/* FALLTHROUGH */
247 	case XEN_VGATYPE_EFI_LFB:
248 		if (fb == NULL) {
249 			fb = (__typeof__ (fb))preload_search_info(kmdp,
250 			    MODINFO_METADATA | MODINFOMD_EFI_FB);
251 			if (fb == NULL) {
252 				xc_printf("No EFI FB in kernel metadata\n");
253 				return;
254 			}
255 		}
256 
257 		fb->efi.fb_addr = console->u.vesa_lfb.lfb_base;
258 		if (size >
259 		    offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base))
260 			fb->efi.fb_addr |=
261 			    (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32;
262 		fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16;
263 		fb->efi.fb_height = console->u.vesa_lfb.height;
264 		fb->efi.fb_width = console->u.vesa_lfb.width;
265 		fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) /
266 		    console->u.vesa_lfb.bits_per_pixel;
267 #define FBMASK(c) \
268     ((~0u << console->u.vesa_lfb.c ## _pos) & \
269     (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \
270     console->u.vesa_lfb.c ## _size)))
271 		fb->efi.fb_mask_red = FBMASK(red);
272 		fb->efi.fb_mask_green = FBMASK(green);
273 		fb->efi.fb_mask_blue = FBMASK(blue);
274 		fb->efi.fb_mask_reserved = FBMASK(rsvd);
275 #undef FBMASK
276 		break;
277 
278 	default:
279 		xc_printf("Video console type unsupported\n");
280 		return;
281 	}
282 }
283 
284 /* Early initialization when running as a Xen guest. */
285 void
286 xen_early_init(void)
287 {
288 	uint32_t regs[4];
289 	int rc;
290 
291 	if (hv_high < hv_base + 2) {
292 		xc_printf("Invalid maximum leaves for hv_base\n");
293 		vm_guest = VM_GUEST_VM;
294 		return;
295 	}
296 
297 	/* Find the hypercall pages. */
298 	do_cpuid(hv_base + 2, regs);
299 	if (regs[0] != 1) {
300 		xc_printf("Invalid number of hypercall pages %u\n",
301 		    regs[0]);
302 		vm_guest = VM_GUEST_VM;
303 		return;
304 	}
305 
306 	wrmsr(regs[1], early_init_vtop(&hypercall_page));
307 
308 	rc = map_shared_info();
309 	if (rc != 0) {
310 		vm_guest = VM_GUEST_VM;
311 		return;
312 	}
313 
314 	if (xen_initial_domain())
315 	    /* Fixup video console information in case Xen changed the mode. */
316 	    fixup_console();
317 }
318 
319 static int
320 set_percpu_callback(unsigned int vcpu)
321 {
322 	struct xen_hvm_evtchn_upcall_vector vec;
323 	int error;
324 
325 	vec.vcpu = vcpu;
326 	vec.vector = IDT_EVTCHN;
327 	error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
328 
329 	return (error != 0 ? xen_translate_error(error) : 0);
330 }
331 
332 /*
333  * Tell the hypervisor how to contact us for event channel callbacks.
334  */
335 void
336 xen_hvm_set_callback(device_t dev)
337 {
338 	struct xen_hvm_param xhp;
339 	int irq;
340 
341 	if (xen_vector_callback_enabled)
342 		return;
343 
344 	xhp.domid = DOMID_SELF;
345 	xhp.index = HVM_PARAM_CALLBACK_IRQ;
346 	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
347 		int error;
348 
349 		error = set_percpu_callback(0);
350 		if (error == 0) {
351 			xen_evtchn_needs_ack = true;
352 			/* Trick toolstack to think we are enlightened */
353 			xhp.value = 1;
354 		} else
355 			xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
356 		error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
357 		if (error == 0) {
358 			xen_vector_callback_enabled = 1;
359 			return;
360 		} else if (xen_evtchn_needs_ack)
361 			panic("Unable to setup fake HVM param: %d", error);
362 
363 		printf("Xen HVM callback vector registration failed (%d). "
364 		    "Falling back to emulated device interrupt\n", error);
365 	}
366 	xen_vector_callback_enabled = 0;
367 	if (dev == NULL) {
368 		/*
369 		 * Called from early boot or resume.
370 		 * xenpci will invoke us again later.
371 		 */
372 		return;
373 	}
374 
375 	irq = pci_get_irq(dev);
376 	if (irq < 16) {
377 		xhp.value = HVM_CALLBACK_GSI(irq);
378 	} else {
379 		u_int slot;
380 		u_int pin;
381 
382 		slot = pci_get_slot(dev);
383 		pin = pci_get_intpin(dev) - 1;
384 		xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
385 	}
386 
387 	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
388 		panic("Can't set evtchn callback");
389 }
390 
/* I/O port used by xen_hvm_disable_emulated_devices() to unplug devices. */
#define	XEN_MAGIC_IOPORT 0x10

/* Values exchanged through XEN_MAGIC_IOPORT. */
enum {
	/* Value expected when reading the port. */
	XMI_MAGIC			 = 0x49d2,
	/* Bits that may be written to the port. */
	XMI_UNPLUG_IDE_DISKS		 = 0x01,
	XMI_UNPLUG_NICS			 = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04,
};
398 
399 static void
400 xen_hvm_disable_emulated_devices(void)
401 {
402 	u_short disable_devs = 0;
403 
404 	if (xen_pv_domain()) {
405 		/*
406 		 * No emulated devices in the PV case, so no need to unplug
407 		 * anything.
408 		 */
409 		if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
410 			printf("PV devices cannot be disabled in PV guests\n");
411 		return;
412 	}
413 
414 	if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
415 		return;
416 
417 	if (xen_disable_pv_disks == 0) {
418 		if (bootverbose)
419 			printf("XEN: disabling emulated disks\n");
420 		disable_devs |= XMI_UNPLUG_IDE_DISKS;
421 	}
422 	if (xen_disable_pv_nics == 0) {
423 		if (bootverbose)
424 			printf("XEN: disabling emulated nics\n");
425 		disable_devs |= XMI_UNPLUG_NICS;
426 	}
427 
428 	if (disable_devs != 0)
429 		outw(XEN_MAGIC_IOPORT, disable_devs);
430 }
431 
432 static void
433 xen_hvm_init(enum xen_hvm_init_type init_type)
434 {
435 	unsigned int i;
436 
437 	if (!xen_domain() ||
438 	    init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
439 		return;
440 
441 	hypervisor_version();
442 
443 	switch (init_type) {
444 	case XEN_HVM_INIT_LATE:
445 		setup_xen_features();
446 #ifdef SMP
447 		cpu_ops = xen_hvm_cpu_ops;
448 #endif
449 		break;
450 	case XEN_HVM_INIT_RESUME:
451 		/* Clear stale vcpu_info. */
452 		CPU_FOREACH(i)
453 			DPCPU_ID_SET(i, vcpu_info, NULL);
454 
455 		if (map_shared_info() != 0)
456 			panic("cannot map Xen shared info page");
457 
458 		break;
459 	default:
460 		panic("Unsupported HVM initialization type");
461 	}
462 
463 	xen_vector_callback_enabled = 0;
464 	xen_evtchn_needs_ack = false;
465 	xen_hvm_set_callback(NULL);
466 
467 	xen_hvm_disable_emulated_devices();
468 }
469 
/*
 * Suspend hook; there is currently nothing to do here.
 */
void
xen_hvm_suspend(void)
{

	return;
}
474 
475 void
476 xen_hvm_resume(bool suspend_cancelled)
477 {
478 
479 	xen_hvm_init(suspend_cancelled ?
480 	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
481 
482 	/* Register vcpu_info area for CPU#0. */
483 	xen_hvm_cpu_init();
484 }
485 
486 static void
487 xen_hvm_sysinit(void *arg __unused)
488 {
489 	xen_hvm_init(XEN_HVM_INIT_LATE);
490 }
491 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
492 
493 static void
494 xen_hvm_cpu_init(void)
495 {
496 	uint32_t regs[4];
497 	int rc;
498 
499 	if (!xen_domain())
500 		return;
501 
502 	if (DPCPU_GET(vcpu_info) != NULL) {
503 		/*
504 		 * vcpu_info is already set.  We're resuming
505 		 * from a failed migration and our pre-suspend
506 		 * configuration is still valid.
507 		 */
508 		return;
509 	}
510 
511 	/*
512 	 * Set vCPU ID. If available fetch the ID from CPUID, if not just use
513 	 * the ACPI ID.
514 	 */
515 	KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
516 	cpuid_count(hv_base + 4, 0, regs);
517 	KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
518 	    !xen_pv_domain(),
519 	    ("Xen PV domain without vcpu_id in cpuid"));
520 	PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
521 	    regs[1] : PCPU_GET(acpi_id));
522 
523 	if (xen_evtchn_needs_ack && !IS_BSP()) {
524 		/*
525 		 * Setup the per-vpcu event channel upcall vector. This is only
526 		 * required when using the new HVMOP_set_evtchn_upcall_vector
527 		 * hypercall, which allows using a different vector for each
528 		 * vCPU. Note that FreeBSD uses the same vector for all vCPUs
529 		 * because it's not dynamically allocated.
530 		 */
531 		rc = set_percpu_callback(PCPU_GET(vcpu_id));
532 		if (rc != 0)
533 			panic("Event channel upcall vector setup failed: %d",
534 			    rc);
535 	}
536 
537 	xen_setup_vcpu_info();
538 }
539 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
540 
541 bool
542 xen_has_iommu_maps(void)
543 {
544 	uint32_t regs[4];
545 
546 	KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
547 	cpuid_count(hv_base + 4, 0, regs);
548 
549 	return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);
550 }
551