1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 5 * Copyright (c) 2012 Spectra Logic Corporation 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/malloc.h> 35 #include <sys/proc.h> 36 #include <sys/smp.h> 37 #include <sys/systm.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 #include <vm/vm_param.h> 42 43 #include <dev/pci/pcivar.h> 44 45 #include <machine/_inttypes.h> 46 #include <machine/cpufunc.h> 47 #include <machine/cpu.h> 48 #include <machine/md_var.h> 49 #include <machine/metadata.h> 50 #include <machine/smp.h> 51 52 #include <x86/apicreg.h> 53 54 #include <xen/xen-os.h> 55 #include <xen/error.h> 56 #include <xen/features.h> 57 #include <xen/gnttab.h> 58 #include <xen/hypervisor.h> 59 #include <xen/hvm.h> 60 #include <xen/xen_intr.h> 61 62 #include <contrib/xen/arch-x86/cpuid.h> 63 #include <contrib/xen/hvm/params.h> 64 #include <contrib/xen/vcpu.h> 65 66 /*--------------------------- Forward Declarations ---------------------------*/ 67 static void xen_hvm_cpu_init(void); 68 69 /*-------------------------------- Global Data -------------------------------*/ 70 #ifdef SMP 71 struct cpu_ops xen_hvm_cpu_ops = { 72 .cpu_init = xen_hvm_cpu_init, 73 .cpu_resume = xen_hvm_cpu_init 74 }; 75 #endif 76 77 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 78 79 /** 80 * If non-zero, the hypervisor has been configured to use a direct 81 * IDT event callback for interrupt injection. 82 */ 83 int xen_vector_callback_enabled; 84 85 /** 86 * Signal whether the vector injected for the event channel upcall requires to 87 * be EOI'ed on the local APIC. 88 */ 89 bool xen_evtchn_needs_ack; 90 91 /*------------------------------- Per-CPU Data -------------------------------*/ 92 DPCPU_DECLARE(struct vcpu_info *, vcpu_info); 93 94 /*------------------------------ Sysctl tunables -----------------------------*/ 95 int xen_disable_pv_disks = 0; 96 int xen_disable_pv_nics = 0; 97 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); 98 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); 99 100 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 101 102 void xen_emergency_print(const char *str, size_t size) 103 { 104 outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size); 105 } 106 107 static void 108 hypervisor_quirks(unsigned int major, unsigned int minor) 109 { 110 #ifdef SMP 111 if (((major < 4) || (major == 4 && minor <= 5)) && 112 msix_disable_migration == -1) { 113 /* 114 * Xen hypervisors prior to 4.6.0 do not properly 115 * handle updates to enabled MSI-X table entries, 116 * so disable MSI-X interrupt migration in that 117 * case. 118 */ 119 if (bootverbose) 120 printf( 121 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" 122 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); 123 msix_disable_migration = 1; 124 } 125 #endif 126 } 127 128 static void 129 hypervisor_version(void) 130 { 131 uint32_t regs[4]; 132 int major, minor; 133 134 do_cpuid(hv_base + 1, regs); 135 136 major = regs[0] >> 16; 137 minor = regs[0] & 0xffff; 138 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); 139 140 hypervisor_quirks(major, minor); 141 } 142 143 /* 144 * Translate linear to physical address when still running on the bootloader 145 * created page-tables. 146 */ 147 static vm_paddr_t 148 early_init_vtop(void *addr) 149 { 150 151 /* 152 * Using a KASSERT won't print anything, as this is before console 153 * initialization. 154 */ 155 if (__predict_false((uintptr_t)addr < KERNBASE)) { 156 xc_printf("invalid linear address: %p\n", addr); 157 halt(); 158 } 159 160 return ((uintptr_t)addr - KERNBASE 161 #ifdef __amd64__ 162 + kernphys - KERNLOAD 163 #endif 164 ); 165 } 166 167 static int 168 map_shared_info(void) 169 { 170 /* 171 * TODO shared info page should be mapped in an unpopulated (IOW: 172 * non-RAM) address. But finding one at this point in boot is 173 * complicated, hence re-use a RAM address for the time being. This 174 * sadly causes super-page shattering in the second stage translation 175 * page tables. 176 */ 177 static union { 178 shared_info_t shared_info; 179 uint8_t raw[PAGE_SIZE]; 180 } shared_page __attribute__((aligned(PAGE_SIZE))); 181 static struct xen_add_to_physmap xatp = { 182 .domid = DOMID_SELF, 183 .space = XENMAPSPACE_shared_info, 184 }; 185 int rc; 186 187 _Static_assert(sizeof(shared_page) == PAGE_SIZE, 188 "invalid Xen shared_info struct size"); 189 190 if (xatp.gpfn == 0) 191 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info)); 192 193 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 194 if (rc != 0) { 195 xc_printf("cannot map shared info page: %d\n", rc); 196 HYPERVISOR_shared_info = NULL; 197 } else if (HYPERVISOR_shared_info == NULL) 198 HYPERVISOR_shared_info = &shared_page.shared_info; 199 200 return (rc); 201 } 202 203 static void 204 fixup_console(void) 205 { 206 struct xen_platform_op op = { 207 .cmd = XENPF_get_dom0_console, 208 }; 209 xenpf_dom0_console_t *console = &op.u.dom0_console; 210 union { 211 struct efi_fb efi; 212 struct vbe_fb vbe; 213 } *fb = NULL; 214 int size; 215 caddr_t kmdp; 216 217 kmdp = preload_search_by_type("elf kernel"); 218 if (kmdp == NULL) 219 kmdp = preload_search_by_type("elf64 kernel"); 220 if (kmdp == NULL) { 221 xc_printf("Unable to find kernel metadata\n"); 222 return; 223 } 224 225 size = HYPERVISOR_platform_op(&op); 226 if (size < 0) { 227 xc_printf("Failed to get video console info: %d\n", size); 228 return; 229 } 230 231 switch (console->video_type) { 232 case XEN_VGATYPE_VESA_LFB: 233 fb = (__typeof__ (fb))preload_search_info(kmdp, 234 MODINFO_METADATA | MODINFOMD_VBE_FB); 235 236 if (fb == NULL) { 237 xc_printf("No VBE FB in kernel metadata\n"); 238 return; 239 } 240 241 _Static_assert(offsetof(struct vbe_fb, fb_bpp) == 242 offsetof(struct efi_fb, fb_mask_reserved) + 243 sizeof(fb->efi.fb_mask_reserved), 244 "Bad structure overlay\n"); 245 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel; 246 /* FALLTHROUGH */ 247 case XEN_VGATYPE_EFI_LFB: 248 if (fb == NULL) { 249 fb = (__typeof__ (fb))preload_search_info(kmdp, 250 MODINFO_METADATA | MODINFOMD_EFI_FB); 251 if (fb == NULL) { 252 xc_printf("No EFI FB in kernel metadata\n"); 253 return; 254 } 255 } 256 257 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base; 258 if (size > 259 offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base)) 260 fb->efi.fb_addr |= 261 (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32; 262 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16; 263 fb->efi.fb_height = console->u.vesa_lfb.height; 264 fb->efi.fb_width = console->u.vesa_lfb.width; 265 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) / 266 console->u.vesa_lfb.bits_per_pixel; 267 #define FBMASK(c) \ 268 ((~0u << console->u.vesa_lfb.c ## _pos) & \ 269 (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \ 270 console->u.vesa_lfb.c ## _size))) 271 fb->efi.fb_mask_red = FBMASK(red); 272 fb->efi.fb_mask_green = FBMASK(green); 273 fb->efi.fb_mask_blue = FBMASK(blue); 274 fb->efi.fb_mask_reserved = FBMASK(rsvd); 275 #undef FBMASK 276 break; 277 278 default: 279 xc_printf("Video console type unsupported\n"); 280 return; 281 } 282 } 283 284 /* Early initialization when running as a Xen guest. */ 285 void 286 xen_early_init(void) 287 { 288 uint32_t regs[4]; 289 int rc; 290 291 if (hv_high < hv_base + 2) { 292 xc_printf("Invalid maximum leaves for hv_base\n"); 293 vm_guest = VM_GUEST_VM; 294 return; 295 } 296 297 /* Find the hypercall pages. */ 298 do_cpuid(hv_base + 2, regs); 299 if (regs[0] != 1) { 300 xc_printf("Invalid number of hypercall pages %u\n", 301 regs[0]); 302 vm_guest = VM_GUEST_VM; 303 return; 304 } 305 306 wrmsr(regs[1], early_init_vtop(&hypercall_page)); 307 308 rc = map_shared_info(); 309 if (rc != 0) { 310 vm_guest = VM_GUEST_VM; 311 return; 312 } 313 314 if (xen_initial_domain()) 315 /* Fixup video console information in case Xen changed the mode. */ 316 fixup_console(); 317 } 318 319 static int 320 set_percpu_callback(unsigned int vcpu) 321 { 322 struct xen_hvm_evtchn_upcall_vector vec; 323 int error; 324 325 vec.vcpu = vcpu; 326 vec.vector = IDT_EVTCHN; 327 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); 328 329 return (error != 0 ? xen_translate_error(error) : 0); 330 } 331 332 /* 333 * Tell the hypervisor how to contact us for event channel callbacks. 334 */ 335 void 336 xen_hvm_set_callback(device_t dev) 337 { 338 struct xen_hvm_param xhp; 339 int irq; 340 341 if (xen_vector_callback_enabled) 342 return; 343 344 xhp.domid = DOMID_SELF; 345 xhp.index = HVM_PARAM_CALLBACK_IRQ; 346 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 347 int error; 348 349 error = set_percpu_callback(0); 350 if (error == 0) { 351 xen_evtchn_needs_ack = true; 352 /* Trick toolstack to think we are enlightened */ 353 xhp.value = 1; 354 } else 355 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 356 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 357 if (error == 0) { 358 xen_vector_callback_enabled = 1; 359 return; 360 } else if (xen_evtchn_needs_ack) 361 panic("Unable to setup fake HVM param: %d", error); 362 363 printf("Xen HVM callback vector registration failed (%d). " 364 "Falling back to emulated device interrupt\n", error); 365 } 366 xen_vector_callback_enabled = 0; 367 if (dev == NULL) { 368 /* 369 * Called from early boot or resume. 370 * xenpci will invoke us again later. 371 */ 372 return; 373 } 374 375 irq = pci_get_irq(dev); 376 if (irq < 16) { 377 xhp.value = HVM_CALLBACK_GSI(irq); 378 } else { 379 u_int slot; 380 u_int pin; 381 382 slot = pci_get_slot(dev); 383 pin = pci_get_intpin(dev) - 1; 384 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 385 } 386 387 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 388 panic("Can't set evtchn callback"); 389 } 390 391 #define XEN_MAGIC_IOPORT 0x10 392 enum { 393 XMI_MAGIC = 0x49d2, 394 XMI_UNPLUG_IDE_DISKS = 0x01, 395 XMI_UNPLUG_NICS = 0x02, 396 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 397 }; 398 399 static void 400 xen_hvm_disable_emulated_devices(void) 401 { 402 u_short disable_devs = 0; 403 404 if (xen_pv_domain()) { 405 /* 406 * No emulated devices in the PV case, so no need to unplug 407 * anything. 408 */ 409 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) 410 printf("PV devices cannot be disabled in PV guests\n"); 411 return; 412 } 413 414 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 415 return; 416 417 if (xen_disable_pv_disks == 0) { 418 if (bootverbose) 419 printf("XEN: disabling emulated disks\n"); 420 disable_devs |= XMI_UNPLUG_IDE_DISKS; 421 } 422 if (xen_disable_pv_nics == 0) { 423 if (bootverbose) 424 printf("XEN: disabling emulated nics\n"); 425 disable_devs |= XMI_UNPLUG_NICS; 426 } 427 428 if (disable_devs != 0) 429 outw(XEN_MAGIC_IOPORT, disable_devs); 430 } 431 432 static void 433 xen_hvm_init(enum xen_hvm_init_type init_type) 434 { 435 unsigned int i; 436 437 if (!xen_domain() || 438 init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 439 return; 440 441 hypervisor_version(); 442 443 switch (init_type) { 444 case XEN_HVM_INIT_LATE: 445 setup_xen_features(); 446 #ifdef SMP 447 cpu_ops = xen_hvm_cpu_ops; 448 #endif 449 break; 450 case XEN_HVM_INIT_RESUME: 451 /* Clear stale vcpu_info. */ 452 CPU_FOREACH(i) 453 DPCPU_ID_SET(i, vcpu_info, NULL); 454 455 if (map_shared_info() != 0) 456 panic("cannot map Xen shared info page"); 457 458 break; 459 default: 460 panic("Unsupported HVM initialization type"); 461 } 462 463 xen_vector_callback_enabled = 0; 464 xen_evtchn_needs_ack = false; 465 xen_hvm_set_callback(NULL); 466 467 xen_hvm_disable_emulated_devices(); 468 } 469 470 void 471 xen_hvm_suspend(void) 472 { 473 } 474 475 void 476 xen_hvm_resume(bool suspend_cancelled) 477 { 478 479 xen_hvm_init(suspend_cancelled ? 480 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 481 482 /* Register vcpu_info area for CPU#0. */ 483 xen_hvm_cpu_init(); 484 } 485 486 static void 487 xen_hvm_sysinit(void *arg __unused) 488 { 489 xen_hvm_init(XEN_HVM_INIT_LATE); 490 } 491 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 492 493 static void 494 xen_hvm_cpu_init(void) 495 { 496 uint32_t regs[4]; 497 int rc; 498 499 if (!xen_domain()) 500 return; 501 502 if (DPCPU_GET(vcpu_info) != NULL) { 503 /* 504 * vcpu_info is already set. We're resuming 505 * from a failed migration and our pre-suspend 506 * configuration is still valid. 507 */ 508 return; 509 } 510 511 /* 512 * Set vCPU ID. If available fetch the ID from CPUID, if not just use 513 * the ACPI ID. 514 */ 515 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 516 cpuid_count(hv_base + 4, 0, regs); 517 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) || 518 !xen_pv_domain(), 519 ("Xen PV domain without vcpu_id in cpuid")); 520 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? 521 regs[1] : PCPU_GET(acpi_id)); 522 523 if (xen_evtchn_needs_ack && !IS_BSP()) { 524 /* 525 * Setup the per-vpcu event channel upcall vector. This is only 526 * required when using the new HVMOP_set_evtchn_upcall_vector 527 * hypercall, which allows using a different vector for each 528 * vCPU. Note that FreeBSD uses the same vector for all vCPUs 529 * because it's not dynamically allocated. 530 */ 531 rc = set_percpu_callback(PCPU_GET(vcpu_id)); 532 if (rc != 0) 533 panic("Event channel upcall vector setup failed: %d", 534 rc); 535 } 536 537 xen_setup_vcpu_info(); 538 } 539 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 540 541 bool 542 xen_has_iommu_maps(void) 543 { 544 uint32_t regs[4]; 545 546 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 547 cpuid_count(hv_base + 4, 0, regs); 548 549 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS); 550 } 551