1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 5 * Copyright (c) 2012 Spectra Logic Corporation 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/malloc.h> 35 #include <sys/proc.h> 36 #include <sys/smp.h> 37 #include <sys/systm.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 #include <vm/vm_param.h> 42 43 #include <dev/pci/pcivar.h> 44 45 #include <machine/_inttypes.h> 46 #include <machine/cpufunc.h> 47 #include <machine/cpu.h> 48 #include <machine/md_var.h> 49 #include <machine/metadata.h> 50 #include <machine/pc/bios.h> 51 #include <machine/smp.h> 52 53 #include <x86/apicreg.h> 54 55 #include <xen/xen-os.h> 56 #include <xen/error.h> 57 #include <xen/features.h> 58 #include <xen/gnttab.h> 59 #include <xen/hypervisor.h> 60 #include <xen/hvm.h> 61 #include <xen/xen_intr.h> 62 63 #include <contrib/xen/arch-x86/cpuid.h> 64 #include <contrib/xen/hvm/params.h> 65 #include <contrib/xen/vcpu.h> 66 67 /*--------------------------- Forward Declarations ---------------------------*/ 68 static void xen_hvm_cpu_init(void); 69 70 /*-------------------------------- Global Data -------------------------------*/ 71 #ifdef SMP 72 struct cpu_ops xen_hvm_cpu_ops = { 73 .cpu_init = xen_hvm_cpu_init, 74 .cpu_resume = xen_hvm_cpu_init 75 }; 76 #endif 77 78 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 79 80 /** 81 * If non-zero, the hypervisor has been configured to use a direct 82 * IDT event callback for interrupt injection. 83 */ 84 int xen_vector_callback_enabled; 85 86 /** 87 * Signal whether the vector injected for the event channel upcall requires to 88 * be EOI'ed on the local APIC. 89 */ 90 bool xen_evtchn_needs_ack; 91 92 /*------------------------------- Per-CPU Data -------------------------------*/ 93 DPCPU_DECLARE(struct vcpu_info *, vcpu_info); 94 95 /*------------------------------ Sysctl tunables -----------------------------*/ 96 int xen_disable_pv_disks = 0; 97 int xen_disable_pv_nics = 0; 98 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); 99 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); 100 101 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 102 103 void xen_emergency_print(const char *str, size_t size) 104 { 105 outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size); 106 } 107 108 static void 109 hypervisor_quirks(unsigned int major, unsigned int minor) 110 { 111 #ifdef SMP 112 if (((major < 4) || (major == 4 && minor <= 5)) && 113 msix_disable_migration == -1) { 114 /* 115 * Xen hypervisors prior to 4.6.0 do not properly 116 * handle updates to enabled MSI-X table entries, 117 * so disable MSI-X interrupt migration in that 118 * case. 119 */ 120 if (bootverbose) 121 printf( 122 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" 123 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); 124 msix_disable_migration = 1; 125 } 126 #endif 127 } 128 129 static void 130 hypervisor_version(void) 131 { 132 uint32_t regs[4]; 133 int major, minor; 134 135 do_cpuid(hv_base + 1, regs); 136 137 major = regs[0] >> 16; 138 minor = regs[0] & 0xffff; 139 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); 140 141 hypervisor_quirks(major, minor); 142 } 143 144 /* 145 * Translate linear to physical address when still running on the bootloader 146 * created page-tables. 147 */ 148 static vm_paddr_t 149 early_init_vtop(void *addr) 150 { 151 152 /* 153 * Using a KASSERT won't print anything, as this is before console 154 * initialization. 155 */ 156 if (__predict_false((uintptr_t)addr < KERNBASE)) { 157 xc_printf("invalid linear address: %p\n", addr); 158 halt(); 159 } 160 161 return ((uintptr_t)addr - KERNBASE 162 #ifdef __amd64__ 163 + kernphys - KERNLOAD 164 #endif 165 ); 166 } 167 168 static int 169 map_shared_info(void) 170 { 171 /* 172 * TODO shared info page should be mapped in an unpopulated (IOW: 173 * non-RAM) address. But finding one at this point in boot is 174 * complicated, hence re-use a RAM address for the time being. This 175 * sadly causes super-page shattering in the second stage translation 176 * page tables. 177 */ 178 static union { 179 shared_info_t shared_info; 180 uint8_t raw[PAGE_SIZE]; 181 } shared_page __attribute__((aligned(PAGE_SIZE))); 182 static struct xen_add_to_physmap xatp = { 183 .domid = DOMID_SELF, 184 .space = XENMAPSPACE_shared_info, 185 }; 186 int rc; 187 188 _Static_assert(sizeof(shared_page) == PAGE_SIZE, 189 "invalid Xen shared_info struct size"); 190 191 if (xatp.gpfn == 0) 192 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info)); 193 194 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 195 if (rc != 0) { 196 xc_printf("cannot map shared info page: %d\n", rc); 197 HYPERVISOR_shared_info = NULL; 198 } else if (HYPERVISOR_shared_info == NULL) 199 HYPERVISOR_shared_info = &shared_page.shared_info; 200 201 return (rc); 202 } 203 204 static void 205 fixup_console(void) 206 { 207 struct xen_platform_op op = { 208 .cmd = XENPF_get_dom0_console, 209 }; 210 xenpf_dom0_console_t *console = &op.u.dom0_console; 211 union { 212 struct efi_fb efi; 213 struct vbe_fb vbe; 214 } *fb = NULL; 215 int size; 216 caddr_t kmdp; 217 218 kmdp = preload_search_by_type("elf kernel"); 219 if (kmdp == NULL) 220 kmdp = preload_search_by_type("elf64 kernel"); 221 if (kmdp == NULL) { 222 xc_printf("Unable to find kernel metadata\n"); 223 return; 224 } 225 226 size = HYPERVISOR_platform_op(&op); 227 if (size < 0) { 228 xc_printf("Failed to get video console info: %d\n", size); 229 return; 230 } 231 232 switch (console->video_type) { 233 case XEN_VGATYPE_VESA_LFB: 234 fb = (__typeof__ (fb))preload_search_info(kmdp, 235 MODINFO_METADATA | MODINFOMD_VBE_FB); 236 237 if (fb == NULL) { 238 xc_printf("No VBE FB in kernel metadata\n"); 239 return; 240 } 241 242 _Static_assert(offsetof(struct vbe_fb, fb_bpp) == 243 offsetof(struct efi_fb, fb_mask_reserved) + 244 sizeof(fb->efi.fb_mask_reserved), 245 "Bad structure overlay\n"); 246 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel; 247 /* FALLTHROUGH */ 248 case XEN_VGATYPE_EFI_LFB: 249 if (fb == NULL) { 250 fb = (__typeof__ (fb))preload_search_info(kmdp, 251 MODINFO_METADATA | MODINFOMD_EFI_FB); 252 if (fb == NULL) { 253 xc_printf("No EFI FB in kernel metadata\n"); 254 return; 255 } 256 } 257 258 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base; 259 if (size > 260 offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base)) 261 fb->efi.fb_addr |= 262 (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32; 263 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16; 264 fb->efi.fb_height = console->u.vesa_lfb.height; 265 fb->efi.fb_width = console->u.vesa_lfb.width; 266 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) / 267 console->u.vesa_lfb.bits_per_pixel; 268 #define FBMASK(c) \ 269 ((~0u << console->u.vesa_lfb.c ## _pos) & \ 270 (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \ 271 console->u.vesa_lfb.c ## _size))) 272 fb->efi.fb_mask_red = FBMASK(red); 273 fb->efi.fb_mask_green = FBMASK(green); 274 fb->efi.fb_mask_blue = FBMASK(blue); 275 fb->efi.fb_mask_reserved = FBMASK(rsvd); 276 #undef FBMASK 277 break; 278 279 default: 280 xc_printf("Video console type unsupported\n"); 281 return; 282 } 283 } 284 285 /* Early initialization when running as a Xen guest. */ 286 void 287 xen_early_init(void) 288 { 289 uint32_t regs[4]; 290 int rc; 291 292 if (hv_high < hv_base + 2) { 293 xc_printf("Invalid maximum leaves for hv_base\n"); 294 vm_guest = VM_GUEST_VM; 295 return; 296 } 297 298 /* Find the hypercall pages. */ 299 do_cpuid(hv_base + 2, regs); 300 if (regs[0] != 1) { 301 xc_printf("Invalid number of hypercall pages %u\n", 302 regs[0]); 303 vm_guest = VM_GUEST_VM; 304 return; 305 } 306 307 wrmsr(regs[1], early_init_vtop(&hypercall_page)); 308 309 rc = map_shared_info(); 310 if (rc != 0) { 311 vm_guest = VM_GUEST_VM; 312 return; 313 } 314 315 if (xen_initial_domain()) 316 /* Fixup video console information in case Xen changed the mode. */ 317 fixup_console(); 318 } 319 320 static int 321 set_percpu_callback(unsigned int vcpu) 322 { 323 struct xen_hvm_evtchn_upcall_vector vec; 324 int error; 325 326 vec.vcpu = vcpu; 327 vec.vector = IDT_EVTCHN; 328 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); 329 330 return (error != 0 ? xen_translate_error(error) : 0); 331 } 332 333 /* 334 * Tell the hypervisor how to contact us for event channel callbacks. 335 */ 336 void 337 xen_hvm_set_callback(device_t dev) 338 { 339 struct xen_hvm_param xhp; 340 int irq; 341 342 if (xen_vector_callback_enabled) 343 return; 344 345 xhp.domid = DOMID_SELF; 346 xhp.index = HVM_PARAM_CALLBACK_IRQ; 347 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 348 int error; 349 350 error = set_percpu_callback(0); 351 if (error == 0) { 352 xen_evtchn_needs_ack = true; 353 /* Trick toolstack to think we are enlightened */ 354 xhp.value = 1; 355 } else 356 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 357 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 358 if (error == 0) { 359 xen_vector_callback_enabled = 1; 360 return; 361 } else if (xen_evtchn_needs_ack) 362 panic("Unable to setup fake HVM param: %d", error); 363 364 printf("Xen HVM callback vector registration failed (%d). " 365 "Falling back to emulated device interrupt\n", error); 366 } 367 xen_vector_callback_enabled = 0; 368 if (dev == NULL) { 369 /* 370 * Called from early boot or resume. 371 * xenpci will invoke us again later. 372 */ 373 return; 374 } 375 376 irq = pci_get_irq(dev); 377 if (irq < 16) { 378 xhp.value = HVM_CALLBACK_GSI(irq); 379 } else { 380 u_int slot; 381 u_int pin; 382 383 slot = pci_get_slot(dev); 384 pin = pci_get_intpin(dev) - 1; 385 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 386 } 387 388 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 389 panic("Can't set evtchn callback"); 390 } 391 392 #define XEN_MAGIC_IOPORT 0x10 393 enum { 394 XMI_MAGIC = 0x49d2, 395 XMI_UNPLUG_IDE_DISKS = 0x01, 396 XMI_UNPLUG_NICS = 0x02, 397 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 398 }; 399 400 static void 401 xen_hvm_disable_emulated_devices(void) 402 { 403 u_short disable_devs = 0; 404 405 if (xen_pv_domain()) { 406 /* 407 * No emulated devices in the PV case, so no need to unplug 408 * anything. 409 */ 410 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) 411 printf("PV devices cannot be disabled in PV guests\n"); 412 return; 413 } 414 415 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 416 return; 417 418 if (xen_disable_pv_disks == 0) { 419 if (bootverbose) 420 printf("XEN: disabling emulated disks\n"); 421 disable_devs |= XMI_UNPLUG_IDE_DISKS; 422 } 423 if (xen_disable_pv_nics == 0) { 424 if (bootverbose) 425 printf("XEN: disabling emulated nics\n"); 426 disable_devs |= XMI_UNPLUG_NICS; 427 } 428 429 if (disable_devs != 0) 430 outw(XEN_MAGIC_IOPORT, disable_devs); 431 } 432 433 static void 434 xen_hvm_init(enum xen_hvm_init_type init_type) 435 { 436 unsigned int i; 437 438 if (!xen_domain() || 439 init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 440 return; 441 442 hypervisor_version(); 443 444 switch (init_type) { 445 case XEN_HVM_INIT_LATE: 446 setup_xen_features(); 447 #ifdef SMP 448 cpu_ops = xen_hvm_cpu_ops; 449 #endif 450 break; 451 case XEN_HVM_INIT_RESUME: 452 /* Clear stale vcpu_info. */ 453 CPU_FOREACH(i) 454 DPCPU_ID_SET(i, vcpu_info, NULL); 455 456 if (map_shared_info() != 0) 457 panic("cannot map Xen shared info page"); 458 459 break; 460 default: 461 panic("Unsupported HVM initialization type"); 462 } 463 464 xen_vector_callback_enabled = 0; 465 xen_evtchn_needs_ack = false; 466 xen_hvm_set_callback(NULL); 467 468 xen_hvm_disable_emulated_devices(); 469 } 470 471 void 472 xen_hvm_suspend(void) 473 { 474 } 475 476 void 477 xen_hvm_resume(bool suspend_cancelled) 478 { 479 480 xen_hvm_init(suspend_cancelled ? 481 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 482 483 /* Register vcpu_info area for CPU#0. */ 484 xen_hvm_cpu_init(); 485 } 486 487 static void 488 xen_hvm_sysinit(void *arg __unused) 489 { 490 xen_hvm_init(XEN_HVM_INIT_LATE); 491 } 492 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 493 494 static void 495 xen_hvm_cpu_init(void) 496 { 497 uint32_t regs[4]; 498 int rc; 499 500 if (!xen_domain()) 501 return; 502 503 if (DPCPU_GET(vcpu_info) != NULL) { 504 /* 505 * vcpu_info is already set. We're resuming 506 * from a failed migration and our pre-suspend 507 * configuration is still valid. 508 */ 509 return; 510 } 511 512 /* 513 * Set vCPU ID. If available fetch the ID from CPUID, if not just use 514 * the ACPI ID. 515 */ 516 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 517 cpuid_count(hv_base + 4, 0, regs); 518 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) || 519 !xen_pv_domain(), 520 ("Xen PV domain without vcpu_id in cpuid")); 521 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? 522 regs[1] : PCPU_GET(acpi_id)); 523 524 if (xen_evtchn_needs_ack && !IS_BSP()) { 525 /* 526 * Setup the per-vpcu event channel upcall vector. This is only 527 * required when using the new HVMOP_set_evtchn_upcall_vector 528 * hypercall, which allows using a different vector for each 529 * vCPU. Note that FreeBSD uses the same vector for all vCPUs 530 * because it's not dynamically allocated. 531 */ 532 rc = set_percpu_callback(PCPU_GET(vcpu_id)); 533 if (rc != 0) 534 panic("Event channel upcall vector setup failed: %d", 535 rc); 536 } 537 538 xen_setup_vcpu_info(); 539 } 540 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 541 542 bool 543 xen_has_iommu_maps(void) 544 { 545 uint32_t regs[4]; 546 547 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 548 cpuid_count(hv_base + 4, 0, regs); 549 550 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS); 551 } 552 553 int 554 xen_arch_init_physmem(device_t dev, struct rman *mem) 555 { 556 static struct bios_smap smap[128]; 557 struct xen_memory_map memmap = { 558 .nr_entries = nitems(smap), 559 }; 560 unsigned int i; 561 int error; 562 563 set_xen_guest_handle(memmap.buffer, smap); 564 error = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); 565 if (error != 0) 566 return (0); 567 568 /* 569 * Fill with UNUSABLE regions, as it's always fine to use those for 570 * foreign mappings, they will never be populated. 571 */ 572 for (i = 0; i < memmap.nr_entries; i++) { 573 const vm_paddr_t max_phys = cpu_getmaxphyaddr(); 574 vm_paddr_t start, end; 575 576 if (smap[i].type != SMAP_TYPE_ACPI_ERROR) 577 continue; 578 579 start = round_page(smap[i].base); 580 /* In 32bit mode we possibly need to truncate the addresses. */ 581 end = MIN(trunc_page(smap[i].base + smap[i].length) - 1, 582 max_phys); 583 584 if (start >= end) 585 continue; 586 587 if (bootverbose != 0) 588 device_printf(dev, 589 "scratch mapping region @ [%016jx, %016jx]\n", 590 start, end); 591 592 error = rman_manage_region(mem, start, end); 593 if (error != 0) 594 device_printf(dev, 595 "unable to add scratch region [%016jx, %016jx]: %d\n", 596 start, end, error); 597 } 598 599 return (0); 600 } 601