1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 5 * Copyright (c) 2012 Spectra Logic Corporation 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/kernel.h> 33 #include <sys/linker.h> 34 #include <sys/malloc.h> 35 #include <sys/proc.h> 36 #include <sys/smp.h> 37 #include <sys/systm.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 #include <vm/vm_param.h> 42 43 #include <dev/pci/pcivar.h> 44 45 #include <machine/_inttypes.h> 46 #include <machine/cpufunc.h> 47 #include <machine/cpu.h> 48 #include <machine/md_var.h> 49 #include <machine/metadata.h> 50 #include <machine/pc/bios.h> 51 #include <machine/smp.h> 52 53 #include <x86/apicreg.h> 54 55 #include <xen/xen-os.h> 56 #include <xen/error.h> 57 #include <xen/features.h> 58 #include <xen/gnttab.h> 59 #include <xen/hypervisor.h> 60 #include <xen/hvm.h> 61 #include <xen/xen_intr.h> 62 63 #include <contrib/xen/arch-x86/cpuid.h> 64 #include <contrib/xen/hvm/params.h> 65 #include <contrib/xen/vcpu.h> 66 67 /*--------------------------- Forward Declarations ---------------------------*/ 68 static void xen_hvm_cpu_init(void); 69 70 /*-------------------------------- Global Data -------------------------------*/ 71 #ifdef SMP 72 struct cpu_ops xen_hvm_cpu_ops = { 73 .cpu_init = xen_hvm_cpu_init, 74 .cpu_resume = xen_hvm_cpu_init 75 }; 76 #endif 77 78 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 79 80 /** 81 * If non-zero, the hypervisor has been configured to use a direct 82 * IDT event callback for interrupt injection. 83 */ 84 int xen_vector_callback_enabled; 85 86 /** 87 * Signal whether the vector injected for the event channel upcall requires to 88 * be EOI'ed on the local APIC. 89 */ 90 bool xen_evtchn_needs_ack; 91 92 /*------------------------------- Per-CPU Data -------------------------------*/ 93 DPCPU_DECLARE(struct vcpu_info *, vcpu_info); 94 95 /*------------------------------ Sysctl tunables -----------------------------*/ 96 int xen_disable_pv_disks = 0; 97 int xen_disable_pv_nics = 0; 98 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); 99 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); 100 101 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 102 103 void xen_emergency_print(const char *str, size_t size) 104 { 105 outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size); 106 } 107 108 static void 109 hypervisor_quirks(unsigned int major, unsigned int minor) 110 { 111 #ifdef SMP 112 if (((major < 4) || (major == 4 && minor <= 5)) && 113 msix_disable_migration == -1) { 114 /* 115 * Xen hypervisors prior to 4.6.0 do not properly 116 * handle updates to enabled MSI-X table entries, 117 * so disable MSI-X interrupt migration in that 118 * case. 119 */ 120 if (bootverbose) 121 printf( 122 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" 123 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); 124 msix_disable_migration = 1; 125 } 126 #endif 127 } 128 129 static void 130 hypervisor_version(void) 131 { 132 uint32_t regs[4]; 133 int major, minor; 134 135 do_cpuid(hv_base + 1, regs); 136 137 major = regs[0] >> 16; 138 minor = regs[0] & 0xffff; 139 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); 140 141 hypervisor_quirks(major, minor); 142 } 143 144 /* 145 * Translate linear to physical address when still running on the bootloader 146 * created page-tables. 147 */ 148 static vm_paddr_t 149 early_init_vtop(void *addr) 150 { 151 152 /* 153 * Using a KASSERT won't print anything, as this is before console 154 * initialization. 155 */ 156 if (__predict_false((uintptr_t)addr < KERNBASE)) { 157 xc_printf("invalid linear address: %p\n", addr); 158 halt(); 159 } 160 161 return ((uintptr_t)addr - KERNBASE 162 #ifdef __amd64__ 163 + kernphys - KERNLOAD 164 #endif 165 ); 166 } 167 168 static int 169 map_shared_info(void) 170 { 171 /* 172 * TODO shared info page should be mapped in an unpopulated (IOW: 173 * non-RAM) address. But finding one at this point in boot is 174 * complicated, hence re-use a RAM address for the time being. This 175 * sadly causes super-page shattering in the second stage translation 176 * page tables. 177 */ 178 static union { 179 shared_info_t shared_info; 180 uint8_t raw[PAGE_SIZE]; 181 } shared_page __attribute__((aligned(PAGE_SIZE))); 182 static struct xen_add_to_physmap xatp = { 183 .domid = DOMID_SELF, 184 .space = XENMAPSPACE_shared_info, 185 }; 186 int rc; 187 188 _Static_assert(sizeof(shared_page) == PAGE_SIZE, 189 "invalid Xen shared_info struct size"); 190 191 if (xatp.gpfn == 0) 192 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info)); 193 194 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 195 if (rc != 0) { 196 xc_printf("cannot map shared info page: %d\n", rc); 197 HYPERVISOR_shared_info = NULL; 198 } else if (HYPERVISOR_shared_info == NULL) 199 HYPERVISOR_shared_info = &shared_page.shared_info; 200 201 return (rc); 202 } 203 204 static void 205 fixup_console(void) 206 { 207 struct xen_platform_op op = { 208 .cmd = XENPF_get_dom0_console, 209 }; 210 xenpf_dom0_console_t *console = &op.u.dom0_console; 211 union { 212 struct efi_fb efi; 213 struct vbe_fb vbe; 214 } *fb = NULL; 215 int size; 216 217 size = HYPERVISOR_platform_op(&op); 218 if (size < 0) { 219 xc_printf("Failed to get video console info: %d\n", size); 220 return; 221 } 222 223 switch (console->video_type) { 224 case XEN_VGATYPE_VESA_LFB: 225 fb = (__typeof__ (fb))preload_search_info(preload_kmdp, 226 MODINFO_METADATA | MODINFOMD_VBE_FB); 227 228 if (fb == NULL) { 229 xc_printf("No VBE FB in kernel metadata\n"); 230 return; 231 } 232 233 _Static_assert(offsetof(struct vbe_fb, fb_bpp) == 234 offsetof(struct efi_fb, fb_mask_reserved) + 235 sizeof(fb->efi.fb_mask_reserved), 236 "Bad structure overlay\n"); 237 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel; 238 /* FALLTHROUGH */ 239 case XEN_VGATYPE_EFI_LFB: 240 if (fb == NULL) { 241 fb = (__typeof__ (fb))preload_search_info(preload_kmdp, 242 MODINFO_METADATA | MODINFOMD_EFI_FB); 243 if (fb == NULL) { 244 xc_printf("No EFI FB in kernel metadata\n"); 245 return; 246 } 247 } 248 249 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base; 250 if (size > 251 offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base)) 252 fb->efi.fb_addr |= 253 (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32; 254 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16; 255 fb->efi.fb_height = console->u.vesa_lfb.height; 256 fb->efi.fb_width = console->u.vesa_lfb.width; 257 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) / 258 console->u.vesa_lfb.bits_per_pixel; 259 #define FBMASK(c) \ 260 ((~0u << console->u.vesa_lfb.c ## _pos) & \ 261 (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \ 262 console->u.vesa_lfb.c ## _size))) 263 fb->efi.fb_mask_red = FBMASK(red); 264 fb->efi.fb_mask_green = FBMASK(green); 265 fb->efi.fb_mask_blue = FBMASK(blue); 266 fb->efi.fb_mask_reserved = FBMASK(rsvd); 267 #undef FBMASK 268 break; 269 270 default: 271 xc_printf("Video console type unsupported\n"); 272 return; 273 } 274 } 275 276 /* Early initialization when running as a Xen guest. */ 277 void 278 xen_early_init(void) 279 { 280 uint32_t regs[4]; 281 int rc; 282 283 if (hv_high < hv_base + 2) { 284 xc_printf("Invalid maximum leaves for hv_base\n"); 285 vm_guest = VM_GUEST_VM; 286 return; 287 } 288 289 /* Find the hypercall pages. */ 290 do_cpuid(hv_base + 2, regs); 291 if (regs[0] != 1) { 292 xc_printf("Invalid number of hypercall pages %u\n", 293 regs[0]); 294 vm_guest = VM_GUEST_VM; 295 return; 296 } 297 298 wrmsr(regs[1], early_init_vtop(&hypercall_page)); 299 300 rc = map_shared_info(); 301 if (rc != 0) { 302 vm_guest = VM_GUEST_VM; 303 return; 304 } 305 306 if (xen_initial_domain()) 307 /* Fixup video console information in case Xen changed the mode. */ 308 fixup_console(); 309 } 310 311 static int 312 set_percpu_callback(unsigned int vcpu) 313 { 314 struct xen_hvm_evtchn_upcall_vector vec; 315 int error; 316 317 vec.vcpu = vcpu; 318 vec.vector = IDT_EVTCHN; 319 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); 320 321 return (error != 0 ? xen_translate_error(error) : 0); 322 } 323 324 /* 325 * Tell the hypervisor how to contact us for event channel callbacks. 326 */ 327 void 328 xen_hvm_set_callback(device_t dev) 329 { 330 struct xen_hvm_param xhp; 331 int irq; 332 333 if (xen_vector_callback_enabled) 334 return; 335 336 xhp.domid = DOMID_SELF; 337 xhp.index = HVM_PARAM_CALLBACK_IRQ; 338 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 339 int error; 340 341 error = set_percpu_callback(0); 342 if (error == 0) { 343 xen_evtchn_needs_ack = true; 344 /* Trick toolstack to think we are enlightened */ 345 xhp.value = 1; 346 } else 347 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 348 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 349 if (error == 0) { 350 xen_vector_callback_enabled = 1; 351 return; 352 } else if (xen_evtchn_needs_ack) 353 panic("Unable to setup fake HVM param: %d", error); 354 355 printf("Xen HVM callback vector registration failed (%d). " 356 "Falling back to emulated device interrupt\n", error); 357 } 358 xen_vector_callback_enabled = 0; 359 if (dev == NULL) { 360 /* 361 * Called from early boot or resume. 362 * xenpci will invoke us again later. 363 */ 364 return; 365 } 366 367 irq = pci_get_irq(dev); 368 if (irq < 16) { 369 xhp.value = HVM_CALLBACK_GSI(irq); 370 } else { 371 u_int slot; 372 u_int pin; 373 374 slot = pci_get_slot(dev); 375 pin = pci_get_intpin(dev) - 1; 376 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 377 } 378 379 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 380 panic("Can't set evtchn callback"); 381 } 382 383 #define XEN_MAGIC_IOPORT 0x10 384 enum { 385 XMI_MAGIC = 0x49d2, 386 XMI_UNPLUG_IDE_DISKS = 0x01, 387 XMI_UNPLUG_NICS = 0x02, 388 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 389 }; 390 391 static void 392 xen_hvm_disable_emulated_devices(void) 393 { 394 u_short disable_devs = 0; 395 396 if (xen_pv_domain()) { 397 /* 398 * No emulated devices in the PV case, so no need to unplug 399 * anything. 400 */ 401 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) 402 printf("PV devices cannot be disabled in PV guests\n"); 403 return; 404 } 405 406 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 407 return; 408 409 if (xen_disable_pv_disks == 0) { 410 if (bootverbose) 411 printf("XEN: disabling emulated disks\n"); 412 disable_devs |= XMI_UNPLUG_IDE_DISKS; 413 } 414 if (xen_disable_pv_nics == 0) { 415 if (bootverbose) 416 printf("XEN: disabling emulated nics\n"); 417 disable_devs |= XMI_UNPLUG_NICS; 418 } 419 420 if (disable_devs != 0) 421 outw(XEN_MAGIC_IOPORT, disable_devs); 422 } 423 424 static void 425 xen_hvm_init(enum xen_hvm_init_type init_type) 426 { 427 unsigned int i; 428 429 if (!xen_domain() || 430 init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 431 return; 432 433 hypervisor_version(); 434 435 switch (init_type) { 436 case XEN_HVM_INIT_LATE: 437 setup_xen_features(); 438 #ifdef SMP 439 cpu_ops = xen_hvm_cpu_ops; 440 #endif 441 break; 442 case XEN_HVM_INIT_RESUME: 443 /* Clear stale vcpu_info. */ 444 CPU_FOREACH(i) 445 DPCPU_ID_SET(i, vcpu_info, NULL); 446 447 if (map_shared_info() != 0) 448 panic("cannot map Xen shared info page"); 449 450 break; 451 default: 452 panic("Unsupported HVM initialization type"); 453 } 454 455 xen_vector_callback_enabled = 0; 456 xen_evtchn_needs_ack = false; 457 xen_hvm_set_callback(NULL); 458 459 xen_hvm_disable_emulated_devices(); 460 } 461 462 void 463 xen_hvm_suspend(void) 464 { 465 } 466 467 void 468 xen_hvm_resume(bool suspend_cancelled) 469 { 470 471 xen_hvm_init(suspend_cancelled ? 472 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 473 474 /* Register vcpu_info area for CPU#0. */ 475 xen_hvm_cpu_init(); 476 } 477 478 static void 479 xen_hvm_sysinit(void *arg __unused) 480 { 481 xen_hvm_init(XEN_HVM_INIT_LATE); 482 } 483 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 484 485 static void 486 xen_hvm_cpu_init(void) 487 { 488 uint32_t regs[4]; 489 int rc; 490 491 if (!xen_domain()) 492 return; 493 494 if (DPCPU_GET(vcpu_info) != NULL) { 495 /* 496 * vcpu_info is already set. We're resuming 497 * from a failed migration and our pre-suspend 498 * configuration is still valid. 499 */ 500 return; 501 } 502 503 /* 504 * Set vCPU ID. If available fetch the ID from CPUID, if not just use 505 * the ACPI ID. 506 */ 507 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 508 cpuid_count(hv_base + 4, 0, regs); 509 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) || 510 !xen_pv_domain(), 511 ("Xen PV domain without vcpu_id in cpuid")); 512 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? 513 regs[1] : PCPU_GET(acpi_id)); 514 515 if (xen_evtchn_needs_ack && !IS_BSP()) { 516 /* 517 * Setup the per-vpcu event channel upcall vector. This is only 518 * required when using the new HVMOP_set_evtchn_upcall_vector 519 * hypercall, which allows using a different vector for each 520 * vCPU. Note that FreeBSD uses the same vector for all vCPUs 521 * because it's not dynamically allocated. 522 */ 523 rc = set_percpu_callback(PCPU_GET(vcpu_id)); 524 if (rc != 0) 525 panic("Event channel upcall vector setup failed: %d", 526 rc); 527 } 528 529 xen_setup_vcpu_info(); 530 } 531 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 532 533 bool 534 xen_has_iommu_maps(void) 535 { 536 uint32_t regs[4]; 537 538 KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf")); 539 cpuid_count(hv_base + 4, 0, regs); 540 541 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS); 542 } 543 544 int 545 xen_arch_init_physmem(device_t dev, struct rman *mem) 546 { 547 static struct bios_smap smap[128]; 548 struct xen_memory_map memmap = { 549 .nr_entries = nitems(smap), 550 }; 551 unsigned int i; 552 int error; 553 554 set_xen_guest_handle(memmap.buffer, smap); 555 error = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); 556 if (error != 0) 557 return (0); 558 559 /* 560 * Fill with UNUSABLE regions, as it's always fine to use those for 561 * foreign mappings, they will never be populated. 562 */ 563 for (i = 0; i < memmap.nr_entries; i++) { 564 const vm_paddr_t max_phys = cpu_getmaxphyaddr(); 565 vm_paddr_t start, end; 566 567 if (smap[i].type != SMAP_TYPE_ACPI_ERROR) 568 continue; 569 570 start = round_page(smap[i].base); 571 /* In 32bit mode we possibly need to truncate the addresses. */ 572 end = MIN(trunc_page(smap[i].base + smap[i].length) - 1, 573 max_phys); 574 575 if (start >= end) 576 continue; 577 578 if (bootverbose != 0) 579 device_printf(dev, 580 "scratch mapping region @ [%016jx, %016jx]\n", 581 start, end); 582 583 error = rman_manage_region(mem, start, end); 584 if (error != 0) 585 device_printf(dev, 586 "unable to add scratch region [%016jx, %016jx]: %d\n", 587 start, end, error); 588 } 589 590 return (0); 591 } 592