1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 5 * Copyright (c) 2012 Spectra Logic Corporation 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/kernel.h> 33 #include <sys/malloc.h> 34 #include <sys/proc.h> 35 #include <sys/smp.h> 36 #include <sys/systm.h> 37 38 #include <vm/vm.h> 39 #include <vm/pmap.h> 40 #include <vm/vm_param.h> 41 42 #include <dev/pci/pcivar.h> 43 44 #include <machine/cpufunc.h> 45 #include <machine/cpu.h> 46 #include <machine/smp.h> 47 48 #include <x86/apicreg.h> 49 50 #include <xen/xen-os.h> 51 #include <xen/error.h> 52 #include <xen/features.h> 53 #include <xen/gnttab.h> 54 #include <xen/hypervisor.h> 55 #include <xen/hvm.h> 56 #include <xen/xen_intr.h> 57 58 #include <contrib/xen/arch-x86/cpuid.h> 59 #include <contrib/xen/hvm/params.h> 60 #include <contrib/xen/vcpu.h> 61 62 /*--------------------------- Forward Declarations ---------------------------*/ 63 static void xen_hvm_cpu_init(void); 64 65 /*-------------------------------- Global Data -------------------------------*/ 66 #ifdef SMP 67 struct cpu_ops xen_hvm_cpu_ops = { 68 .cpu_init = xen_hvm_cpu_init, 69 .cpu_resume = xen_hvm_cpu_init 70 }; 71 #endif 72 73 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 74 75 /** 76 * If non-zero, the hypervisor has been configured to use a direct 77 * IDT event callback for interrupt injection. 78 */ 79 int xen_vector_callback_enabled; 80 81 /** 82 * Signal whether the vector injected for the event channel upcall requires to 83 * be EOI'ed on the local APIC. 84 */ 85 bool xen_evtchn_needs_ack; 86 87 /*------------------------------- Per-CPU Data -------------------------------*/ 88 DPCPU_DECLARE(struct vcpu_info *, vcpu_info); 89 90 /*------------------------------ Sysctl tunables -----------------------------*/ 91 int xen_disable_pv_disks = 0; 92 int xen_disable_pv_nics = 0; 93 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); 94 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); 95 96 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 97 98 uint32_t xen_cpuid_base; 99 100 static uint32_t 101 xen_hvm_cpuid_base(void) 102 { 103 uint32_t base, regs[4]; 104 105 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 106 do_cpuid(base, regs); 107 if (!memcmp("XenVMMXenVMM", ®s[1], 12) 108 && (regs[0] - base) >= 2) 109 return (base); 110 } 111 return (0); 112 } 113 114 static void 115 hypervisor_quirks(unsigned int major, unsigned int minor) 116 { 117 #ifdef SMP 118 if (((major < 4) || (major == 4 && minor <= 5)) && 119 msix_disable_migration == -1) { 120 /* 121 * Xen hypervisors prior to 4.6.0 do not properly 122 * handle updates to enabled MSI-X table entries, 123 * so disable MSI-X interrupt migration in that 124 * case. 125 */ 126 if (bootverbose) 127 printf( 128 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" 129 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); 130 msix_disable_migration = 1; 131 } 132 #endif 133 } 134 135 static void 136 hypervisor_version(void) 137 { 138 uint32_t regs[4]; 139 int major, minor; 140 141 do_cpuid(xen_cpuid_base + 1, regs); 142 143 major = regs[0] >> 16; 144 minor = regs[0] & 0xffff; 145 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); 146 147 hypervisor_quirks(major, minor); 148 } 149 150 /* 151 * Allocate and fill in the hypcall page. 152 */ 153 int 154 xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type) 155 { 156 uint32_t regs[4]; 157 158 /* Legacy PVH will get here without the cpuid leaf being set. */ 159 if (xen_cpuid_base == 0) 160 xen_cpuid_base = xen_hvm_cpuid_base(); 161 if (xen_cpuid_base == 0) 162 return (ENXIO); 163 164 if (xen_domain() && init_type == XEN_HVM_INIT_LATE) { 165 /* 166 * If the domain type is already set we can assume that the 167 * hypercall page has been populated too, so just print the 168 * version (and apply any quirks) and exit. 169 */ 170 hypervisor_version(); 171 return 0; 172 } 173 174 if (init_type == XEN_HVM_INIT_LATE) 175 hypervisor_version(); 176 177 /* 178 * Find the hypercall pages. 179 */ 180 do_cpuid(xen_cpuid_base + 2, regs); 181 if (regs[0] != 1) 182 return (EINVAL); 183 184 wrmsr(regs[1], (init_type == XEN_HVM_INIT_EARLY) 185 ? (vm_paddr_t)((uintptr_t)&hypercall_page - KERNBASE) 186 : vtophys(&hypercall_page)); 187 188 return (0); 189 } 190 191 static void 192 xen_hvm_init_shared_info_page(void) 193 { 194 struct xen_add_to_physmap xatp; 195 196 if (xen_pv_domain()) { 197 /* 198 * Already setup in the PV case, shared_info is passed inside 199 * of the start_info struct at start of day. 200 */ 201 return; 202 } 203 204 if (HYPERVISOR_shared_info == NULL) { 205 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); 206 if (HYPERVISOR_shared_info == NULL) 207 panic("Unable to allocate Xen shared info page"); 208 } 209 210 xatp.domid = DOMID_SELF; 211 xatp.idx = 0; 212 xatp.space = XENMAPSPACE_shared_info; 213 xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; 214 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 215 panic("HYPERVISOR_memory_op failed"); 216 } 217 218 static int 219 set_percpu_callback(unsigned int vcpu) 220 { 221 struct xen_hvm_evtchn_upcall_vector vec; 222 int error; 223 224 vec.vcpu = vcpu; 225 vec.vector = IDT_EVTCHN; 226 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec); 227 228 return (error != 0 ? xen_translate_error(error) : 0); 229 } 230 231 /* 232 * Tell the hypervisor how to contact us for event channel callbacks. 233 */ 234 void 235 xen_hvm_set_callback(device_t dev) 236 { 237 struct xen_hvm_param xhp; 238 int irq; 239 240 if (xen_vector_callback_enabled) 241 return; 242 243 xhp.domid = DOMID_SELF; 244 xhp.index = HVM_PARAM_CALLBACK_IRQ; 245 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 246 int error; 247 248 error = set_percpu_callback(0); 249 if (error == 0) { 250 xen_evtchn_needs_ack = true; 251 /* Trick toolstack to think we are enlightened */ 252 xhp.value = 1; 253 } else 254 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 255 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 256 if (error == 0) { 257 xen_vector_callback_enabled = 1; 258 return; 259 } else if (xen_evtchn_needs_ack) 260 panic("Unable to setup fake HVM param: %d", error); 261 262 printf("Xen HVM callback vector registration failed (%d). " 263 "Falling back to emulated device interrupt\n", error); 264 } 265 xen_vector_callback_enabled = 0; 266 if (dev == NULL) { 267 /* 268 * Called from early boot or resume. 269 * xenpci will invoke us again later. 270 */ 271 return; 272 } 273 274 irq = pci_get_irq(dev); 275 if (irq < 16) { 276 xhp.value = HVM_CALLBACK_GSI(irq); 277 } else { 278 u_int slot; 279 u_int pin; 280 281 slot = pci_get_slot(dev); 282 pin = pci_get_intpin(dev) - 1; 283 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 284 } 285 286 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 287 panic("Can't set evtchn callback"); 288 } 289 290 #define XEN_MAGIC_IOPORT 0x10 291 enum { 292 XMI_MAGIC = 0x49d2, 293 XMI_UNPLUG_IDE_DISKS = 0x01, 294 XMI_UNPLUG_NICS = 0x02, 295 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 296 }; 297 298 static void 299 xen_hvm_disable_emulated_devices(void) 300 { 301 u_short disable_devs = 0; 302 303 if (xen_pv_domain()) { 304 /* 305 * No emulated devices in the PV case, so no need to unplug 306 * anything. 307 */ 308 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) 309 printf("PV devices cannot be disabled in PV guests\n"); 310 return; 311 } 312 313 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 314 return; 315 316 if (xen_disable_pv_disks == 0) { 317 if (bootverbose) 318 printf("XEN: disabling emulated disks\n"); 319 disable_devs |= XMI_UNPLUG_IDE_DISKS; 320 } 321 if (xen_disable_pv_nics == 0) { 322 if (bootverbose) 323 printf("XEN: disabling emulated nics\n"); 324 disable_devs |= XMI_UNPLUG_NICS; 325 } 326 327 if (disable_devs != 0) 328 outw(XEN_MAGIC_IOPORT, disable_devs); 329 } 330 331 static void 332 xen_hvm_init(enum xen_hvm_init_type init_type) 333 { 334 int error; 335 int i; 336 337 if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 338 return; 339 340 error = xen_hvm_init_hypercall_stubs(init_type); 341 342 switch (init_type) { 343 case XEN_HVM_INIT_LATE: 344 if (error != 0) 345 return; 346 347 /* 348 * If the Xen domain type is not set at this point 349 * it means we are inside a (PV)HVM guest, because 350 * for PVH the guest type is set much earlier 351 * (see hammer_time_xen). 352 */ 353 if (!xen_domain()) 354 vm_guest = VM_GUEST_XEN; 355 356 setup_xen_features(); 357 #ifdef SMP 358 cpu_ops = xen_hvm_cpu_ops; 359 #endif 360 break; 361 case XEN_HVM_INIT_RESUME: 362 if (error != 0) 363 panic("Unable to init Xen hypercall stubs on resume"); 364 365 /* Clear stale vcpu_info. */ 366 CPU_FOREACH(i) 367 DPCPU_ID_SET(i, vcpu_info, NULL); 368 break; 369 default: 370 panic("Unsupported HVM initialization type"); 371 } 372 373 xen_vector_callback_enabled = 0; 374 xen_evtchn_needs_ack = false; 375 xen_hvm_set_callback(NULL); 376 377 /* 378 * On (PV)HVM domains we need to request the hypervisor to 379 * fill the shared info page, for PVH guest the shared_info page 380 * is passed inside the start_info struct and is already set, so this 381 * functions are no-ops. 382 */ 383 xen_hvm_init_shared_info_page(); 384 xen_hvm_disable_emulated_devices(); 385 } 386 387 void 388 xen_hvm_suspend(void) 389 { 390 } 391 392 void 393 xen_hvm_resume(bool suspend_cancelled) 394 { 395 396 xen_hvm_init(suspend_cancelled ? 397 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 398 399 /* Register vcpu_info area for CPU#0. */ 400 xen_hvm_cpu_init(); 401 } 402 403 static void 404 xen_hvm_sysinit(void *arg __unused) 405 { 406 xen_hvm_init(XEN_HVM_INIT_LATE); 407 } 408 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 409 410 static void 411 xen_hvm_cpu_init(void) 412 { 413 uint32_t regs[4]; 414 int rc; 415 416 if (!xen_domain()) 417 return; 418 419 if (DPCPU_GET(vcpu_info) != NULL) { 420 /* 421 * vcpu_info is already set. We're resuming 422 * from a failed migration and our pre-suspend 423 * configuration is still valid. 424 */ 425 return; 426 } 427 428 /* 429 * Set vCPU ID. If available fetch the ID from CPUID, if not just use 430 * the ACPI ID. 431 */ 432 KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf")); 433 cpuid_count(xen_cpuid_base + 4, 0, regs); 434 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) || 435 !xen_pv_domain(), 436 ("Xen PV domain without vcpu_id in cpuid")); 437 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? 438 regs[1] : PCPU_GET(acpi_id)); 439 440 if (xen_evtchn_needs_ack && !IS_BSP()) { 441 /* 442 * Setup the per-vpcu event channel upcall vector. This is only 443 * required when using the new HVMOP_set_evtchn_upcall_vector 444 * hypercall, which allows using a different vector for each 445 * vCPU. Note that FreeBSD uses the same vector for all vCPUs 446 * because it's not dynamically allocated. 447 */ 448 rc = set_percpu_callback(PCPU_GET(vcpu_id)); 449 if (rc != 0) 450 panic("Event channel upcall vector setup failed: %d", 451 rc); 452 } 453 454 xen_setup_vcpu_info(); 455 } 456 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 457 458 bool 459 xen_has_iommu_maps(void) 460 { 461 uint32_t regs[4]; 462 463 KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf")); 464 cpuid_count(xen_cpuid_base + 4, 0, regs); 465 466 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS); 467 } 468