1 /* 2 * Copyright (c) 2008, 2013 Citrix Systems, Inc. 3 * Copyright (c) 2012 Spectra Logic Corporation 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 #include <sys/bus.h> 33 #include <sys/kernel.h> 34 #include <sys/malloc.h> 35 #include <sys/proc.h> 36 #include <sys/smp.h> 37 #include <sys/systm.h> 38 39 #include <vm/vm.h> 40 #include <vm/pmap.h> 41 42 #include <dev/pci/pcivar.h> 43 44 #include <machine/cpufunc.h> 45 #include <machine/cpu.h> 46 #include <machine/smp.h> 47 48 #include <x86/apicreg.h> 49 50 #include <xen/xen-os.h> 51 #include <xen/features.h> 52 #include <xen/gnttab.h> 53 #include <xen/hypervisor.h> 54 #include <xen/hvm.h> 55 #include <xen/xen_intr.h> 56 57 #include <xen/interface/hvm/params.h> 58 #include <xen/interface/vcpu.h> 59 60 /*--------------------------- Forward Declarations ---------------------------*/ 61 #ifdef SMP 62 static driver_filter_t xen_smp_rendezvous_action; 63 static driver_filter_t xen_invltlb; 64 static driver_filter_t xen_invlpg; 65 static driver_filter_t xen_invlrng; 66 static driver_filter_t xen_invlcache; 67 #ifdef __i386__ 68 static driver_filter_t xen_lazypmap; 69 #endif 70 static driver_filter_t xen_ipi_bitmap_handler; 71 static driver_filter_t xen_cpustop_handler; 72 static driver_filter_t xen_cpususpend_handler; 73 static driver_filter_t xen_cpustophard_handler; 74 static void xen_ipi_vectored(u_int vector, int dest); 75 static void xen_hvm_cpu_resume(void); 76 #endif 77 static void xen_hvm_cpu_init(void); 78 79 /*---------------------------- Extern Declarations ---------------------------*/ 80 #ifdef __i386__ 81 extern void pmap_lazyfix_action(void); 82 #endif 83 #ifdef __amd64__ 84 extern int pmap_pcid_enabled; 85 #endif 86 87 /* Variables used by mp_machdep to perform the bitmap IPI */ 88 extern volatile u_int cpu_ipi_pending[MAXCPU]; 89 90 /*---------------------------------- Macros ----------------------------------*/ 91 #define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) 92 93 /*-------------------------------- Local Types -------------------------------*/ 94 enum xen_hvm_init_type { 95 XEN_HVM_INIT_COLD, 96 XEN_HVM_INIT_CANCELLED_SUSPEND, 97 XEN_HVM_INIT_RESUME 98 }; 99 100 struct xen_ipi_handler 101 { 102 driver_filter_t *filter; 103 const char *description; 104 }; 105 106 /*-------------------------------- Global Data -------------------------------*/ 107 enum xen_domain_type xen_domain_type = XEN_NATIVE; 108 109 #ifdef SMP 110 struct cpu_ops xen_hvm_cpu_ops = { 111 .ipi_vectored = lapic_ipi_vectored, 112 .cpu_init = xen_hvm_cpu_init, 113 .cpu_resume = xen_hvm_cpu_resume 114 }; 115 #endif 116 117 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); 118 119 #ifdef SMP 120 static struct xen_ipi_handler xen_ipis[] = 121 { 122 [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, 123 [IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"}, 124 [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, 125 [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, 126 [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, 127 #ifdef __i386__ 128 [IPI_TO_IDX(IPI_LAZYPMAP)] = { xen_lazypmap, "lp" }, 129 #endif 130 [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, 131 [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, 132 [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, 133 [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, 134 }; 135 #endif 136 137 /** 138 * If non-zero, the hypervisor has been configured to use a direct 139 * IDT event callback for interrupt injection. 140 */ 141 int xen_vector_callback_enabled; 142 143 /*------------------------------- Per-CPU Data -------------------------------*/ 144 DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); 145 DPCPU_DEFINE(struct vcpu_info *, vcpu_info); 146 #ifdef SMP 147 DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); 148 #endif 149 150 /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ 151 /** Hypercall table accessed via HYPERVISOR_*_op() methods. */ 152 extern char *hypercall_page; 153 shared_info_t *HYPERVISOR_shared_info; 154 start_info_t *HYPERVISOR_start_info; 155 156 #ifdef SMP 157 /*---------------------------- XEN PV IPI Handlers ---------------------------*/ 158 /* 159 * This are C clones of the ASM functions found in apic_vector.s 160 */ 161 static int 162 xen_ipi_bitmap_handler(void *arg) 163 { 164 struct trapframe *frame; 165 166 frame = arg; 167 ipi_bitmap_handler(*frame); 168 return (FILTER_HANDLED); 169 } 170 171 static int 172 xen_smp_rendezvous_action(void *arg) 173 { 174 #ifdef COUNT_IPIS 175 (*ipi_rendezvous_counts[PCPU_GET(cpuid)])++; 176 #endif /* COUNT_IPIS */ 177 178 smp_rendezvous_action(); 179 return (FILTER_HANDLED); 180 } 181 182 static int 183 xen_invltlb(void *arg) 184 { 185 186 invltlb_handler(); 187 return (FILTER_HANDLED); 188 } 189 190 #ifdef __amd64__ 191 static int 192 xen_invltlb_pcid(void *arg) 193 { 194 195 invltlb_pcid_handler(); 196 return (FILTER_HANDLED); 197 } 198 #endif 199 200 static int 201 xen_invlpg(void *arg) 202 { 203 204 invlpg_handler(); 205 return (FILTER_HANDLED); 206 } 207 208 #ifdef __amd64__ 209 static int 210 xen_invlpg_pcid(void *arg) 211 { 212 213 invlpg_pcid_handler(); 214 return (FILTER_HANDLED); 215 } 216 #endif 217 218 static int 219 xen_invlrng(void *arg) 220 { 221 222 invlrng_handler(); 223 return (FILTER_HANDLED); 224 } 225 226 static int 227 xen_invlcache(void *arg) 228 { 229 230 invlcache_handler(); 231 return (FILTER_HANDLED); 232 } 233 234 #ifdef __i386__ 235 static int 236 xen_lazypmap(void *arg) 237 { 238 239 pmap_lazyfix_action(); 240 return (FILTER_HANDLED); 241 } 242 #endif 243 244 static int 245 xen_cpustop_handler(void *arg) 246 { 247 248 cpustop_handler(); 249 return (FILTER_HANDLED); 250 } 251 252 static int 253 xen_cpususpend_handler(void *arg) 254 { 255 256 cpususpend_handler(); 257 return (FILTER_HANDLED); 258 } 259 260 static int 261 xen_cpustophard_handler(void *arg) 262 { 263 264 ipi_nmi_handler(); 265 return (FILTER_HANDLED); 266 } 267 268 /* Xen PV IPI sender */ 269 static void 270 xen_ipi_vectored(u_int vector, int dest) 271 { 272 xen_intr_handle_t *ipi_handle; 273 int ipi_idx, to_cpu, self; 274 275 ipi_idx = IPI_TO_IDX(vector); 276 if (ipi_idx > nitems(xen_ipis)) 277 panic("IPI out of range"); 278 279 switch(dest) { 280 case APIC_IPI_DEST_SELF: 281 ipi_handle = DPCPU_GET(ipi_handle); 282 xen_intr_signal(ipi_handle[ipi_idx]); 283 break; 284 case APIC_IPI_DEST_ALL: 285 CPU_FOREACH(to_cpu) { 286 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 287 xen_intr_signal(ipi_handle[ipi_idx]); 288 } 289 break; 290 case APIC_IPI_DEST_OTHERS: 291 self = PCPU_GET(cpuid); 292 CPU_FOREACH(to_cpu) { 293 if (to_cpu != self) { 294 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 295 xen_intr_signal(ipi_handle[ipi_idx]); 296 } 297 } 298 break; 299 default: 300 to_cpu = apic_cpuid(dest); 301 ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); 302 xen_intr_signal(ipi_handle[ipi_idx]); 303 break; 304 } 305 } 306 307 /* XEN diverged cpu operations */ 308 static void 309 xen_hvm_cpu_resume(void) 310 { 311 u_int cpuid = PCPU_GET(cpuid); 312 313 /* 314 * Reset pending bitmap IPIs, because Xen doesn't preserve pending 315 * event channels on migration. 316 */ 317 cpu_ipi_pending[cpuid] = 0; 318 319 /* register vcpu_info area */ 320 xen_hvm_cpu_init(); 321 } 322 323 static void 324 xen_cpu_ipi_init(int cpu) 325 { 326 xen_intr_handle_t *ipi_handle; 327 const struct xen_ipi_handler *ipi; 328 device_t dev; 329 int idx, rc; 330 331 ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); 332 dev = pcpu_find(cpu)->pc_device; 333 KASSERT((dev != NULL), ("NULL pcpu device_t")); 334 335 for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { 336 337 if (ipi->filter == NULL) { 338 ipi_handle[idx] = NULL; 339 continue; 340 } 341 342 rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter, 343 INTR_TYPE_TTY, &ipi_handle[idx]); 344 if (rc != 0) 345 panic("Unable to allocate a XEN IPI port"); 346 xen_intr_describe(ipi_handle[idx], "%s", ipi->description); 347 } 348 } 349 350 static void 351 xen_setup_cpus(void) 352 { 353 int i; 354 355 if (!xen_vector_callback_enabled) 356 return; 357 358 #ifdef __amd64__ 359 if (pmap_pcid_enabled) { 360 xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid; 361 xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid; 362 } 363 #endif 364 CPU_FOREACH(i) 365 xen_cpu_ipi_init(i); 366 367 /* Set the xen pv ipi ops to replace the native ones */ 368 cpu_ops.ipi_vectored = xen_ipi_vectored; 369 } 370 #endif 371 372 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ 373 static uint32_t 374 xen_hvm_cpuid_base(void) 375 { 376 uint32_t base, regs[4]; 377 378 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 379 do_cpuid(base, regs); 380 if (!memcmp("XenVMMXenVMM", ®s[1], 12) 381 && (regs[0] - base) >= 2) 382 return (base); 383 } 384 return (0); 385 } 386 387 /* 388 * Allocate and fill in the hypcall page. 389 */ 390 static int 391 xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type) 392 { 393 uint32_t base, regs[4]; 394 int i; 395 396 if (xen_pv_domain()) { 397 /* hypercall page is already set in the PV case */ 398 return (0); 399 } 400 401 base = xen_hvm_cpuid_base(); 402 if (base == 0) 403 return (ENXIO); 404 405 if (init_type == XEN_HVM_INIT_COLD) { 406 do_cpuid(base + 1, regs); 407 printf("XEN: Hypervisor version %d.%d detected.\n", 408 regs[0] >> 16, regs[0] & 0xffff); 409 } 410 411 /* 412 * Find the hypercall pages. 413 */ 414 do_cpuid(base + 2, regs); 415 416 for (i = 0; i < regs[0]; i++) 417 wrmsr(regs[1], vtophys(&hypercall_page + i * PAGE_SIZE) + i); 418 419 return (0); 420 } 421 422 static void 423 xen_hvm_init_shared_info_page(void) 424 { 425 struct xen_add_to_physmap xatp; 426 427 if (xen_pv_domain()) { 428 /* 429 * Already setup in the PV case, shared_info is passed inside 430 * of the start_info struct at start of day. 431 */ 432 return; 433 } 434 435 if (HYPERVISOR_shared_info == NULL) { 436 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); 437 if (HYPERVISOR_shared_info == NULL) 438 panic("Unable to allocate Xen shared info page"); 439 } 440 441 xatp.domid = DOMID_SELF; 442 xatp.idx = 0; 443 xatp.space = XENMAPSPACE_shared_info; 444 xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; 445 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 446 panic("HYPERVISOR_memory_op failed"); 447 } 448 449 /* 450 * Tell the hypervisor how to contact us for event channel callbacks. 451 */ 452 void 453 xen_hvm_set_callback(device_t dev) 454 { 455 struct xen_hvm_param xhp; 456 int irq; 457 458 if (xen_vector_callback_enabled) 459 return; 460 461 xhp.domid = DOMID_SELF; 462 xhp.index = HVM_PARAM_CALLBACK_IRQ; 463 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { 464 int error; 465 466 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); 467 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); 468 if (error == 0) { 469 xen_vector_callback_enabled = 1; 470 return; 471 } 472 printf("Xen HVM callback vector registration failed (%d). " 473 "Falling back to emulated device interrupt\n", error); 474 } 475 xen_vector_callback_enabled = 0; 476 if (dev == NULL) { 477 /* 478 * Called from early boot or resume. 479 * xenpci will invoke us again later. 480 */ 481 return; 482 } 483 484 irq = pci_get_irq(dev); 485 if (irq < 16) { 486 xhp.value = HVM_CALLBACK_GSI(irq); 487 } else { 488 u_int slot; 489 u_int pin; 490 491 slot = pci_get_slot(dev); 492 pin = pci_get_intpin(dev) - 1; 493 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); 494 } 495 496 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) 497 panic("Can't set evtchn callback"); 498 } 499 500 #define XEN_MAGIC_IOPORT 0x10 501 enum { 502 XMI_MAGIC = 0x49d2, 503 XMI_UNPLUG_IDE_DISKS = 0x01, 504 XMI_UNPLUG_NICS = 0x02, 505 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 506 }; 507 508 static void 509 xen_hvm_disable_emulated_devices(void) 510 { 511 512 if (xen_pv_domain()) { 513 /* 514 * No emulated devices in the PV case, so no need to unplug 515 * anything. 516 */ 517 return; 518 } 519 520 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) 521 return; 522 523 if (bootverbose) 524 printf("XEN: Disabling emulated block and network devices\n"); 525 outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS); 526 } 527 528 static void 529 xen_hvm_init(enum xen_hvm_init_type init_type) 530 { 531 int error; 532 int i; 533 534 if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) 535 return; 536 537 error = xen_hvm_init_hypercall_stubs(init_type); 538 539 switch (init_type) { 540 case XEN_HVM_INIT_COLD: 541 if (error != 0) 542 return; 543 544 /* 545 * If xen_domain_type is not set at this point 546 * it means we are inside a (PV)HVM guest, because 547 * for PVH the guest type is set much earlier 548 * (see hammer_time_xen). 549 */ 550 if (!xen_domain()) { 551 xen_domain_type = XEN_HVM_DOMAIN; 552 vm_guest = VM_GUEST_XEN; 553 } 554 555 setup_xen_features(); 556 #ifdef SMP 557 cpu_ops = xen_hvm_cpu_ops; 558 #endif 559 break; 560 case XEN_HVM_INIT_RESUME: 561 if (error != 0) 562 panic("Unable to init Xen hypercall stubs on resume"); 563 564 /* Clear stale vcpu_info. */ 565 CPU_FOREACH(i) 566 DPCPU_ID_SET(i, vcpu_info, NULL); 567 break; 568 default: 569 panic("Unsupported HVM initialization type"); 570 } 571 572 xen_vector_callback_enabled = 0; 573 xen_hvm_set_callback(NULL); 574 575 /* 576 * On (PV)HVM domains we need to request the hypervisor to 577 * fill the shared info page, for PVH guest the shared_info page 578 * is passed inside the start_info struct and is already set, so this 579 * functions are no-ops. 580 */ 581 xen_hvm_init_shared_info_page(); 582 xen_hvm_disable_emulated_devices(); 583 } 584 585 void 586 xen_hvm_suspend(void) 587 { 588 } 589 590 void 591 xen_hvm_resume(bool suspend_cancelled) 592 { 593 594 xen_hvm_init(suspend_cancelled ? 595 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); 596 597 /* Register vcpu_info area for CPU#0. */ 598 xen_hvm_cpu_init(); 599 } 600 601 static void 602 xen_hvm_sysinit(void *arg __unused) 603 { 604 xen_hvm_init(XEN_HVM_INIT_COLD); 605 } 606 607 static void 608 xen_set_vcpu_id(void) 609 { 610 struct pcpu *pc; 611 int i; 612 613 if (!xen_hvm_domain()) 614 return; 615 616 /* Set vcpu_id to acpi_id */ 617 CPU_FOREACH(i) { 618 pc = pcpu_find(i); 619 pc->pc_vcpu_id = pc->pc_acpi_id; 620 if (bootverbose) 621 printf("XEN: CPU %u has VCPU ID %u\n", 622 i, pc->pc_vcpu_id); 623 } 624 } 625 626 static void 627 xen_hvm_cpu_init(void) 628 { 629 struct vcpu_register_vcpu_info info; 630 struct vcpu_info *vcpu_info; 631 int cpu, rc; 632 633 if (!xen_domain()) 634 return; 635 636 if (DPCPU_GET(vcpu_info) != NULL) { 637 /* 638 * vcpu_info is already set. We're resuming 639 * from a failed migration and our pre-suspend 640 * configuration is still valid. 641 */ 642 return; 643 } 644 645 vcpu_info = DPCPU_PTR(vcpu_local_info); 646 cpu = PCPU_GET(vcpu_id); 647 info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; 648 info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); 649 650 rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); 651 if (rc != 0) 652 DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); 653 else 654 DPCPU_SET(vcpu_info, vcpu_info); 655 } 656 657 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); 658 #ifdef SMP 659 /* We need to setup IPIs before APs are started */ 660 SYSINIT(xen_setup_cpus, SI_SUB_SMP-1, SI_ORDER_FIRST, xen_setup_cpus, NULL); 661 #endif 662 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); 663 SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL); 664