1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/modctl.h> 27 #include <sys/types.h> 28 #include <sys/archsystm.h> 29 #include <sys/machsystm.h> 30 #include <sys/sunndi.h> 31 #include <sys/sunddi.h> 32 #include <sys/ddi_subrdefs.h> 33 #include <sys/xpv_support.h> 34 #include <sys/xen_errno.h> 35 #include <sys/hypervisor.h> 36 #include <sys/gnttab.h> 37 #include <sys/xenbus_comms.h> 38 #include <sys/xenbus_impl.h> 39 #include <xen/sys/xendev.h> 40 #include <sys/sysmacros.h> 41 #include <sys/x86_archext.h> 42 #include <sys/mman.h> 43 #include <sys/stat.h> 44 #include <sys/conf.h> 45 #include <sys/devops.h> 46 #include <sys/pc_mmu.h> 47 #include <sys/cmn_err.h> 48 #include <sys/cpr.h> 49 #include <sys/ddi.h> 50 #include <vm/seg_kmem.h> 51 #include <vm/as.h> 52 #include <vm/hat_pte.h> 53 #include <vm/hat_i86.h> 54 55 #define XPV_MINOR 0 56 #define XPV_BUFSIZE 128 57 58 /* 59 * This structure is ordinarily constructed by Xen. In the HVM world, we 60 * manually fill in the few fields the PV drivers need. 61 */ 62 start_info_t *xen_info = NULL; 63 64 /* Xen version number. */ 65 int xen_major, xen_minor; 66 67 /* Metadata page shared between domain and Xen */ 68 shared_info_t *HYPERVISOR_shared_info = NULL; 69 70 /* Page containing code to issue hypercalls. */ 71 extern caddr_t hypercall_page; 72 73 /* Is the hypervisor 64-bit? */ 74 int xen_is_64bit = -1; 75 76 /* virtual addr for the store_mfn page */ 77 caddr_t xb_addr; 78 79 dev_info_t *xpv_dip; 80 static dev_info_t *xpvd_dip; 81 82 /* saved pfn of the shared info page */ 83 static pfn_t shared_info_frame; 84 85 #ifdef DEBUG 86 int xen_suspend_debug; 87 88 #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf 89 #else 90 #define SUSPEND_DEBUG(...) 91 #endif 92 93 /* 94 * Forward declarations 95 */ 96 static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 97 static int xpv_attach(dev_info_t *, ddi_attach_cmd_t); 98 static int xpv_detach(dev_info_t *, ddi_detach_cmd_t); 99 static int xpv_open(dev_t *, int, int, cred_t *); 100 static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 101 102 static struct cb_ops xpv_cb_ops = { 103 xpv_open, 104 nulldev, /* close */ 105 nodev, /* strategy */ 106 nodev, /* print */ 107 nodev, /* dump */ 108 nodev, /* read */ 109 nodev, /* write */ 110 xpv_ioctl, /* ioctl */ 111 nodev, /* devmap */ 112 nodev, /* mmap */ 113 nodev, /* segmap */ 114 nochpoll, /* poll */ 115 ddi_prop_op, 116 NULL, 117 D_MP, 118 CB_REV, 119 NULL, 120 NULL 121 }; 122 123 static struct dev_ops xpv_dv_ops = { 124 DEVO_REV, 125 0, 126 xpv_getinfo, 127 nulldev, /* identify */ 128 nulldev, /* probe */ 129 xpv_attach, 130 xpv_detach, 131 nodev, /* reset */ 132 &xpv_cb_ops, 133 NULL, /* struct bus_ops */ 134 NULL /* power */ 135 }; 136 137 static struct modldrv modldrv = { 138 &mod_driverops, 139 "xpv driver", 140 &xpv_dv_ops 141 }; 142 143 static struct modlinkage modl = { 144 MODREV_1, 145 { 146 (void *)&modldrv, 147 NULL /* null termination */ 148 } 149 }; 150 151 static ddi_dma_attr_t xpv_dma_attr = { 152 DMA_ATTR_V0, /* version of this structure */ 153 0, /* lowest usable address */ 154 0xffffffffffffffffULL, /* highest usable address */ 155 0x7fffffff, /* maximum DMAable byte count */ 156 MMU_PAGESIZE, /* alignment in bytes */ 157 0x7ff, /* bitmap of burst sizes */ 158 1, /* minimum transfer */ 159 0xffffffffU, /* maximum transfer */ 160 0x7fffffffULL, /* maximum segment length */ 161 1, /* maximum number of segments */ 162 1, /* granularity */ 163 0, /* flags (reserved) */ 164 }; 165 166 static ddi_device_acc_attr_t xpv_accattr = { 167 DDI_DEVICE_ATTR_V0, 168 DDI_NEVERSWAP_ACC, 169 DDI_STRICTORDER_ACC 170 }; 171 172 #define MAX_ALLOCATIONS 10 173 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS]; 174 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS]; 175 static int xen_alloc_cnt = 0; 176 177 void * 178 xen_alloc_pages(pgcnt_t cnt) 179 { 180 size_t len; 181 int a = xen_alloc_cnt++; 182 caddr_t addr; 183 184 ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS); 185 if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0, 186 &xpv_dma_handle[a]) != DDI_SUCCESS) 187 return (NULL); 188 189 if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt, 190 &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, 191 &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) { 192 ddi_dma_free_handle(&xpv_dma_handle[a]); 193 cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices"); 194 return (NULL); 195 } 196 return (addr); 197 } 198 199 /* 200 * This function is invoked twice, first time with reprogram=0 to set up 201 * the xpvd portion of the device tree. The second time it is ignored. 202 */ 203 static void 204 xpv_enumerate(int reprogram) 205 { 206 dev_info_t *dip; 207 208 if (reprogram != 0) 209 return; 210 211 ndi_devi_alloc_sleep(ddi_root_node(), "xpvd", 212 (pnode_t)DEVI_SID_NODEID, &dip); 213 214 (void) ndi_devi_bind_driver(dip, 0); 215 216 /* 217 * Too early to enumerate split device drivers in domU 218 * since we need to create taskq thread during enumeration. 219 * So, we only enumerate softdevs and console here. 220 */ 221 xendev_enum_all(dip, B_TRUE); 222 } 223 224 /* 225 * Translate a hypervisor errcode to a Solaris error code. 226 */ 227 int 228 xen_xlate_errcode(int error) 229 { 230 #define CASE(num) case X_##num: error = num; break 231 232 switch (-error) { 233 CASE(EPERM); CASE(ENOENT); CASE(ESRCH); 234 CASE(EINTR); CASE(EIO); CASE(ENXIO); 235 CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); 236 CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); 237 CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); 238 CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); 239 CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); 240 CASE(ENODATA); 241 default: 242 panic("xen_xlate_errcode: unknown error %d", error); 243 } 244 return (error); 245 #undef CASE 246 } 247 248 /*PRINTFLIKE1*/ 249 void 250 xen_printf(const char *fmt, ...) 251 { 252 va_list adx; 253 254 va_start(adx, fmt); 255 printf(fmt, adx); 256 va_end(adx); 257 } 258 259 /* 260 * Stub functions to get the FE drivers to build, and to catch drivers that 261 * misbehave in HVM domains. 262 */ 263 /*ARGSUSED*/ 264 void 265 xen_release_pfn(pfn_t pfn, caddr_t va) 266 { 267 panic("xen_release_pfn() is not supported in HVM domains"); 268 } 269 270 /*ARGSUSED*/ 271 void 272 reassign_pfn(pfn_t pfn, mfn_t mfn) 273 { 274 panic("reassign_pfn() is not supported in HVM domains"); 275 } 276 277 /*ARGSUSED*/ 278 long 279 balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns) 280 { 281 panic("balloon_free_pages() is not supported in HVM domains"); 282 return (0); 283 } 284 285 /*ARGSUSED*/ 286 void 287 balloon_drv_added(int64_t delta) 288 { 289 panic("balloon_drv_added() is not supported in HVM domains"); 290 } 291 292 /* 293 * Add a mapping for the machine page at the given virtual address. 294 */ 295 void 296 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level) 297 { 298 ASSERT(level == 0); 299 300 hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE, 301 mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD); 302 } 303 304 static uint64_t 305 hvm_get_param(int param_id) 306 { 307 struct xen_hvm_param xhp; 308 309 xhp.domid = DOMID_SELF; 310 xhp.index = param_id; 311 if ((HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0)) 312 return (-1); 313 return (xhp.value); 314 } 315 316 static struct xenbus_watch shutdown_watch; 317 taskq_t *xen_shutdown_tq; 318 319 #define SHUTDOWN_INVALID -1 320 #define SHUTDOWN_POWEROFF 0 321 #define SHUTDOWN_REBOOT 1 322 #define SHUTDOWN_SUSPEND 2 323 #define SHUTDOWN_HALT 3 324 #define SHUTDOWN_MAX 4 325 326 #define SHUTDOWN_TIMEOUT_SECS (60 * 5) 327 328 int 329 xen_suspend_devices(dev_info_t *dip) 330 { 331 int error; 332 char buf[XPV_BUFSIZE]; 333 334 SUSPEND_DEBUG("xen_suspend_devices\n"); 335 336 for (; dip != NULL; dip = ddi_get_next_sibling(dip)) { 337 if (xen_suspend_devices(ddi_get_child(dip))) 338 return (ENXIO); 339 if (ddi_get_driver(dip) == NULL) 340 continue; 341 SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf)); 342 ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0); 343 344 345 if (!i_ddi_devi_attached(dip)) { 346 error = DDI_FAILURE; 347 } else { 348 error = devi_detach(dip, DDI_SUSPEND); 349 } 350 351 if (error == DDI_SUCCESS) { 352 DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED; 353 } else { 354 SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n", 355 ddi_deviname(dip, buf)); 356 cmn_err(CE_WARN, "Unable to suspend device %s.", 357 ddi_deviname(dip, buf)); 358 cmn_err(CE_WARN, "Device is busy or does not " 359 "support suspend/resume."); 360 return (ENXIO); 361 } 362 } 363 return (0); 364 } 365 366 int 367 xen_resume_devices(dev_info_t *start, int resume_failed) 368 { 369 dev_info_t *dip, *next, *last = NULL; 370 int did_suspend; 371 int error = resume_failed; 372 char buf[XPV_BUFSIZE]; 373 374 SUSPEND_DEBUG("xen_resume_devices\n"); 375 376 while (last != start) { 377 dip = start; 378 next = ddi_get_next_sibling(dip); 379 while (next != last) { 380 dip = next; 381 next = ddi_get_next_sibling(dip); 382 } 383 384 /* 385 * cpr is the only one that uses this field and the device 386 * itself hasn't resumed yet, there is no need to use a 387 * lock, even though kernel threads are active by now. 388 */ 389 did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED; 390 if (did_suspend) 391 DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED; 392 393 /* 394 * There may be background attaches happening on devices 395 * that were not originally suspended by cpr, so resume 396 * only devices that were suspended by cpr. Also, stop 397 * resuming after the first resume failure, but traverse 398 * the entire tree to clear the suspend flag. 399 */ 400 if (did_suspend && !error) { 401 SUSPEND_DEBUG("Resuming device %s\n", 402 ddi_deviname(dip, buf)); 403 /* 404 * If a device suspended by cpr gets detached during 405 * the resume process (for example, due to hotplugging) 406 * before cpr gets around to issuing it a DDI_RESUME, 407 * we'll have problems. 408 */ 409 if (!i_ddi_devi_attached(dip)) { 410 cmn_err(CE_WARN, "Skipping %s, device " 411 "not ready for resume", 412 ddi_deviname(dip, buf)); 413 } else { 414 if (devi_attach(dip, DDI_RESUME) != 415 DDI_SUCCESS) { 416 error = ENXIO; 417 } 418 } 419 } 420 421 if (error == ENXIO) { 422 cmn_err(CE_WARN, "Unable to resume device %s", 423 ddi_deviname(dip, buf)); 424 } 425 426 error = xen_resume_devices(ddi_get_child(dip), error); 427 last = dip; 428 } 429 430 return (error); 431 } 432 433 /*ARGSUSED*/ 434 static int 435 check_xpvd(dev_info_t *dip, void *arg) 436 { 437 char *name; 438 439 name = ddi_node_name(dip); 440 if (name == NULL || strcmp(name, "xpvd")) { 441 return (DDI_WALK_CONTINUE); 442 } else { 443 xpvd_dip = dip; 444 return (DDI_WALK_TERMINATE); 445 } 446 } 447 448 /* 449 * Top level routine to direct suspend/resume of a domain. 450 */ 451 void 452 xen_suspend_domain(void) 453 { 454 extern void rtcsync(void); 455 extern void ec_resume(void); 456 extern kmutex_t ec_lock; 457 struct xen_add_to_physmap xatp; 458 ulong_t flags; 459 int err; 460 461 cmn_err(CE_NOTE, "Domain suspending for save/migrate"); 462 463 SUSPEND_DEBUG("xen_suspend_domain\n"); 464 465 /* 466 * We only want to suspend the PV devices, since the emulated devices 467 * are suspended by saving the emulated device state. The PV devices 468 * are all children of the xpvd nexus device. So we search the 469 * device tree for the xpvd node to use as the root of the tree to 470 * be suspended. 471 */ 472 if (xpvd_dip == NULL) 473 ddi_walk_devs(ddi_root_node(), check_xpvd, NULL); 474 475 /* 476 * suspend interrupts and devices 477 */ 478 if (xpvd_dip != NULL) 479 (void) xen_suspend_devices(ddi_get_child(xpvd_dip)); 480 else 481 cmn_err(CE_WARN, "No PV devices found to suspend"); 482 SUSPEND_DEBUG("xenbus_suspend\n"); 483 xenbus_suspend(); 484 485 mutex_enter(&cpu_lock); 486 487 /* 488 * Suspend on vcpu 0 489 */ 490 thread_affinity_set(curthread, 0); 491 kpreempt_disable(); 492 493 if (ncpus > 1) 494 pause_cpus(NULL); 495 /* 496 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence 497 * any holder would have dropped it to get through pause_cpus(). 498 */ 499 mutex_enter(&ec_lock); 500 501 /* 502 * From here on in, we can't take locks. 503 */ 504 505 flags = intr_clear(); 506 507 SUSPEND_DEBUG("HYPERVISOR_suspend\n"); 508 /* 509 * At this point we suspend and sometime later resume. 510 * Note that this call may return with an indication of a cancelled 511 * for now no matter ehat the return we do a full resume of all 512 * suspended drivers, etc. 513 */ 514 (void) HYPERVISOR_shutdown(SHUTDOWN_suspend); 515 516 /* 517 * Point HYPERVISOR_shared_info to the proper place. 518 */ 519 xatp.domid = DOMID_SELF; 520 xatp.idx = 0; 521 xatp.space = XENMAPSPACE_shared_info; 522 xatp.gpfn = shared_info_frame; 523 if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) 524 panic("Could not set shared_info page. error: %d", err); 525 526 SUSPEND_DEBUG("gnttab_resume\n"); 527 gnttab_resume(); 528 529 SUSPEND_DEBUG("ec_resume\n"); 530 ec_resume(); 531 532 intr_restore(flags); 533 534 if (ncpus > 1) 535 start_cpus(); 536 537 mutex_exit(&ec_lock); 538 mutex_exit(&cpu_lock); 539 540 /* 541 * Now we can take locks again. 542 */ 543 544 rtcsync(); 545 546 SUSPEND_DEBUG("xenbus_resume\n"); 547 xenbus_resume(); 548 SUSPEND_DEBUG("xen_resume_devices\n"); 549 if (xpvd_dip != NULL) 550 (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0); 551 552 thread_affinity_clear(curthread); 553 kpreempt_enable(); 554 555 SUSPEND_DEBUG("finished xen_suspend_domain\n"); 556 557 cmn_err(CE_NOTE, "domain restore/migrate completed"); 558 } 559 560 static void 561 xen_dirty_shutdown(void *arg) 562 { 563 int cmd = (uintptr_t)arg; 564 565 cmn_err(CE_WARN, "Externally requested shutdown failed or " 566 "timed out.\nShutting down.\n"); 567 568 switch (cmd) { 569 case SHUTDOWN_HALT: 570 case SHUTDOWN_POWEROFF: 571 (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred); 572 break; 573 case SHUTDOWN_REBOOT: 574 (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred); 575 break; 576 } 577 } 578 579 static void 580 xen_shutdown(void *arg) 581 { 582 int cmd = (uintptr_t)arg; 583 proc_t *initpp; 584 585 ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX); 586 587 if (cmd == SHUTDOWN_SUSPEND) { 588 xen_suspend_domain(); 589 return; 590 } 591 592 switch (cmd) { 593 case SHUTDOWN_POWEROFF: 594 force_shutdown_method = AD_POWEROFF; 595 break; 596 case SHUTDOWN_HALT: 597 force_shutdown_method = AD_HALT; 598 break; 599 case SHUTDOWN_REBOOT: 600 force_shutdown_method = AD_BOOT; 601 break; 602 } 603 604 605 /* 606 * If we're still booting and init(1) isn't set up yet, simply halt. 607 */ 608 mutex_enter(&pidlock); 609 initpp = prfind(P_INITPID); 610 mutex_exit(&pidlock); 611 if (initpp == NULL) { 612 extern void halt(char *); 613 halt("Power off the System"); /* just in case */ 614 } 615 616 /* 617 * else, graceful shutdown with inittab and all getting involved 618 */ 619 psignal(initpp, SIGPWR); 620 621 (void) timeout(xen_dirty_shutdown, arg, 622 SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC)); 623 } 624 625 /*ARGSUSED*/ 626 static void 627 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec, 628 unsigned int len) 629 { 630 char *str; 631 xenbus_transaction_t xbt; 632 int err, shutdown_code = SHUTDOWN_INVALID; 633 unsigned int slen; 634 635 again: 636 err = xenbus_transaction_start(&xbt); 637 if (err) 638 return; 639 if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) { 640 (void) xenbus_transaction_end(xbt, 1); 641 return; 642 } 643 644 SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str); 645 646 /* 647 * If this is a watch fired from our write below, check out early to 648 * avoid an infinite loop. 649 */ 650 if (strcmp(str, "") == 0) { 651 (void) xenbus_transaction_end(xbt, 0); 652 kmem_free(str, slen); 653 return; 654 } else if (strcmp(str, "poweroff") == 0) { 655 shutdown_code = SHUTDOWN_POWEROFF; 656 } else if (strcmp(str, "reboot") == 0) { 657 shutdown_code = SHUTDOWN_REBOOT; 658 } else if (strcmp(str, "suspend") == 0) { 659 shutdown_code = SHUTDOWN_SUSPEND; 660 } else if (strcmp(str, "halt") == 0) { 661 shutdown_code = SHUTDOWN_HALT; 662 } else { 663 printf("Ignoring shutdown request: %s\n", str); 664 } 665 666 (void) xenbus_write(xbt, "control", "shutdown", ""); 667 err = xenbus_transaction_end(xbt, 0); 668 if (err == EAGAIN) { 669 SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id); 670 kmem_free(str, slen); 671 goto again; 672 } 673 674 kmem_free(str, slen); 675 if (shutdown_code != SHUTDOWN_INVALID) { 676 (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown, 677 (void *)(intptr_t)shutdown_code, 0); 678 } 679 } 680 681 static int 682 xen_pv_init(dev_info_t *xpv_dip) 683 { 684 struct cpuid_regs cp; 685 uint32_t xen_signature[4]; 686 char *xen_str; 687 struct xen_add_to_physmap xatp; 688 xen_capabilities_info_t caps; 689 pfn_t pfn; 690 uint64_t msrval; 691 int err; 692 693 /* 694 * Xen's pseudo-cpuid function 0x40000000 returns a string 695 * representing the Xen signature in %ebx, %ecx, and %edx. 696 * %eax contains the maximum supported cpuid function. 697 */ 698 cp.cp_eax = 0x40000000; 699 (void) __cpuid_insn(&cp); 700 xen_signature[0] = cp.cp_ebx; 701 xen_signature[1] = cp.cp_ecx; 702 xen_signature[2] = cp.cp_edx; 703 xen_signature[3] = 0; 704 xen_str = (char *)xen_signature; 705 if (strcmp("XenVMMXenVMM", xen_str) != 0 || 706 cp.cp_eax < 0x40000002) { 707 cmn_err(CE_WARN, 708 "Attempting to load Xen drivers on non-Xen system"); 709 return (-1); 710 } 711 712 /* 713 * cpuid function 0x40000001 returns the Xen version in %eax. The 714 * top 16 bits are the major version, the bottom 16 are the minor 715 * version. 716 */ 717 cp.cp_eax = 0x40000001; 718 (void) __cpuid_insn(&cp); 719 xen_major = cp.cp_eax >> 16; 720 xen_minor = cp.cp_eax & 0xffff; 721 722 /* 723 * The xpv driver is incompatible with xen versions older than 3.1. This 724 * is due to the changes in the vcpu_info and shared_info structs used 725 * to communicate with the hypervisor (the event channels in particular) 726 * that were introduced with 3.1. 727 */ 728 if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) { 729 cmn_err(CE_WARN, "Xen version %d.%d is not supported", 730 xen_major, xen_minor); 731 return (-1); 732 } 733 734 /* 735 * cpuid function 0x40000002 returns information about the 736 * hypercall page. %eax nominally contains the number of pages 737 * with hypercall code, but according to the Xen guys, "I'll 738 * guarantee that remains one forever more, so you can just 739 * allocate a single page and get quite upset if you ever see CPUID 740 * return more than one page." %ebx contains an MSR we use to ask 741 * Xen to remap each page at a specific pfn. 742 */ 743 cp.cp_eax = 0x40000002; 744 (void) __cpuid_insn(&cp); 745 746 /* 747 * Let Xen know where we want the hypercall page mapped. We 748 * already have a page allocated in the .text section to simplify 749 * the wrapper code. 750 */ 751 pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page); 752 msrval = mmu_ptob(pfn); 753 wrmsr(cp.cp_ebx, msrval); 754 755 /* Fill in the xen_info data */ 756 xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP); 757 (void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor); 758 xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN); 759 xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN); 760 761 /* Figure out whether the hypervisor is 32-bit or 64-bit. */ 762 if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) { 763 ((char *)(caps))[sizeof (caps) - 1] = '\0'; 764 if (strstr(caps, "x86_64") != NULL) 765 xen_is_64bit = 1; 766 else if (strstr(caps, "x86_32") != NULL) 767 xen_is_64bit = 0; 768 } 769 if (xen_is_64bit < 0) { 770 cmn_err(CE_WARN, "Couldn't get capability info from Xen."); 771 return (-1); 772 } 773 #ifdef __amd64 774 ASSERT(xen_is_64bit == 1); 775 #endif 776 777 /* 778 * Allocate space for the shared_info page and tell Xen where it 779 * is. 780 */ 781 HYPERVISOR_shared_info = xen_alloc_pages(1); 782 shared_info_frame = hat_getpfnum(kas.a_hat, 783 (caddr_t)HYPERVISOR_shared_info); 784 xatp.domid = DOMID_SELF; 785 xatp.idx = 0; 786 xatp.space = XENMAPSPACE_shared_info; 787 xatp.gpfn = shared_info_frame; 788 if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) { 789 cmn_err(CE_WARN, "Could not get shared_info page from Xen." 790 " error: %d", err); 791 return (-1); 792 } 793 794 /* Set up the grant tables. */ 795 gnttab_init(); 796 797 /* Set up event channel support */ 798 if (ec_init(xpv_dip) != 0) 799 return (-1); 800 801 /* Set up xenbus */ 802 xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP); 803 xs_early_init(); 804 xs_domu_init(); 805 806 /* Set up for suspend/resume/migrate */ 807 xen_shutdown_tq = taskq_create("shutdown_taskq", 1, 808 maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); 809 shutdown_watch.node = "control/shutdown"; 810 shutdown_watch.callback = xen_shutdown_handler; 811 if (register_xenbus_watch(&shutdown_watch)) 812 cmn_err(CE_WARN, "Failed to set shutdown watcher"); 813 814 return (0); 815 } 816 817 static void 818 xen_pv_fini() 819 { 820 if (xen_info != NULL) 821 kmem_free(xen_info, sizeof (start_info_t)); 822 ec_fini(); 823 } 824 825 /*ARGSUSED*/ 826 static int 827 xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 828 { 829 if (getminor((dev_t)arg) != XPV_MINOR) 830 return (DDI_FAILURE); 831 832 switch (cmd) { 833 case DDI_INFO_DEVT2DEVINFO: 834 *result = xpv_dip; 835 break; 836 case DDI_INFO_DEVT2INSTANCE: 837 *result = 0; 838 break; 839 default: 840 return (DDI_FAILURE); 841 } 842 843 return (DDI_SUCCESS); 844 } 845 846 static int 847 xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 848 { 849 if (cmd != DDI_ATTACH) 850 return (DDI_FAILURE); 851 852 if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, 853 ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) 854 return (DDI_FAILURE); 855 856 xpv_dip = dip; 857 858 if (xen_pv_init(dip) != 0) 859 return (DDI_FAILURE); 860 861 ddi_report_dev(dip); 862 863 /* 864 * If the memscrubber attempts to scrub the pages we hand to Xen, 865 * the domain will panic. 866 */ 867 memscrub_disable(); 868 869 /* 870 * Report our version to dom0. 871 */ 872 if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d", 873 HVMPV_XPV_VERS)) 874 cmn_err(CE_WARN, "xpv: couldn't write version\n"); 875 876 return (DDI_SUCCESS); 877 } 878 879 /* 880 * Attempts to reload the PV driver plumbing hang on Intel platforms, so 881 * we don't want to unload the framework by accident. 882 */ 883 int xpv_allow_detach = 0; 884 885 static int 886 xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 887 { 888 if (cmd != DDI_DETACH || xpv_allow_detach == 0) 889 return (DDI_FAILURE); 890 891 if (xpv_dip != NULL) { 892 xen_pv_fini(); 893 ddi_remove_minor_node(dip, NULL); 894 xpv_dip = NULL; 895 } 896 897 return (DDI_SUCCESS); 898 } 899 900 /*ARGSUSED1*/ 901 static int 902 xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr) 903 { 904 return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO); 905 } 906 907 /*ARGSUSED*/ 908 static int 909 xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, 910 int *rval_p) 911 { 912 return (EINVAL); 913 } 914 915 int 916 _init(void) 917 { 918 int err; 919 920 if ((err = mod_install(&modl)) != 0) 921 return (err); 922 923 impl_bus_add_probe(xpv_enumerate); 924 return (0); 925 } 926 927 int 928 _fini(void) 929 { 930 int err; 931 932 if ((err = mod_remove(&modl)) != 0) 933 return (err); 934 935 impl_bus_delete_probe(xpv_enumerate); 936 return (0); 937 } 938 939 int 940 _info(struct modinfo *modinfop) 941 { 942 return (mod_info(&modl, modinfop)); 943 } 944