1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/modctl.h> 29 #include <sys/types.h> 30 #include <sys/archsystm.h> 31 #include <sys/machsystm.h> 32 #include <sys/sunndi.h> 33 #include <sys/sunddi.h> 34 #include <sys/ddi_subrdefs.h> 35 #include <sys/xpv_support.h> 36 #include <sys/xen_errno.h> 37 #include <sys/hypervisor.h> 38 #include <sys/gnttab.h> 39 #include <sys/xenbus_comms.h> 40 #include <sys/xenbus_impl.h> 41 #include <xen/sys/xendev.h> 42 #include <sys/sysmacros.h> 43 #include <sys/x86_archext.h> 44 #include <sys/mman.h> 45 #include <sys/stat.h> 46 #include <sys/conf.h> 47 #include <sys/devops.h> 48 #include <sys/pc_mmu.h> 49 #include <sys/cmn_err.h> 50 #include <sys/cpr.h> 51 #include <sys/ddi.h> 52 #include <vm/seg_kmem.h> 53 #include <vm/as.h> 54 #include <vm/hat_pte.h> 55 #include <vm/hat_i86.h> 56 57 #define XPV_MINOR 0 58 #define XPV_BUFSIZE 128 59 60 /* 61 * This structure is ordinarily constructed by Xen. In the HVM world, we 62 * manually fill in the few fields the PV drivers need. 63 */ 64 start_info_t *xen_info = NULL; 65 66 /* Xen version number. */ 67 int xen_major, xen_minor; 68 69 /* Metadata page shared between domain and Xen */ 70 shared_info_t *HYPERVISOR_shared_info = NULL; 71 72 /* Page containing code to issue hypercalls. */ 73 extern caddr_t hypercall_page; 74 75 /* Is the hypervisor 64-bit? */ 76 int xen_is_64bit = -1; 77 78 /* virtual addr for the store_mfn page */ 79 caddr_t xb_addr; 80 81 dev_info_t *xpv_dip; 82 static dev_info_t *xpvd_dip; 83 84 /* saved pfn of the shared info page */ 85 static pfn_t shared_info_frame; 86 87 #ifdef DEBUG 88 int xen_suspend_debug; 89 90 #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf 91 #else 92 #define SUSPEND_DEBUG(...) 93 #endif 94 95 /* 96 * Forward declarations 97 */ 98 static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 99 static int xpv_attach(dev_info_t *, ddi_attach_cmd_t); 100 static int xpv_detach(dev_info_t *, ddi_detach_cmd_t); 101 static int xpv_open(dev_t *, int, int, cred_t *); 102 static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 103 104 static struct cb_ops xpv_cb_ops = { 105 xpv_open, 106 nulldev, /* close */ 107 nodev, /* strategy */ 108 nodev, /* print */ 109 nodev, /* dump */ 110 nodev, /* read */ 111 nodev, /* write */ 112 xpv_ioctl, /* ioctl */ 113 nodev, /* devmap */ 114 nodev, /* mmap */ 115 nodev, /* segmap */ 116 nochpoll, /* poll */ 117 ddi_prop_op, 118 NULL, 119 D_MP, 120 CB_REV, 121 NULL, 122 NULL 123 }; 124 125 static struct dev_ops xpv_dv_ops = { 126 DEVO_REV, 127 0, 128 xpv_getinfo, 129 nulldev, /* identify */ 130 nulldev, /* probe */ 131 xpv_attach, 132 xpv_detach, 133 nodev, /* reset */ 134 &xpv_cb_ops, 135 NULL, /* struct bus_ops */ 136 NULL /* power */ 137 }; 138 139 static struct modldrv modldrv = { 140 &mod_driverops, 141 "xpv driver %I%", 142 &xpv_dv_ops 143 }; 144 145 static struct modlinkage modl = { 146 MODREV_1, 147 { 148 (void *)&modldrv, 149 NULL /* null termination */ 150 } 151 }; 152 153 static ddi_dma_attr_t xpv_dma_attr = { 154 DMA_ATTR_V0, /* version of this structure */ 155 0, /* lowest usable address */ 156 0xffffffffffffffffULL, /* highest usable address */ 157 0x7fffffff, /* maximum DMAable byte count */ 158 MMU_PAGESIZE, /* alignment in bytes */ 159 0x7ff, /* bitmap of burst sizes */ 160 1, /* minimum transfer */ 161 0xffffffffU, /* maximum transfer */ 162 0x7fffffffULL, /* maximum segment length */ 163 1, /* maximum number of segments */ 164 1, /* granularity */ 165 0, /* flags (reserved) */ 166 }; 167 168 static ddi_device_acc_attr_t xpv_accattr = { 169 DDI_DEVICE_ATTR_V0, 170 DDI_NEVERSWAP_ACC, 171 DDI_STRICTORDER_ACC 172 }; 173 174 #define MAX_ALLOCATIONS 10 175 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS]; 176 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS]; 177 static int xen_alloc_cnt = 0; 178 179 void * 180 xen_alloc_pages(pgcnt_t cnt) 181 { 182 size_t len; 183 int a = xen_alloc_cnt++; 184 caddr_t addr; 185 186 ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS); 187 if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0, 188 &xpv_dma_handle[a]) != DDI_SUCCESS) 189 return (NULL); 190 191 if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt, 192 &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, 193 &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) { 194 ddi_dma_free_handle(&xpv_dma_handle[a]); 195 cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices"); 196 return (NULL); 197 } 198 return (addr); 199 } 200 201 /* 202 * This function is invoked twice, first time with reprogram=0 to set up 203 * the xpvd portion of the device tree. The second time it is ignored. 204 */ 205 static void 206 xpv_enumerate(int reprogram) 207 { 208 dev_info_t *dip; 209 210 if (reprogram != 0) 211 return; 212 213 ndi_devi_alloc_sleep(ddi_root_node(), "xpvd", 214 (pnode_t)DEVI_SID_NODEID, &dip); 215 216 (void) ndi_devi_bind_driver(dip, 0); 217 218 /* 219 * Too early to enumerate split device drivers in domU 220 * since we need to create taskq thread during enumeration. 221 * So, we only enumerate softdevs and console here. 222 */ 223 xendev_enum_all(dip, B_TRUE); 224 } 225 226 /* 227 * Translate a hypervisor errcode to a Solaris error code. 228 */ 229 int 230 xen_xlate_errcode(int error) 231 { 232 #define CASE(num) case X_##num: error = num; break 233 234 switch (-error) { 235 CASE(EPERM); CASE(ENOENT); CASE(ESRCH); 236 CASE(EINTR); CASE(EIO); CASE(ENXIO); 237 CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); 238 CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); 239 CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); 240 CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); 241 CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); 242 CASE(ENODATA); 243 default: 244 panic("xen_xlate_errcode: unknown error %d", error); 245 } 246 return (error); 247 #undef CASE 248 } 249 250 /*PRINTFLIKE1*/ 251 void 252 xen_printf(const char *fmt, ...) 253 { 254 va_list adx; 255 256 va_start(adx, fmt); 257 printf(fmt, adx); 258 va_end(adx); 259 } 260 261 /* 262 * Stub functions to get the FE drivers to build, and to catch drivers that 263 * misbehave in HVM domains. 264 */ 265 /*ARGSUSED*/ 266 void 267 xen_release_pfn(pfn_t pfn, caddr_t va) 268 { 269 panic("xen_release_pfn() is not supported in HVM domains"); 270 } 271 272 /*ARGSUSED*/ 273 void 274 reassign_pfn(pfn_t pfn, mfn_t mfn) 275 { 276 panic("reassign_pfn() is not supported in HVM domains"); 277 } 278 279 /*ARGSUSED*/ 280 long 281 balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns) 282 { 283 panic("balloon_free_pages() is not supported in HVM domains"); 284 return (0); 285 } 286 287 /*ARGSUSED*/ 288 void 289 balloon_drv_added(int64_t delta) 290 { 291 panic("balloon_drv_added() is not supported in HVM domains"); 292 } 293 294 /* 295 * Add a mapping for the machine page at the given virtual address. 296 */ 297 void 298 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level) 299 { 300 ASSERT(level == 0); 301 302 hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE, 303 mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD); 304 } 305 306 static uint64_t 307 hvm_get_param(int param_id) 308 { 309 struct xen_hvm_param xhp; 310 311 xhp.domid = DOMID_SELF; 312 xhp.index = param_id; 313 if ((HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0)) 314 return (-1); 315 return (xhp.value); 316 } 317 318 static struct xenbus_watch shutdown_watch; 319 taskq_t *xen_shutdown_tq; 320 321 #define SHUTDOWN_INVALID -1 322 #define SHUTDOWN_POWEROFF 0 323 #define SHUTDOWN_REBOOT 1 324 #define SHUTDOWN_SUSPEND 2 325 #define SHUTDOWN_HALT 3 326 #define SHUTDOWN_MAX 4 327 328 #define SHUTDOWN_TIMEOUT_SECS (60 * 5) 329 330 static const char *cmd_strings[SHUTDOWN_MAX] = { 331 "poweroff", 332 "reboot", 333 "suspend", 334 "halt" 335 }; 336 337 int 338 xen_suspend_devices(dev_info_t *dip) 339 { 340 int error; 341 char buf[XPV_BUFSIZE]; 342 343 SUSPEND_DEBUG("xen_suspend_devices\n"); 344 345 for (; dip != NULL; dip = ddi_get_next_sibling(dip)) { 346 if (xen_suspend_devices(ddi_get_child(dip))) 347 return (ENXIO); 348 if (ddi_get_driver(dip) == NULL) 349 continue; 350 SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf)); 351 ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0); 352 353 354 if (!i_ddi_devi_attached(dip)) { 355 error = DDI_FAILURE; 356 } else { 357 error = devi_detach(dip, DDI_SUSPEND); 358 } 359 360 if (error == DDI_SUCCESS) { 361 DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED; 362 } else { 363 SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n", 364 ddi_deviname(dip, buf)); 365 cmn_err(CE_WARN, "Unable to suspend device %s.", 366 ddi_deviname(dip, buf)); 367 cmn_err(CE_WARN, "Device is busy or does not " 368 "support suspend/resume."); 369 return (ENXIO); 370 } 371 } 372 return (0); 373 } 374 375 int 376 xen_resume_devices(dev_info_t *start, int resume_failed) 377 { 378 dev_info_t *dip, *next, *last = NULL; 379 int did_suspend; 380 int error = resume_failed; 381 char buf[XPV_BUFSIZE]; 382 383 SUSPEND_DEBUG("xen_resume_devices\n"); 384 385 while (last != start) { 386 dip = start; 387 next = ddi_get_next_sibling(dip); 388 while (next != last) { 389 dip = next; 390 next = ddi_get_next_sibling(dip); 391 } 392 393 /* 394 * cpr is the only one that uses this field and the device 395 * itself hasn't resumed yet, there is no need to use a 396 * lock, even though kernel threads are active by now. 397 */ 398 did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED; 399 if (did_suspend) 400 DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED; 401 402 /* 403 * There may be background attaches happening on devices 404 * that were not originally suspended by cpr, so resume 405 * only devices that were suspended by cpr. Also, stop 406 * resuming after the first resume failure, but traverse 407 * the entire tree to clear the suspend flag. 408 */ 409 if (did_suspend && !error) { 410 SUSPEND_DEBUG("Resuming device %s\n", 411 ddi_deviname(dip, buf)); 412 /* 413 * If a device suspended by cpr gets detached during 414 * the resume process (for example, due to hotplugging) 415 * before cpr gets around to issuing it a DDI_RESUME, 416 * we'll have problems. 417 */ 418 if (!i_ddi_devi_attached(dip)) { 419 cmn_err(CE_WARN, "Skipping %s, device " 420 "not ready for resume", 421 ddi_deviname(dip, buf)); 422 } else { 423 if (devi_attach(dip, DDI_RESUME) != 424 DDI_SUCCESS) { 425 error = ENXIO; 426 } 427 } 428 } 429 430 if (error == ENXIO) { 431 cmn_err(CE_WARN, "Unable to resume device %s", 432 ddi_deviname(dip, buf)); 433 } 434 435 error = xen_resume_devices(ddi_get_child(dip), error); 436 last = dip; 437 } 438 439 return (error); 440 } 441 442 /*ARGSUSED*/ 443 static int 444 check_xpvd(dev_info_t *dip, void *arg) 445 { 446 char *name; 447 448 name = ddi_node_name(dip); 449 if (name == NULL || strcmp(name, "xpvd")) { 450 return (DDI_WALK_CONTINUE); 451 } else { 452 xpvd_dip = dip; 453 return (DDI_WALK_TERMINATE); 454 } 455 } 456 457 /* 458 * Top level routine to direct suspend/resume of a domain. 459 */ 460 void 461 xen_suspend_domain(void) 462 { 463 extern void rtcsync(void); 464 extern void ec_resume(void); 465 extern kmutex_t ec_lock; 466 struct xen_add_to_physmap xatp; 467 ulong_t flags; 468 int err; 469 470 cmn_err(CE_NOTE, "Domain suspending for save/migrate"); 471 472 SUSPEND_DEBUG("xen_suspend_domain\n"); 473 474 /* 475 * We only want to suspend the PV devices, since the emulated devices 476 * are suspended by saving the emulated device state. The PV devices 477 * are all children of the xpvd nexus device. So we search the 478 * device tree for the xpvd node to use as the root of the tree to 479 * be suspended. 480 */ 481 if (xpvd_dip == NULL) 482 ddi_walk_devs(ddi_root_node(), check_xpvd, NULL); 483 484 /* 485 * suspend interrupts and devices 486 */ 487 if (xpvd_dip != NULL) 488 (void) xen_suspend_devices(ddi_get_child(xpvd_dip)); 489 else 490 cmn_err(CE_WARN, "No PV devices found to suspend"); 491 SUSPEND_DEBUG("xenbus_suspend\n"); 492 xenbus_suspend(); 493 494 mutex_enter(&cpu_lock); 495 496 /* 497 * Suspend on vcpu 0 498 */ 499 thread_affinity_set(curthread, 0); 500 kpreempt_disable(); 501 502 if (ncpus > 1) 503 pause_cpus(NULL); 504 /* 505 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence 506 * any holder would have dropped it to get through pause_cpus(). 507 */ 508 mutex_enter(&ec_lock); 509 510 /* 511 * From here on in, we can't take locks. 512 */ 513 514 flags = intr_clear(); 515 516 SUSPEND_DEBUG("HYPERVISOR_suspend\n"); 517 /* 518 * At this point we suspend and sometime later resume. 519 * Note that this call may return with an indication of a cancelled 520 * for now no matter ehat the return we do a full resume of all 521 * suspended drivers, etc. 522 */ 523 (void) HYPERVISOR_shutdown(SHUTDOWN_suspend); 524 525 /* 526 * Point HYPERVISOR_shared_info to the proper place. 527 */ 528 xatp.domid = DOMID_SELF; 529 xatp.idx = 0; 530 xatp.space = XENMAPSPACE_shared_info; 531 xatp.gpfn = shared_info_frame; 532 if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) 533 panic("Could not set shared_info page. error: %d", err); 534 535 SUSPEND_DEBUG("gnttab_resume\n"); 536 gnttab_resume(); 537 538 SUSPEND_DEBUG("ec_resume\n"); 539 ec_resume(); 540 541 intr_restore(flags); 542 543 if (ncpus > 1) 544 start_cpus(); 545 546 mutex_exit(&ec_lock); 547 mutex_exit(&cpu_lock); 548 549 /* 550 * Now we can take locks again. 551 */ 552 553 rtcsync(); 554 555 SUSPEND_DEBUG("xenbus_resume\n"); 556 xenbus_resume(); 557 SUSPEND_DEBUG("xen_resume_devices\n"); 558 if (xpvd_dip != NULL) 559 (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0); 560 561 thread_affinity_clear(curthread); 562 kpreempt_enable(); 563 564 SUSPEND_DEBUG("finished xen_suspend_domain\n"); 565 566 cmn_err(CE_NOTE, "domain restore/migrate completed"); 567 } 568 569 static void 570 xen_dirty_shutdown(void *arg) 571 { 572 int cmd = (uintptr_t)arg; 573 574 cmn_err(CE_WARN, "Externally requested shutdown failed or " 575 "timed out.\nShutting down.\n"); 576 577 switch (cmd) { 578 case SHUTDOWN_HALT: 579 case SHUTDOWN_POWEROFF: 580 (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred); 581 break; 582 case SHUTDOWN_REBOOT: 583 (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred); 584 break; 585 } 586 } 587 588 static void 589 xen_shutdown(void *arg) 590 { 591 nvlist_t *attr_list = NULL; 592 sysevent_t *event = NULL; 593 sysevent_id_t eid; 594 int cmd = (uintptr_t)arg; 595 int err; 596 597 ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX); 598 599 if (cmd == SHUTDOWN_SUSPEND) { 600 xen_suspend_domain(); 601 return; 602 } 603 604 err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_SLEEP); 605 if (err != DDI_SUCCESS) 606 goto failure; 607 608 err = nvlist_add_string(attr_list, "shutdown", cmd_strings[cmd]); 609 if (err != DDI_SUCCESS) 610 goto failure; 611 612 if ((event = sysevent_alloc("EC_xpvsys", "control", "SUNW:kern:xpv", 613 SE_SLEEP)) == NULL) 614 goto failure; 615 (void) sysevent_attach_attributes(event, 616 (sysevent_attr_list_t *)attr_list); 617 618 err = log_sysevent(event, SE_SLEEP, &eid); 619 620 sysevent_detach_attributes(event); 621 sysevent_free(event); 622 623 if (err != 0) 624 goto failure; 625 626 (void) timeout(xen_dirty_shutdown, arg, 627 SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC)); 628 629 nvlist_free(attr_list); 630 return; 631 632 failure: 633 if (attr_list != NULL) 634 nvlist_free(attr_list); 635 xen_dirty_shutdown(arg); 636 } 637 638 /*ARGSUSED*/ 639 static void 640 xen_shutdown_handler(struct xenbus_watch *watch, const char **vec, 641 unsigned int len) 642 { 643 char *str; 644 xenbus_transaction_t xbt; 645 int err, shutdown_code = SHUTDOWN_INVALID; 646 unsigned int slen; 647 648 again: 649 err = xenbus_transaction_start(&xbt); 650 if (err) 651 return; 652 if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) { 653 (void) xenbus_transaction_end(xbt, 1); 654 return; 655 } 656 657 SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str); 658 659 /* 660 * If this is a watch fired from our write below, check out early to 661 * avoid an infinite loop. 662 */ 663 if (strcmp(str, "") == 0) { 664 (void) xenbus_transaction_end(xbt, 0); 665 kmem_free(str, slen); 666 return; 667 } else if (strcmp(str, "poweroff") == 0) { 668 shutdown_code = SHUTDOWN_POWEROFF; 669 } else if (strcmp(str, "reboot") == 0) { 670 shutdown_code = SHUTDOWN_REBOOT; 671 } else if (strcmp(str, "suspend") == 0) { 672 shutdown_code = SHUTDOWN_SUSPEND; 673 } else if (strcmp(str, "halt") == 0) { 674 shutdown_code = SHUTDOWN_HALT; 675 } else { 676 printf("Ignoring shutdown request: %s\n", str); 677 } 678 679 (void) xenbus_write(xbt, "control", "shutdown", ""); 680 err = xenbus_transaction_end(xbt, 0); 681 if (err == EAGAIN) { 682 SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id); 683 kmem_free(str, slen); 684 goto again; 685 } 686 687 kmem_free(str, slen); 688 if (shutdown_code != SHUTDOWN_INVALID) { 689 (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown, 690 (void *)(intptr_t)shutdown_code, 0); 691 } 692 } 693 694 static int 695 xen_pv_init(dev_info_t *xpv_dip) 696 { 697 struct cpuid_regs cp; 698 uint32_t xen_signature[4]; 699 char *xen_str; 700 struct xen_add_to_physmap xatp; 701 xen_capabilities_info_t caps; 702 pfn_t pfn; 703 uint64_t msrval; 704 int err; 705 706 /* 707 * Xen's pseudo-cpuid function 0x40000000 returns a string 708 * representing the Xen signature in %ebx, %ecx, and %edx. 709 * %eax contains the maximum supported cpuid function. 710 */ 711 cp.cp_eax = 0x40000000; 712 (void) __cpuid_insn(&cp); 713 xen_signature[0] = cp.cp_ebx; 714 xen_signature[1] = cp.cp_ecx; 715 xen_signature[2] = cp.cp_edx; 716 xen_signature[3] = 0; 717 xen_str = (char *)xen_signature; 718 if (strcmp("XenVMMXenVMM", xen_str) != 0 || 719 cp.cp_eax < 0x40000002) { 720 cmn_err(CE_WARN, 721 "Attempting to load Xen drivers on non-Xen system"); 722 return (-1); 723 } 724 725 /* 726 * cpuid function 0x40000001 returns the Xen version in %eax. The 727 * top 16 bits are the major version, the bottom 16 are the minor 728 * version. 729 */ 730 cp.cp_eax = 0x40000001; 731 (void) __cpuid_insn(&cp); 732 xen_major = cp.cp_eax >> 16; 733 xen_minor = cp.cp_eax & 0xffff; 734 735 /* 736 * The xpv driver is incompatible with xen versions older than 3.1. This 737 * is due to the changes in the vcpu_info and shared_info structs used 738 * to communicate with the hypervisor (the event channels in particular) 739 * that were introduced with 3.1. 740 */ 741 if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) { 742 cmn_err(CE_WARN, "Xen version %d.%d is not supported", 743 xen_major, xen_minor); 744 return (-1); 745 } 746 747 /* 748 * cpuid function 0x40000002 returns information about the 749 * hypercall page. %eax nominally contains the number of pages 750 * with hypercall code, but according to the Xen guys, "I'll 751 * guarantee that remains one forever more, so you can just 752 * allocate a single page and get quite upset if you ever see CPUID 753 * return more than one page." %ebx contains an MSR we use to ask 754 * Xen to remap each page at a specific pfn. 755 */ 756 cp.cp_eax = 0x40000002; 757 (void) __cpuid_insn(&cp); 758 759 /* 760 * Let Xen know where we want the hypercall page mapped. We 761 * already have a page allocated in the .text section to simplify 762 * the wrapper code. 763 */ 764 pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page); 765 msrval = mmu_ptob(pfn); 766 wrmsr(cp.cp_ebx, msrval); 767 768 /* Fill in the xen_info data */ 769 xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP); 770 (void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor); 771 xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN); 772 xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN); 773 774 /* Figure out whether the hypervisor is 32-bit or 64-bit. */ 775 if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) { 776 ((char *)(caps))[sizeof (caps) - 1] = '\0'; 777 if (strstr(caps, "x86_64") != NULL) 778 xen_is_64bit = 1; 779 else if (strstr(caps, "x86_32") != NULL) 780 xen_is_64bit = 0; 781 } 782 if (xen_is_64bit < 0) { 783 cmn_err(CE_WARN, "Couldn't get capability info from Xen."); 784 return (-1); 785 } 786 #ifdef __amd64 787 ASSERT(xen_is_64bit == 1); 788 #endif 789 790 /* 791 * Allocate space for the shared_info page and tell Xen where it 792 * is. 793 */ 794 HYPERVISOR_shared_info = xen_alloc_pages(1); 795 shared_info_frame = hat_getpfnum(kas.a_hat, 796 (caddr_t)HYPERVISOR_shared_info); 797 xatp.domid = DOMID_SELF; 798 xatp.idx = 0; 799 xatp.space = XENMAPSPACE_shared_info; 800 xatp.gpfn = shared_info_frame; 801 if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) { 802 cmn_err(CE_WARN, "Could not get shared_info page from Xen." 803 " error: %d", err); 804 return (-1); 805 } 806 807 /* Set up the grant tables. */ 808 gnttab_init(); 809 810 /* Set up event channel support */ 811 if (ec_init(xpv_dip) != 0) 812 return (-1); 813 814 /* Set up xenbus */ 815 xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP); 816 xs_early_init(); 817 xs_domu_init(); 818 819 /* Set up for suspend/resume/migrate */ 820 xen_shutdown_tq = taskq_create("shutdown_taskq", 1, 821 maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); 822 shutdown_watch.node = "control/shutdown"; 823 shutdown_watch.callback = xen_shutdown_handler; 824 if (register_xenbus_watch(&shutdown_watch)) 825 cmn_err(CE_WARN, "Failed to set shutdown watcher"); 826 827 return (0); 828 } 829 830 static void 831 xen_pv_fini() 832 { 833 if (xen_info != NULL) 834 kmem_free(xen_info, sizeof (start_info_t)); 835 ec_fini(); 836 } 837 838 /*ARGSUSED*/ 839 static int 840 xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 841 { 842 if (getminor((dev_t)arg) != XPV_MINOR) 843 return (DDI_FAILURE); 844 845 switch (cmd) { 846 case DDI_INFO_DEVT2DEVINFO: 847 *result = xpv_dip; 848 break; 849 case DDI_INFO_DEVT2INSTANCE: 850 *result = 0; 851 break; 852 default: 853 return (DDI_FAILURE); 854 } 855 856 return (DDI_SUCCESS); 857 } 858 859 static int 860 xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 861 { 862 if (cmd != DDI_ATTACH) 863 return (DDI_FAILURE); 864 865 if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, 866 ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) 867 return (DDI_FAILURE); 868 869 xpv_dip = dip; 870 871 if (xen_pv_init(dip) != 0) 872 return (DDI_FAILURE); 873 874 ddi_report_dev(dip); 875 876 /* 877 * If the memscrubber attempts to scrub the pages we hand to Xen, 878 * the domain will panic. 879 */ 880 memscrub_disable(); 881 882 /* 883 * Report our version to dom0. 884 */ 885 if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d", 886 HVMPV_XPV_VERS)) 887 cmn_err(CE_WARN, "xpv: couldn't write version\n"); 888 889 return (DDI_SUCCESS); 890 } 891 892 /* 893 * Attempts to reload the PV driver plumbing hang on Intel platforms, so 894 * we don't want to unload the framework by accident. 895 */ 896 int xpv_allow_detach = 0; 897 898 static int 899 xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 900 { 901 if (cmd != DDI_DETACH || xpv_allow_detach == 0) 902 return (DDI_FAILURE); 903 904 if (xpv_dip != NULL) { 905 xen_pv_fini(); 906 ddi_remove_minor_node(dip, NULL); 907 xpv_dip = NULL; 908 } 909 910 return (DDI_SUCCESS); 911 } 912 913 /*ARGSUSED1*/ 914 static int 915 xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr) 916 { 917 return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO); 918 } 919 920 /*ARGSUSED*/ 921 static int 922 xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, 923 int *rval_p) 924 { 925 return (EINVAL); 926 } 927 928 int 929 _init(void) 930 { 931 int err; 932 933 if ((err = mod_install(&modl)) != 0) 934 return (err); 935 936 impl_bus_add_probe(xpv_enumerate); 937 return (0); 938 } 939 940 int 941 _fini(void) 942 { 943 int err; 944 945 if ((err = mod_remove(&modl)) != 0) 946 return (err); 947 948 impl_bus_delete_probe(xpv_enumerate); 949 return (0); 950 } 951 952 int 953 _info(struct modinfo *modinfop) 954 { 955 return (mod_info(&modl, modinfop)); 956 } 957