1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/modctl.h> 29 #include <sys/types.h> 30 #include <sys/archsystm.h> 31 #include <sys/machsystm.h> 32 #include <sys/sunndi.h> 33 #include <sys/sunddi.h> 34 #include <sys/ddi_subrdefs.h> 35 #include <sys/xpv_support.h> 36 #include <sys/xen_errno.h> 37 #include <sys/hypervisor.h> 38 #include <sys/gnttab.h> 39 #include <sys/xenbus_comms.h> 40 #include <sys/xenbus_impl.h> 41 #include <xen/sys/xendev.h> 42 #include <sys/sysmacros.h> 43 #include <sys/x86_archext.h> 44 #include <sys/mman.h> 45 #include <sys/stat.h> 46 #include <sys/conf.h> 47 #include <sys/devops.h> 48 #include <sys/pc_mmu.h> 49 #include <sys/cmn_err.h> 50 #include <sys/cpr.h> 51 #include <sys/ddi.h> 52 #include <vm/seg_kmem.h> 53 #include <vm/as.h> 54 #include <vm/hat_pte.h> 55 #include <vm/hat_i86.h> 56 57 #define XPV_MINOR 0 58 #define XPV_BUFSIZE 128 59 60 /* 61 * This structure is ordinarily constructed by Xen. In the HVM world, we 62 * manually fill in the few fields the PV drivers need. 
 */
start_info_t *xen_info = NULL;

/* Xen version number. */
int xen_major, xen_minor;

/* Metadata page shared between domain and Xen */
shared_info_t *HYPERVISOR_shared_info = NULL;

/* Page containing code to issue hypercalls. */
extern caddr_t hypercall_page;

/* Is the hypervisor 64-bit?  -1 means "not yet determined". */
int xen_is_64bit = -1;

/* virtual addr for the store_mfn page */
caddr_t xb_addr;

/* This driver's own devinfo node, set in xpv_attach(). */
dev_info_t *xpv_dip;
/* Devinfo node of the "xpvd" nexus, found by walking the device tree. */
static dev_info_t *xpvd_dip;

/* saved pfn of the shared info page */
static pfn_t shared_info_frame;

#ifdef DEBUG
/* Set non-zero (e.g. via mdb) to trace the suspend/resume path. */
int xen_suspend_debug;

#define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
#else
#define SUSPEND_DEBUG(...)
#endif

/*
 * Forward declarations
 */
static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
static int xpv_open(dev_t *, int, int, cred_t *);
static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/* cb_ops(9S): only open and ioctl are implemented for this pseudo-device. */
static struct cb_ops xpv_cb_ops = {
	xpv_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	xpv_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_MP,		/* driver compatibility flags */
	CB_REV,		/* cb_ops revision */
	NULL,		/* async read */
	NULL		/* async write */
};

/* dev_ops(9S) for the xpv driver. */
static struct dev_ops xpv_dv_ops = {
	DEVO_REV,
	0,		/* reference count */
	xpv_getinfo,
	nulldev,	/* identify */
	nulldev,	/* probe */
	xpv_attach,
	xpv_detach,
	nodev,		/* reset */
	&xpv_cb_ops,
	NULL,		/* struct bus_ops */
	NULL		/* power */
};

/* Module linkage: a loadable driver module. */
static struct modldrv modldrv = {
	&mod_driverops,
	"xpv driver %I%",
	&xpv_dv_ops
};

static struct modlinkage modl = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL		/* null termination */
	}
};

/* DMA attributes used by xen_alloc_pages() below. */
static
ddi_dma_attr_t xpv_dma_attr = { 154 DMA_ATTR_V0, /* version of this structure */ 155 0, /* lowest usable address */ 156 0xffffffffffffffffULL, /* highest usable address */ 157 0x7fffffff, /* maximum DMAable byte count */ 158 MMU_PAGESIZE, /* alignment in bytes */ 159 0x7ff, /* bitmap of burst sizes */ 160 1, /* minimum transfer */ 161 0xffffffffU, /* maximum transfer */ 162 0x7fffffffULL, /* maximum segment length */ 163 1, /* maximum number of segments */ 164 1, /* granularity */ 165 0, /* flags (reserved) */ 166 }; 167 168 static ddi_device_acc_attr_t xpv_accattr = { 169 DDI_DEVICE_ATTR_V0, 170 DDI_NEVERSWAP_ACC, 171 DDI_STRICTORDER_ACC 172 }; 173 174 #define MAX_ALLOCATIONS 10 175 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS]; 176 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS]; 177 static int xen_alloc_cnt = 0; 178 179 void * 180 xen_alloc_pages(pgcnt_t cnt) 181 { 182 size_t len; 183 int a = xen_alloc_cnt++; 184 caddr_t addr; 185 186 ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS); 187 if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0, 188 &xpv_dma_handle[a]) != DDI_SUCCESS) 189 return (NULL); 190 191 if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt, 192 &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, 193 &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) { 194 ddi_dma_free_handle(&xpv_dma_handle[a]); 195 cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices"); 196 return (NULL); 197 } 198 return (addr); 199 } 200 201 /* 202 * This function is invoked twice, first time with reprogram=0 to set up 203 * the xpvd portion of the device tree. The second time it is ignored. 
204 */ 205 static void 206 xpv_enumerate(int reprogram) 207 { 208 dev_info_t *dip; 209 210 if (reprogram != 0) 211 return; 212 213 ndi_devi_alloc_sleep(ddi_root_node(), "xpvd", 214 (pnode_t)DEVI_SID_NODEID, &dip); 215 216 (void) ndi_devi_bind_driver(dip, 0); 217 218 /* 219 * Too early to enumerate split device drivers in domU 220 * since we need to create taskq thread during enumeration. 221 * So, we only enumerate softdevs and console here. 222 */ 223 xendev_enum_all(dip, B_TRUE); 224 } 225 226 /* 227 * Translate a hypervisor errcode to a Solaris error code. 228 */ 229 int 230 xen_xlate_errcode(int error) 231 { 232 #define CASE(num) case X_##num: error = num; break 233 234 switch (-error) { 235 CASE(EPERM); CASE(ENOENT); CASE(ESRCH); 236 CASE(EINTR); CASE(EIO); CASE(ENXIO); 237 CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); 238 CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); 239 CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); 240 CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); 241 CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); 242 CASE(ENODATA); 243 default: 244 panic("xen_xlate_errcode: unknown error %d", error); 245 } 246 return (error); 247 #undef CASE 248 } 249 250 /*PRINTFLIKE1*/ 251 void 252 xen_printf(const char *fmt, ...) 253 { 254 va_list adx; 255 256 va_start(adx, fmt); 257 printf(fmt, adx); 258 va_end(adx); 259 } 260 261 /* 262 * Stub functions to get the FE drivers to build, and to catch drivers that 263 * misbehave in HVM domains. 
 */
/*ARGSUSED*/
void
xen_release_pfn(pfn_t pfn, caddr_t va)
{
	/* PV-only interface; must never be reached in an HVM domain. */
	panic("xen_release_pfn() is not supported in HVM domains");
}

/*ARGSUSED*/
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	/* PV-only interface; must never be reached in an HVM domain. */
	panic("reassign_pfn() is not supported in HVM domains");
}

/*ARGSUSED*/
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	/* PV-only interface; must never be reached in an HVM domain. */
	panic("balloon_free_pages() is not supported in HVM domains");
	return (0);
}

/*ARGSUSED*/
void
balloon_drv_added(int64_t delta)
{
	/* PV-only interface; must never be reached in an HVM domain. */
	panic("balloon_drv_added() is not supported in HVM domains");
}

/*
 * Add a mapping for the machine page at the given virtual address.
 * Only level-0 (single page) mappings are supported.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	ASSERT(level == 0);

	hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
	    mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
}

/*
 * Fetch the HVM parameter 'param_id' for this domain via the
 * HVMOP_get_param hypercall.  Returns (uint64_t)-1 if the hypercall
 * fails, so callers must treat -1 as an in-band error sentinel.
 */
static uint64_t
hvm_get_param(int param_id)
{
	struct xen_hvm_param xhp;

	xhp.domid = DOMID_SELF;
	xhp.index = param_id;
	if ((HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0))
		return (-1);
	return (xhp.value);
}

/* xenstore watch on "control/shutdown"; registered in xen_pv_init(). */
static struct xenbus_watch shutdown_watch;
/* Single-threaded taskq on which shutdown/suspend requests execute. */
taskq_t *xen_shutdown_tq;

/* Shutdown codes decoded from the "control/shutdown" xenstore value. */
#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

/* Grace period for init(1) before xen_dirty_shutdown() forces the issue. */
#define	SHUTDOWN_TIMEOUT_SECS	(60 * 5)

/*
 * Recursively suspend (detach with DDI_SUSPEND) every driver-bound node
 * in the sibling list rooted at 'dip', children before parents.  Each
 * node successfully suspended is marked with DCF_CPR_SUSPENDED so
 * xen_resume_devices() can undo exactly this set.  Returns 0 on success
 * or ENXIO on the first failure.
 */
int
xen_suspend_devices(dev_info_t *dip)
{
	int error;
	char buf[XPV_BUFSIZE];

	SUSPEND_DEBUG("xen_suspend_devices\n");

	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/* Suspend children before their parent. */
		if (xen_suspend_devices(ddi_get_child(dip)))
			return (ENXIO);
		/* Nodes without a bound driver need no suspend. */
		if (ddi_get_driver(dip) == NULL)
			continue;
		SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
		ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);


		if (!i_ddi_devi_attached(dip)) {
			error = DDI_FAILURE;
		} else {
			error = devi_detach(dip, DDI_SUSPEND);
		}

		if (error == DDI_SUCCESS) {
			DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
		} else {
			SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
			    ddi_deviname(dip, buf));
			cmn_err(CE_WARN, "Unable to suspend device %s.",
			    ddi_deviname(dip, buf));
			cmn_err(CE_WARN, "Device is busy or does not "
			    "support suspend/resume.");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * Resume (attach with DDI_RESUME) devices previously suspended by
 * xen_suspend_devices(), walking each sibling list in reverse order so
 * that resume order mirrors suspend order.  'resume_failed' carries a
 * prior error (or 0); once any resume fails the remaining nodes are not
 * resumed, but the whole tree is still traversed so every
 * DCF_CPR_SUSPENDED flag gets cleared.  Returns 0 or ENXIO.
 */
int
xen_resume_devices(dev_info_t *start, int resume_failed)
{
	dev_info_t *dip, *next, *last = NULL;
	int did_suspend;
	int error = resume_failed;
	char buf[XPV_BUFSIZE];

	SUSPEND_DEBUG("xen_resume_devices\n");

	while (last != start) {
		/* Locate the sibling just before 'last' (reverse walk). */
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}

		/*
		 * cpr is the only one that uses this field and the device
		 * itself hasn't resumed yet, there is no need to use a
		 * lock, even though kernel threads are active by now.
		 */
		did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
		if (did_suspend)
			DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;

		/*
		 * There may be background attaches happening on devices
		 * that were not originally suspended by cpr, so resume
		 * only devices that were suspended by cpr. Also, stop
		 * resuming after the first resume failure, but traverse
		 * the entire tree to clear the suspend flag.
		 */
		if (did_suspend && !error) {
			SUSPEND_DEBUG("Resuming device %s\n",
			    ddi_deviname(dip, buf));
			/*
			 * If a device suspended by cpr gets detached during
			 * the resume process (for example, due to hotplugging)
			 * before cpr gets around to issuing it a DDI_RESUME,
			 * we'll have problems.
			 */
			if (!i_ddi_devi_attached(dip)) {
				cmn_err(CE_WARN, "Skipping %s, device "
				    "not ready for resume",
				    ddi_deviname(dip, buf));
			} else {
				if (devi_attach(dip, DDI_RESUME) !=
				    DDI_SUCCESS) {
					error = ENXIO;
				}
			}
		}

		if (error == ENXIO) {
			cmn_err(CE_WARN, "Unable to resume device %s",
			    ddi_deviname(dip, buf));
		}

		/* Recurse into this node's children, then step back a sibling. */
		error = xen_resume_devices(ddi_get_child(dip), error);
		last = dip;
	}

	return (error);
}

/*
 * ddi_walk_devs() callback: record the "xpvd" nexus node in xpvd_dip and
 * stop the walk once it has been found.
 */
/*ARGSUSED*/
static int
check_xpvd(dev_info_t *dip, void *arg)
{
	char *name;

	name = ddi_node_name(dip);
	if (name == NULL || strcmp(name, "xpvd")) {
		return (DDI_WALK_CONTINUE);
	} else {
		xpvd_dip = dip;
		return (DDI_WALK_TERMINATE);
	}
}

/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state. The PV devices
	 * are all children of the xpvd nexus device. So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what the return value is, we do a
	 * full resume of all the suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/* Resynchronize the system clock from the RTC after the jump. */
	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}

/*
 * timeout(9F) handler armed by xen_shutdown(): if an externally requested
 * shutdown has not completed within SHUTDOWN_TIMEOUT_SECS, force the
 * action through kadmin().
 */
static void
xen_dirty_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;

	cmn_err(CE_WARN, "Externally requested shutdown failed or "
	    "timed out.\nShutting down.\n");

	switch (cmd) {
	case SHUTDOWN_HALT:	/* intentional fallthrough: force poweroff */
	case SHUTDOWN_POWEROFF:
		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
		break;
	case SHUTDOWN_REBOOT:
		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
		break;
	}
}

/*
 * Taskq callback that carries out a shutdown/reboot/suspend request from
 * the control domain.  'arg' is one of the SHUTDOWN_* codes cast to a
 * pointer.  Suspend is handled synchronously; the other requests are
 * delegated to init(1) via SIGPWR, with xen_dirty_shutdown() as a
 * timeout fallback.
 */
static void
xen_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;
	proc_t *initpp;

	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);

	if (cmd == SHUTDOWN_SUSPEND) {
		xen_suspend_domain();
		return;
	}

	/* Tell the shutdown path which final action was requested. */
	switch (cmd) {
	case SHUTDOWN_POWEROFF:
		force_shutdown_method = AD_POWEROFF;
		break;
	case SHUTDOWN_HALT:
		force_shutdown_method = AD_HALT;
		break;
	case SHUTDOWN_REBOOT:
		force_shutdown_method = AD_BOOT;
		break;
	}


	/*
	 * If we're still booting and init(1) isn't set up yet, simply halt.
	 */
	mutex_enter(&pidlock);
	initpp = prfind(P_INITPID);
	mutex_exit(&pidlock);
	if (initpp == NULL) {
		extern void halt(char *);
		halt("Power off the System");	/* just in case */
	}

	/*
	 * else, graceful shutdown with inittab and all getting involved
	 */
	psignal(initpp, SIGPWR);

	/* If init doesn't act within the timeout, force the shutdown. */
	(void) timeout(xen_dirty_shutdown, arg,
	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
}

/*
 * xenbus watch callback fired when the "control/shutdown" node changes.
 * Reads the request string inside a transaction, decodes it into a
 * SHUTDOWN_* code, acknowledges the request by writing "" back, and
 * dispatches the real work onto xen_shutdown_tq.
 */
/*ARGSUSED*/
static void
xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
	unsigned int len)
{
	char *str;
	xenbus_transaction_t xbt;
	int err, shutdown_code = SHUTDOWN_INVALID;
	unsigned int slen;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;
	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
		(void) xenbus_transaction_end(xbt, 1);
		return;
	}

	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);

	/*
	 * If this is a watch fired from our write below, check out early to
	 * avoid an infinite loop.
	 */
	if (strcmp(str, "") == 0) {
		(void) xenbus_transaction_end(xbt, 0);
		kmem_free(str, slen);
		return;
	} else if (strcmp(str, "poweroff") == 0) {
		shutdown_code = SHUTDOWN_POWEROFF;
	} else if (strcmp(str, "reboot") == 0) {
		shutdown_code = SHUTDOWN_REBOOT;
	} else if (strcmp(str, "suspend") == 0) {
		shutdown_code = SHUTDOWN_SUSPEND;
	} else if (strcmp(str, "halt") == 0) {
		shutdown_code = SHUTDOWN_HALT;
	} else {
		printf("Ignoring shutdown request: %s\n", str);
	}

	/* Acknowledge the request so dom0 knows we have consumed it. */
	(void) xenbus_write(xbt, "control", "shutdown", "");
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN) {
		/* Transaction conflicted; retry the whole read/write. */
		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
		kmem_free(str, slen);
		goto again;
	}

	kmem_free(str, slen);
	if (shutdown_code != SHUTDOWN_INVALID) {
		/* Run the actual shutdown work on the dedicated taskq. */
		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
		    (void *)(intptr_t)shutdown_code, 0);
	}
}

/*
 * One-time PV framework bring-up, called from xpv_attach(): verify we are
 * running on Xen >= 3.1, install the hypercall page, fill in xen_info,
 * map the shared_info page, and initialize grant tables, event channels,
 * xenbus and the shutdown watch.  Returns 0 on success, -1 on failure.
 *
 * NOTE(review): the 'xpv_dip' parameter shadows the file-scope global of
 * the same name; at the only call site (xpv_attach) both refer to the
 * same devinfo node, so behavior is unaffected.
 */
static int
xen_pv_init(dev_info_t *xpv_dip)
{
	struct cpuid_regs cp;
	uint32_t xen_signature[4];
	char *xen_str;
	struct xen_add_to_physmap xatp;
	xen_capabilities_info_t caps;
	pfn_t pfn;
	uint64_t msrval;
	int err;

	/*
	 * Xen's pseudo-cpuid function 0x40000000 returns a string
	 * representing the Xen signature in %ebx, %ecx, and %edx.
	 * %eax contains the maximum supported cpuid function.
	 */
	cp.cp_eax = 0x40000000;
	(void) __cpuid_insn(&cp);
	xen_signature[0] = cp.cp_ebx;
	xen_signature[1] = cp.cp_ecx;
	xen_signature[2] = cp.cp_edx;
	xen_signature[3] = 0;	/* NUL-terminate the 12-byte signature */
	xen_str = (char *)xen_signature;
	if (strcmp("XenVMMXenVMM", xen_str) != 0 ||
	    cp.cp_eax < 0x40000002) {
		cmn_err(CE_WARN,
		    "Attempting to load Xen drivers on non-Xen system");
		return (-1);
	}

	/*
	 * cpuid function 0x40000001 returns the Xen version in %eax. The
	 * top 16 bits are the major version, the bottom 16 are the minor
	 * version.
	 */
	cp.cp_eax = 0x40000001;
	(void) __cpuid_insn(&cp);
	xen_major = cp.cp_eax >> 16;
	xen_minor = cp.cp_eax & 0xffff;

	/*
	 * The xpv driver is incompatible with xen versions older than 3.1.
	 * This is due to the changes in the vcpu_info and shared_info
	 * structs used to communicate with the hypervisor (the event
	 * channels in particular) that were introduced with 3.1.
	 */
	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) {
		cmn_err(CE_WARN, "Xen version %d.%d is not supported",
		    xen_major, xen_minor);
		return (-1);
	}

	/*
	 * cpuid function 0x40000002 returns information about the
	 * hypercall page. %eax nominally contains the number of pages
	 * with hypercall code, but according to the Xen guys, "I'll
	 * guarantee that remains one forever more, so you can just
	 * allocate a single page and get quite upset if you ever see CPUID
	 * return more than one page." %ebx contains an MSR we use to ask
	 * Xen to remap each page at a specific pfn.
	 */
	cp.cp_eax = 0x40000002;
	(void) __cpuid_insn(&cp);

	/*
	 * Let Xen know where we want the hypercall page mapped. We
	 * already have a page allocated in the .text section to simplify
	 * the wrapper code.
	 */
	pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page);
	msrval = mmu_ptob(pfn);
	wrmsr(cp.cp_ebx, msrval);

	/* Fill in the xen_info data */
	xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP);
	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);
	xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN);
	xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN);

	/* Figure out whether the hypervisor is 32-bit or 64-bit. */
	if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) {
		((char *)(caps))[sizeof (caps) - 1] = '\0';
		if (strstr(caps, "x86_64") != NULL)
			xen_is_64bit = 1;
		else if (strstr(caps, "x86_32") != NULL)
			xen_is_64bit = 0;
	}
	if (xen_is_64bit < 0) {
		cmn_err(CE_WARN, "Couldn't get capability info from Xen.");
		return (-1);
	}
#ifdef __amd64
	ASSERT(xen_is_64bit == 1);
#endif

	/*
	 * Allocate space for the shared_info page and tell Xen where it
	 * is.
	 */
	HYPERVISOR_shared_info = xen_alloc_pages(1);
	shared_info_frame = hat_getpfnum(kas.a_hat,
	    (caddr_t)HYPERVISOR_shared_info);
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) {
		cmn_err(CE_WARN, "Could not get shared_info page from Xen."
		    " error: %d", err);
		return (-1);
	}

	/* Set up the grant tables.
 */
	gnttab_init();

	/* Set up event channel support */
	if (ec_init(xpv_dip) != 0)
		return (-1);

	/* Set up xenbus */
	xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
	xs_early_init();
	xs_domu_init();

	/* Set up for suspend/resume/migrate */
	xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
	shutdown_watch.node = "control/shutdown";
	shutdown_watch.callback = xen_shutdown_handler;
	if (register_xenbus_watch(&shutdown_watch))
		cmn_err(CE_WARN, "Failed to set shutdown watcher");

	return (0);
}

/*
 * Partial teardown of xen_pv_init(): free the xen_info block and tear
 * down event channel support.  Called from xpv_detach().
 */
static void
xen_pv_fini()
{
	if (xen_info != NULL)
		kmem_free(xen_info, sizeof (start_info_t));
	ec_fini();
}

/*
 * getinfo(9E) entry point: map a dev_t to our devinfo node or instance
 * number.  Only minor XPV_MINOR is valid.
 */
/*ARGSUSED*/
static int
xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	if (getminor((dev_t)arg) != XPV_MINOR)
		return (DDI_FAILURE);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = xpv_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;	/* single-instance driver */
		break;
	default:
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

/*
 * attach(9E): create our minor node and bring up the entire PV framework
 * via xen_pv_init().  DDI_RESUME is not supported.
 */
static int
xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* Publish our dip before xen_pv_init(); xen_alloc_pages() uses it. */
	xpv_dip = dip;

	if (xen_pv_init(dip) != 0)
		return (DDI_FAILURE);

	ddi_report_dev(dip);

	/*
	 * If the memscrubber attempts to scrub the pages we hand to Xen,
	 * the domain will panic.
	 */
	memscrub_disable();

	/*
	 * Report our version to dom0.
	 */
	if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d",
	    HVMPV_XPV_VERS))
		cmn_err(CE_WARN, "xpv: couldn't write version\n");

	return (DDI_SUCCESS);
}

/*
 * Attempts to reload the PV driver plumbing hang on Intel platforms, so
 * we don't want to unload the framework by accident.
 */
int xpv_allow_detach = 0;

/*
 * detach(9E): refused unless the xpv_allow_detach override is set.
 */
static int
xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH || xpv_allow_detach == 0)
		return (DDI_FAILURE);

	if (xpv_dip != NULL) {
		xen_pv_fini();
		ddi_remove_minor_node(dip, NULL);
		xpv_dip = NULL;
	}

	return (DDI_SUCCESS);
}

/* open(9E): succeed only for our single minor node. */
/*ARGSUSED1*/
static int
xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
{
	return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
}

/* ioctl(9E): no ioctls are supported. */
/*ARGSUSED*/
static int
xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
	int *rval_p)
{
	return (EINVAL);
}

int
_init(void)
{
	int err;

	if ((err = mod_install(&modl)) != 0)
		return (err);

	/* Hook device-tree probing so we can create the xpvd nexus node. */
	impl_bus_add_probe(xpv_enumerate);
	return (0);
}

int
_fini(void)
{
	int err;

	if ((err = mod_remove(&modl)) != 0)
		return (err);

	impl_bus_delete_probe(xpv_enumerate);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modl, modinfop));
}