/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/taskq.h>
#include <sys/cmn_err.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/segments.h>
#include <sys/cpuvar.h>
#include <sys/psw.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/hat_i86.h>
#include <sys/mman.h>
#include <sys/hypervisor.h>
#include <xen/sys/xenbus_impl.h>
#include <sys/xpv_panic.h>
#include <util/sscanf.h>
#include <sys/cpu.h>
#include <asm/cpu.h>

#include <xen/public/vcpu.h>
#include <xen/public/io/xs_wire.h>

struct xen_evt_data cpu0_evt_data;	/* cpu0's pending event data */

static taskq_t *cpu_config_tq;
static void vcpu_config_event(struct xenbus_watch *, const char **, uint_t);
static int xen_vcpu_initialize(processorid_t, vcpu_guest_context_t *);

/*
 * These routines allocate any global state that might be needed
 * while starting cpus.  For virtual cpus, there is no such state.
 */
int
mach_cpucontext_init(void)
{
	return (0);
}

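/*
 * Callback run when the xenstore connection comes up (registered from
 * mach_cpucontext_fini() below via xs_register_xenbus_callback()).  It
 * places a watch on the "cpu" subtree so that vcpu_config_event() is
 * notified of externally initiated virtual CPU state changes.
 */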
void
do_cpu_config_watch(int state)
{
	static struct xenbus_watch cpu_config_watch;

	if (state != XENSTORE_UP)
		return;
	cpu_config_watch.node = "cpu";
	cpu_config_watch.callback = vcpu_config_event;
	if (register_xenbus_watch(&cpu_config_watch)) {
		taskq_destroy(cpu_config_tq);
		cmn_err(CE_WARN, "do_cpu_config_watch: "
		    "failed to set vcpu config watch");
	}

}

/*
 * This routine is called after all the "normal" MP startup has
 * been done; a good place to start watching xen store for virtual
 * cpu hot plug events.
 */
void
mach_cpucontext_fini(void)
{

	cpu_config_tq = taskq_create("vcpu config taskq", 1,
	    maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);

	(void) xs_register_xenbus_callback(do_cpu_config_watch);
}

/*
 * Fill in the remaining CPU context and initialize it.
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;

	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;

	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000);	/* ring 1 */
	else
		iopl = 0;

#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif

	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}

	/*
	 * No LDT
	 */

	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
#ifdef CRASH_XEN
	vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#else
	vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#endif
	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());

#if defined(__i386)
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();

	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
	vgc->gs_base_kernel = (uintptr_t)cp;
#endif

	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}

/*
 * Create a guest virtual cpu context so that the virtual cpu
 * springs into life in the domain just about to call mp_startup()
 *
 * Virtual CPUs must be initialized once in the lifetime of the domain;
 * after that subsequent attempts to start them will fail with X_EEXIST.
 *
 * Thus 'alloc' -really- creates and initializes the virtual
 * CPU context just once. Once the initialisation succeeds, we never
 * free it, nor the regular cpu_t to which it refers.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	kthread_t *tp = cp->cpu_thread;
	vcpu_guest_context_t vgc;

	int err = 1;

	/*
	 * First, augment the incoming cpu structure
	 * - vcpu pointer reference
	 * - pending event storage area
	 * - physical address of GDT
	 */
	cp->cpu_m.mcpu_vcpu_info =
	    &HYPERVISOR_shared_info->vcpu_info[cp->cpu_id];
	cp->cpu_m.mcpu_evt_pend = kmem_zalloc(
	    sizeof (struct xen_evt_data), KM_SLEEP);
	cp->cpu_m.mcpu_gdtpa =
	    mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)cp->cpu_gdt));

	if ((err = xen_gdt_setprot(cp, PROT_READ)) != 0)
		goto done;

	/*
	 * Now set up the vcpu context so that we can start this vcpu
	 * in the kernel at tp->t_pc (mp_startup).  Note that the
	 * thread will thread_exit() shortly after performing the
	 * initialization; in particular, we will *never* take a
	 * privilege transition on this thread.
	 */

	bzero(&vgc, sizeof (vgc));

#ifdef __amd64
	vgc.user_regs.rip = tp->t_pc;
	vgc.user_regs.rsp = tp->t_sp;
	vgc.user_regs.rbp = tp->t_sp - 2 * sizeof (greg_t);
#else
	vgc.user_regs.eip = tp->t_pc;
	vgc.user_regs.esp = tp->t_sp;
	vgc.user_regs.ebp = tp->t_sp - 2 * sizeof (greg_t);
#endif
	/*
	 * XXPV	Fix resume, if Russ didn't already fix it.
	 *
	 * Note that resume unconditionally puts t->t_stk + sizeof (regs)
	 * into kernel_sp via HYPERVISOR_stack_switch. This anticipates
	 * that only lwps take traps that switch to the kernel stack;
	 * part of creating an lwp adjusts the stack by subtracting
	 * sizeof (struct regs) off t_stk.
	 *
	 * The more interesting question is, why do we do all the work
	 * of a fully fledged lwp for a plain thread?  In particular
	 * we don't have to call HYPERVISOR_stack_switch for lwp-less threads
	 * or futz with the LDT.  This should probably all be done with
	 * an lwp context operator to keep pure thread context switch fast.
	 */
	vgc.kernel_sp = (ulong_t)tp->t_stk;

	err = mp_set_cpu_context(&vgc, cp);

done:
	if (err) {
		mach_cpucontext_free(cp, NULL, err);
		return (NULL);
	}
	return (cp);
}

/*
 * By the time we are called either we have successfully started
 * the cpu, or our attempt to start it has failed.
 */

/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	switch (err) {
	case 0:
		break;
	case ETIMEDOUT:
		/*
		 * The vcpu context is loaded into the hypervisor, and
		 * we've tried to start it, but the vcpu has not been set
		 * running yet, for whatever reason.  We arrange to -not-
		 * free any data structures it may be referencing.  In
		 * particular, we've already told the hypervisor about
		 * the GDT, and so we can't map it read-write again.
		 */
		break;
	default:
		(void) xen_gdt_setprot(cp, PROT_READ | PROT_WRITE);
		kmem_free(cp->cpu_m.mcpu_evt_pend,
		    sizeof (struct xen_evt_data));
		break;
	}
}

/*
 * Reset this CPU's context.  Clear out any pending evtchn data, since event
 * channel numbers will all change when we resume.
 */
void
mach_cpucontext_reset(cpu_t *cp)
{
	bzero(cp->cpu_m.mcpu_evt_pend, sizeof (struct xen_evt_data));
	/* mcpu_intr_pending ? */
}

static void
pcb_to_user_regs(label_t *pcb, vcpu_guest_context_t *vgc)
{
#ifdef __amd64
	vgc->user_regs.rip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.rsp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.rbp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.rbx = pcb->val[REG_LABEL_RBX];
	vgc->user_regs.r12 = pcb->val[REG_LABEL_R12];
	vgc->user_regs.r13 = pcb->val[REG_LABEL_R13];
	vgc->user_regs.r14 = pcb->val[REG_LABEL_R14];
	vgc->user_regs.r15 = pcb->val[REG_LABEL_R15];
#else /* __amd64 */
	vgc->user_regs.eip = pcb->val[REG_LABEL_PC];
	vgc->user_regs.esp = pcb->val[REG_LABEL_SP];
	vgc->user_regs.ebp = pcb->val[REG_LABEL_BP];
	vgc->user_regs.ebx = pcb->val[REG_LABEL_EBX];
	vgc->user_regs.esi = pcb->val[REG_LABEL_ESI];
	vgc->user_regs.edi = pcb->val[REG_LABEL_EDI];
#endif /* __amd64 */
}

/*
 * Restore the context of a CPU during resume.  The CPU must either
 * have been blocked in cpu_idle() (running the idle thread), if it was
 * offline, or inside cpu_pause_thread().  Either way we can restore safely
 * from the t_pcb.
 */
void
mach_cpucontext_restore(cpu_t *cp)
{
	vcpu_guest_context_t vgc;
	int err;

	ASSERT(cp->cpu_thread == cp->cpu_pause_thread ||
	    cp->cpu_thread == cp->cpu_idle_thread);

	bzero(&vgc, sizeof (vgc));

	pcb_to_user_regs(&cp->cpu_thread->t_pcb, &vgc);

	/*
	 * We're emulating a longjmp() here: in particular, we need to bump the
	 * stack pointer to account for the pop of xIP that returning from
	 * longjmp() normally would do, and set the return value in xAX to 1.
	 */
#ifdef __amd64
	vgc.user_regs.rax = 1;
	vgc.user_regs.rsp += sizeof (ulong_t);
#else
	vgc.user_regs.eax = 1;
	vgc.user_regs.esp += sizeof (ulong_t);
#endif

	vgc.kernel_sp = cp->cpu_thread->t_sp;

	err = mp_set_cpu_context(&vgc, cp);

	ASSERT(err == 0);
}

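/*
 * Idle loop for a virtual CPU.  The setjmp() below saves our context in
 * t_pcb, which is what mach_cpucontext_restore() rebuilds the vcpu from
 * after a suspend/resume; while we are blocked in the hypervisor we also
 * advertise ourselves in cpu_suspend_set, which poweroff_vcpu() polls to
 * know the vcpu has reached a safe state.
 */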
void
mach_cpu_idle(void)
{
	if (IN_XPV_PANIC()) {
		xpv_panic_halt();
	} else {
		(void) setjmp(&curthread->t_pcb);
		CPUSET_ATOMIC_ADD(cpu_suspend_set, CPU->cpu_id);
		(void) HYPERVISOR_block();
		CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);
	}
}

void
mach_cpu_halt(char *msg)
{
	if (msg)
		prom_printf("%s\n", msg);
	(void) xen_vcpu_down(CPU->cpu_id);
}

void
mach_cpu_pause(volatile char *safe)
{
	ulong_t flags;

	flags = intr_clear();

	if (setjmp(&curthread->t_pcb) == 0) {
		CPUSET_ATOMIC_ADD(cpu_suspend_set, CPU->cpu_id);
		/*
		 * This cpu is now safe.
		 */
		*safe = PAUSE_WAIT;
		membar_enter();
	}

	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();

	CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);

	intr_restore(flags);
}

/*
 * Virtual CPU management.
 *
 * VCPUs can be controlled in one of two ways; through the domain itself
 * (psradm, p_online(), etc.), and via changes in xenstore (vcpu_config()).
 * Unfortunately, the terminology is used in different ways; they work out as
 * follows:
 *
 * P_ONLINE: the VCPU is up and running, taking interrupts and running threads
 *
 * P_OFFLINE: the VCPU is up and running, but quiesced (i.e. blocked in the
 * hypervisor on the idle thread).  It must be up since a downed VCPU cannot
 * receive interrupts, and we require this for offline CPUs in Solaris.
 *
 * P_POWEROFF: the VCPU is down (we never called xen_vcpu_up(), or called
 * xen_vcpu_down() for it).  It can't take interrupts or run anything, though
 * if it has run previously, its software state (cpu_t, machcpu structures, IPI
 * event channels, etc.) will still exist.
 *
 * The hypervisor has two notions of CPU states as represented in the store:
 *
 * "offline": the VCPU is down.  Corresponds to P_POWEROFF.
 *
 * "online": the VCPU is running.  Corresponds to a CPU state other than
 * P_POWEROFF.
 *
 * Currently, only a notification via xenstore can bring a CPU into a
 * P_POWEROFF state, and only the domain can change between P_ONLINE, P_NOINTR,
 * P_OFFLINE, etc.  We need to be careful to treat xenstore notifications
 * idempotently, as we'll get 'duplicate' entries when we resume a domain.
 *
 * Note that the xenstore configuration is strictly advisory, in that a domain
 * can choose to ignore it and still power up a VCPU in the offline state.  To
 * play nice, we don't allow it.  Thus, any attempt to power on/off a CPU is
 * ENOTSUP from within Solaris.
 */
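
/*
 * For quick reference, the mapping described above (a summary only, not
 * an additional interface):
 *
 *	xenstore "availability"		Solaris processor state
 *	-----------------------		------------------------
 *	"offline"			P_POWEROFF
 *	"online"			any state other than P_POWEROFF
 *					(P_ONLINE, P_OFFLINE, P_NOINTR, ...)
 */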

/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
	return (ENOTSUP);
}

/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
	return (ENOTSUP);
}

static int
poweron_vcpu(struct cpu *cp)
{
	int error;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cp->cpu_id, NULL) != 0) {
		printf("poweron_vcpu: vcpu%d is not available!\n",
		    cp->cpu_id);
		return (ENXIO);
	}

	if ((error = xen_vcpu_up(cp->cpu_id)) == 0) {
		CPUSET_ADD(cpu_ready_set, cp->cpu_id);
		cp->cpu_flags |= CPU_EXISTS | CPU_READY | CPU_RUNNING;
		cp->cpu_flags &= ~CPU_POWEROFF;
		/*
		 * There are some nasty races possible here.
		 * Tell the vcpu it's up one more time.
		 * XXPV	Is this enough?  Is this safe?
		 */
		(void) xen_vcpu_up(cp->cpu_id);

		cpu_set_state(cp);
	}
	return (error);
}

static int
poweroff_poke(void)
{
	CPUSET_ATOMIC_DEL(cpu_suspend_set, CPU->cpu_id);
	return (0);
}

/*
 * We must ensure that the VCPU reaches a safe state (in the suspend set, and
 * thus is not going to change) before we can power it off.  The VCPU could
 * still be in mach_cpu_pause() and about to head back out; so just checking
 * cpu_suspend_set() isn't sufficient to make sure the VCPU has stopped moving.
 * Instead, we xcall it to delete itself from the set; whichever way it comes
 * back from that xcall, it won't mark itself in the set until it's safely back
 * in mach_cpu_idle().
 */
static int
poweroff_vcpu(struct cpu *cp)
{
	int error;
	cpuset_t set;

	ASSERT(MUTEX_HELD(&cpu_lock));

	ASSERT(CPU->cpu_id != cp->cpu_id);
	ASSERT(cp->cpu_flags & CPU_QUIESCED);

	CPUSET_ONLY(set, cp->cpu_id);

	xc_sync(0, 0, 0, X_CALL_HIPRI, set, (xc_func_t)poweroff_poke);

	while (!CPU_IN_SET(cpu_suspend_set, cp->cpu_id))
		SMT_PAUSE();

	if ((error = xen_vcpu_down(cp->cpu_id)) == 0) {
		ASSERT(CPU_IN_SET(cpu_suspend_set, cp->cpu_id));
		CPUSET_DEL(cpu_ready_set, cp->cpu_id);
		cp->cpu_flags |= CPU_POWEROFF | CPU_OFFLINE;
		cp->cpu_flags &=
		    ~(CPU_RUNNING | CPU_READY | CPU_EXISTS | CPU_ENABLE);

		cpu_set_state(cp);
	}
	return (error);
}

static int
vcpu_config_poweroff(processorid_t id)
{
	int oldstate;
	int error;
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) == P_POWEROFF) {
		mutex_exit(&cpu_lock);
		return (0);
	}

	mutex_exit(&cpu_lock);

	do {
		error = p_online_internal(id, P_OFFLINE,
		    &oldstate);

		if (error != 0)
			break;

		/*
		 * So we just changed it to P_OFFLINE.  But then we dropped
		 * cpu_lock, so now it is possible for another thread to change
		 * the cpu back to a different, non-quiesced state e.g.
		 * P_ONLINE.
		 */
		mutex_enter(&cpu_lock);
		if ((cp = cpu_get(id)) == NULL)
			error = ESRCH;
		else {
			if (cp->cpu_flags & CPU_QUIESCED)
				error = poweroff_vcpu(cp);
			else
				error = EBUSY;
		}
		mutex_exit(&cpu_lock);
	} while (error == EBUSY);

	return (error);
}

/*
 * Add a new virtual cpu to the domain.
 */
static int
vcpu_config_new(processorid_t id)
{
	extern int start_cpu(processorid_t);
	int error;

	if (ncpus == 1) {
		printf("cannot (yet) add cpus to a single-cpu domain\n");
		return (ENOTSUP);
	}

	affinity_set(CPU_CURRENT);
	error = start_cpu(id);
	affinity_clear();
	return (error);
}

static int
vcpu_config_poweron(processorid_t id)
{
	cpu_t *cp;
	int oldstate;
	int error;

	if (id >= ncpus)
		return (vcpu_config_new(id));

	mutex_enter(&cpu_lock);

	if ((cp = cpu_get(id)) == NULL) {
		mutex_exit(&cpu_lock);
		return (ESRCH);
	}

	if (cpu_get_state(cp) != P_POWEROFF) {
		mutex_exit(&cpu_lock);
		return (0);
	}

	if ((error = poweron_vcpu(cp)) != 0) {
		mutex_exit(&cpu_lock);
		return (error);
	}

	mutex_exit(&cpu_lock);

	return (p_online_internal(id, P_ONLINE, &oldstate));
}

#define	REPORT_LEN	128

static void
vcpu_config_report(processorid_t id, uint_t newstate, int error)
{
	char *report = kmem_alloc(REPORT_LEN, KM_SLEEP);
	size_t len;
	char *ps;

	switch (newstate) {
	case P_ONLINE:
		ps = PS_ONLINE;
		break;
	case P_POWEROFF:
		ps = PS_POWEROFF;
		break;
	default:
		cmn_err(CE_PANIC, "unknown state %u\n", newstate);
		break;
	}

	len = snprintf(report, REPORT_LEN,
	    "cpu%d: externally initiated %s", id, ps);

	if (!error) {
		cmn_err(CE_CONT, "!%s\n", report);
		kmem_free(report, REPORT_LEN);
		return;
	}

	len += snprintf(report + len, REPORT_LEN - len,
	    " failed, error %d: ", error);
	switch (error) {
	case EEXIST:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu already %s", ps ? ps : "?");
		break;
	case ESRCH:
		len += snprintf(report + len, REPORT_LEN - len,
		    "cpu not found");
		break;
	case EINVAL:
	case EALREADY:
		break;
	case EPERM:
		len += snprintf(report + len, REPORT_LEN - len,
		    "insufficient privilege (0x%x)", id);
		break;
	case EBUSY:
		switch (newstate) {
		case P_ONLINE:
			/*
			 * This return comes from mp_cpu_start -
			 * we cannot 'start' the boot CPU.
			 */
			len += snprintf(report + len, REPORT_LEN - len,
			    "already running");
			break;
		case P_POWEROFF:
			len += snprintf(report + len, REPORT_LEN - len,
			    "bound lwps?");
			break;
		default:
			break;
		}
	default:
		break;
	}

	cmn_err(CE_CONT, "%s\n", report);
	kmem_free(report, REPORT_LEN);
}

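/*
 * Apply the xenstore-requested state for virtual CPU 'id'.  A sketch of
 * the store layout this consumes, as implied by the xenbus_scanf() call
 * below (illustrative; the node lives under this domain's xenstore home,
 * e.g. /local/domain/<domid>/):
 *
 *	cpu/<id>/availability = "online" | "offline"
 *
 * so a toolstack write of "offline" to cpu/3/availability fires the "cpu"
 * watch, and vcpu_config_event() dispatches us with id == 3, leading to
 * vcpu_config_poweroff(3).
 */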
static void
vcpu_config(void *arg)
{
	int id = (int)(uintptr_t)arg;
	int error;
	char dir[16];
	char *state;

	if ((uint_t)id >= max_ncpus) {
		cmn_err(CE_WARN,
		    "vcpu_config: cpu%d does not fit in this domain", id);
		return;
	}

	(void) snprintf(dir, sizeof (dir), "cpu/%d", id);
	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (xenbus_scanf(XBT_NULL, dir, "availability", "%s", state) == 0) {
		if (strcmp(state, "online") == 0) {
			error = vcpu_config_poweron(id);
			vcpu_config_report(id, P_ONLINE, error);
		} else if (strcmp(state, "offline") == 0) {
			error = vcpu_config_poweroff(id);
			vcpu_config_report(id, P_POWEROFF, error);
		} else {
			cmn_err(CE_WARN,
			    "cpu%d: unknown target state '%s'", id, state);
		}
	} else
		cmn_err(CE_WARN,
		    "cpu%d: unable to read target state from xenstore", id);

	kmem_free(state, MAXPATHLEN);
}

/*ARGSUSED*/
static void
vcpu_config_event(struct xenbus_watch *watch, const char **vec, uint_t len)
{
	const char *path = vec[XS_WATCH_PATH];
	processorid_t id;
	char *s;

	if ((s = strstr(path, "cpu/")) != NULL &&
	    sscanf(s, "cpu/%d", &id) == 1) {
		/*
		 * Run the virtual CPU configuration on a separate thread to
		 * avoid blocking on this event for too long (and for now,
		 * to ensure configuration requests are serialized.)
		 */
		(void) taskq_dispatch(cpu_config_tq,
		    vcpu_config, (void *)(uintptr_t)id, 0);
	}
}

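/*
 * The routines below are thin wrappers around the VCPUOP_* hypercalls;
 * mostly they just translate the hypervisor's error codes into messages
 * that are meaningful to someone debugging the domain.
 */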
static int
xen_vcpu_initialize(processorid_t id, vcpu_guest_context_t *vgc)
{
	int err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_initialise, id, vgc)) != 0) {
		char *str;
		int level = CE_WARN;

		switch (err) {
		case -X_EINVAL:
			/*
			 * This interface squashes multiple error sources
			 * to one error code.  In particular, an X_EINVAL
			 * code can mean:
			 *
			 * -	the vcpu id is out of range
			 * -	cs or ss are in ring 0
			 * -	cr3 is wrong
			 * -	an entry in the new gdt is above the
			 *	reserved entry
			 * -	a frame underneath the new gdt is bad
			 */
			str = "something is wrong :(";
			break;
		case -X_ENOENT:
			str = "no such cpu";
			break;
		case -X_ENOMEM:
			str = "no mem to copy ctxt";
			break;
		case -X_EFAULT:
			str = "bad address";
			break;
		case -X_EEXIST:
			/*
			 * Hmm.  This error is returned if the vcpu has already
			 * been initialized once before in the lifetime of this
			 * domain.  This is a logic error in the kernel.
			 */
			level = CE_PANIC;
			str = "already initialized";
			break;
		default:
			level = CE_PANIC;
			str = "<unexpected>";
			break;
		}

		cmn_err(level, "vcpu%d: failed to init: error %d: %s",
		    id, -err, str);
	}
	return (err);
}

long
xen_vcpu_up(processorid_t id)
{
	long err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_up, id, NULL)) != 0) {
		char *str;

		switch (err) {
		case -X_ENOENT:
			str = "no such cpu";
			break;
		case -X_EINVAL:
			/*
			 * Perhaps this is diagnostic overkill.
			 */
			if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) < 0)
				str = "bad cpuid";
			else
				str = "not initialized";
			break;
		default:
			str = "<unexpected>";
			break;
		}

		printf("vcpu%d: failed to start: error %d: %s\n",
		    id, -(int)err, str);
		return (EBFONT);	/* deliberately silly */
	}
	return (err);
}

long
xen_vcpu_down(processorid_t id)
{
	long err;

	if ((err = HYPERVISOR_vcpu_op(VCPUOP_down, id, NULL)) != 0) {
		/*
		 * X_ENOENT:	no such cpu
		 * X_EINVAL:	bad cpuid
		 */
		panic("vcpu%d: failed to stop: error %d", id, -(int)err);
	}

	return (err);
}