xref: /illumos-gate/usr/src/uts/i86pc/os/mp_pc.c (revision 50d967713af8725306d090dd29033d9efe924715)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright (c) 2010, Intel Corporation.
26  * All rights reserved.
27  */
28 /*
29  * Copyright 2019 Joyent, Inc.
30  */
31 
32 /*
33  * Welcome to the world of the "real mode platter".
34  * See also startup.c, mpcore.s and apic.c for related routines.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cpuvar.h>
40 #include <sys/cpu_module.h>
41 #include <sys/kmem.h>
42 #include <sys/archsystm.h>
43 #include <sys/machsystm.h>
44 #include <sys/controlregs.h>
45 #include <sys/x86_archext.h>
46 #include <sys/smp_impldefs.h>
47 #include <sys/sysmacros.h>
48 #include <sys/mach_mmu.h>
49 #include <sys/promif.h>
50 #include <sys/cpu.h>
51 #include <sys/cpu_event.h>
52 #include <sys/sunndi.h>
53 #include <sys/fs/dv_node.h>
54 #include <vm/hat_i86.h>
55 #include <vm/as.h>
56 
/* Set of CPUs that have reached READY; polled by mp_cpu_poweron(). */
extern cpuset_t cpu_ready_set;

/* Common CPU start path, used here by mp_cpu_poweron(). */
extern int  mp_start_cpu_common(cpu_t *cp, boolean_t boot);

/*
 * Start/end symbol pairs bracketing the code fragments that this file
 * copies onto the real mode platter. The length of each fragment is
 * computed as (end symbol - start symbol).
 */
extern void real_mode_start_cpu(void);
extern void real_mode_start_cpu_end(void);
extern void real_mode_stop_cpu_stage1(void);
extern void real_mode_stop_cpu_stage1_end(void);
extern void real_mode_stop_cpu_stage2(void);
extern void real_mode_stop_cpu_stage2_end(void);

/* Fills in the temporary GDT/IDT fields of the real mode platter. */
void rmp_gdt_init(rm_platter_t *);
68 
69 /*
70  * Fill up the real mode platter to make it easy for real mode code to
71  * kick it off. This area should really be one passed by boot to kernel
72  * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
73  * have identical physical and virtual address in paged mode.
74  */
75 static ushort_t *warm_reset_vector = NULL;
76 
77 int
78 mach_cpucontext_init(void)
79 {
80 	ushort_t *vec;
81 	ulong_t addr;
82 	struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
83 
84 	if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
85 	    sizeof (vec), PROT_READ | PROT_WRITE)))
86 		return (-1);
87 
88 	/*
89 	 * setup secondary cpu bios boot up vector
90 	 * Write page offset to 0x467 and page frame number to 0x469.
91 	 */
92 	addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
93 	vec[0] = (ushort_t)(addr & PAGEOFFSET);
94 	vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
95 	warm_reset_vector = vec;
96 
97 	/* Map real mode platter into kas so kernel can access it. */
98 	hat_devload(kas.a_hat,
99 	    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
100 	    btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
101 	    HAT_LOAD_NOCONSIST);
102 
103 	/* Copy CPU startup code to rm_platter if it's still during boot. */
104 	if (!plat_dr_enabled()) {
105 		ASSERT((size_t)real_mode_start_cpu_end -
106 		    (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
107 		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
108 		    (size_t)real_mode_start_cpu_end -
109 		    (size_t)real_mode_start_cpu);
110 	}
111 
112 	return (0);
113 }
114 
115 void
116 mach_cpucontext_fini(void)
117 {
118 	if (warm_reset_vector)
119 		psm_unmap_phys((caddr_t)warm_reset_vector,
120 		    sizeof (warm_reset_vector));
121 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
122 	    HAT_UNLOAD);
123 }
124 
125 #if defined(__amd64)
126 extern void *long_mode_64(void);
127 #endif	/* __amd64 */
128 
129 /*ARGSUSED*/
130 void
131 rmp_gdt_init(rm_platter_t *rm)
132 {
133 
134 #if defined(__amd64)
135 	/* Use the kas address space for the CPU startup thread. */
136 	if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
137 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
138 		    "located above 4G in physical memory (@ 0x%lx)",
139 		    mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
140 	}
141 
142 	/*
143 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
144 	 * by code in real_mode_start_cpu():
145 	 *
146 	 * GDT[0]:  NULL selector
147 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
148 	 *
149 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
150 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
151 	 * a course of action as any other, though it may cause the entire
152 	 * platform to reset in some cases...
153 	 */
154 	rm->rm_temp_gdt[0] = 0;
155 	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
156 
157 	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
158 	rm->rm_temp_gdt_base = rm_platter_pa +
159 	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
160 	rm->rm_temp_idt_lim = 0;
161 	rm->rm_temp_idt_base = 0;
162 
163 	/*
164 	 * Since the CPU needs to jump to protected mode using an identity
165 	 * mapped address, we need to calculate it here.
166 	 */
167 	rm->rm_longmode64_addr = rm_platter_pa +
168 	    (uint32_t)((uintptr_t)long_mode_64 -
169 	    (uintptr_t)real_mode_start_cpu);
170 #endif	/* __amd64 */
171 }
172 
/*
 * Allocate and initialize the per-CPU tables (struct cpu_tables) for cp.
 *
 * The allocation is zeroed and rounded up to a multiple of PAGESIZE; the
 * function panics if the result is not page aligned. cp->cpu_tss is
 * pointed at the TSS embedded in the tables, the TSS is filled in (IST
 * stacks on amd64, ring stacks and segment selectors on i386), and the
 * kernel TSS descriptor in cp->cpu_gdt[GDT_KTSS] is set to refer to it.
 *
 * Returns the newly allocated struct cpu_tables.
 */
static void *
mach_cpucontext_alloc_tables(struct cpu *cp)
{
	tss_t *ntss;
	struct cpu_tables *ct;
	size_t ctsize;

	/*
	 * Allocate space for stack, tss, gdt and idt. We round the size
	 * allotted for cpu_tables up, so that the TSS is on a unique page.
	 * This is more efficient when running in virtual machines.
	 */
	ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
	ct = kmem_zalloc(ctsize, KM_SLEEP);
	if ((uintptr_t)ct & PAGEOFFSET)
		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
		    cp->cpu_id);

	ntss = cp->cpu_tss = &ct->ct_tss;

#if defined(__amd64)
	uintptr_t va;
	size_t len;

	/*
	 * Each IST entry below is set to the address one past the end of
	 * its stack array.
	 */

	/*
	 * #DF (double fault).
	 */
	ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];

	/*
	 * NMI (non-maskable interrupt)
	 */
	ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];

	/*
	 * #MC (machine check exception / hardware error)
	 */
	ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];

	/*
	 * #DB, #BP debug interrupts and KDI/kmdb
	 */
	ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;

	if (kpti_enable == 1) {
		/*
		 * #GP, #PF, #SS fault interrupts
		 */
		ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

		/*
		 * Used by all other interrupts
		 */
		ntss->tss_ist6 = (uint64_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;

		/*
		 * On AMD64 we need to make sure that all of the pages of the
		 * struct cpu_tables are punched through onto the user CPU for
		 * kpti.
		 *
		 * The final page will always be the TSS, so treat that
		 * separately.
		 */
		for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
		    len >= MMU_PAGESIZE;
		    len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
			/* The doublefault stack must be RW */
			hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
		}
		/* After the loop, va is the address of the last page. */
		ASSERT3U((uintptr_t)ntss, ==, va);
		hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
	}

#elif defined(__i386)

	/* All ring stack pointers start at the end of ct_stack1. */
	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
	    (uint32_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];

	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;

	ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;

	ntss->tss_cs = KCS_SEL;
	ntss->tss_ds = ntss->tss_es = KDS_SEL;
	ntss->tss_fs = KFS_SEL;
	ntss->tss_gs = KGS_SEL;

#endif	/* __i386 */

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will cause all user I/O
	 * instructions to generate #gp fault.
	 */
	ntss->tss_bitmapbase = sizeof (*ntss);

	/*
	 * Setup kernel tss.
	 */
	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
	    sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);

	return (ct);
}
277 
278 void *
279 mach_cpucontext_xalloc(struct cpu *cp, int optype)
280 {
281 	size_t len;
282 	struct cpu_tables *ct;
283 	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
284 	static int cpu_halt_code_ready;
285 
286 	if (optype == MACH_CPUCONTEXT_OP_STOP) {
287 		ASSERT(plat_dr_enabled());
288 
289 		/*
290 		 * The WARM_RESET_VECTOR has a limitation that the physical
291 		 * address written to it must be page-aligned. To work around
292 		 * this limitation, the CPU stop code has been splitted into
293 		 * two stages.
294 		 * The stage 2 code, which implements the real logic to halt
295 		 * CPUs, is copied to the rm_cpu_halt_code field in the real
296 		 * mode platter. The stage 1 code, which simply jumps to the
297 		 * stage 2 code in the rm_cpu_halt_code field, is copied to
298 		 * rm_code field in the real mode platter and it may be
299 		 * overwritten after the CPU has been stopped.
300 		 */
301 		if (!cpu_halt_code_ready) {
302 			/*
303 			 * The rm_cpu_halt_code field in the real mode platter
304 			 * is used by the CPU stop code only. So only copy the
305 			 * CPU stop stage 2 code into the rm_cpu_halt_code
306 			 * field on the first call.
307 			 */
308 			len = (size_t)real_mode_stop_cpu_stage2_end -
309 			    (size_t)real_mode_stop_cpu_stage2;
310 			ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
311 			bcopy((caddr_t)real_mode_stop_cpu_stage2,
312 			    (caddr_t)rm->rm_cpu_halt_code, len);
313 			cpu_halt_code_ready = 1;
314 		}
315 
316 		/*
317 		 * The rm_code field in the real mode platter is shared by
318 		 * the CPU start, CPU stop, CPR and fast reboot code. So copy
319 		 * the CPU stop stage 1 code into the rm_code field every time.
320 		 */
321 		len = (size_t)real_mode_stop_cpu_stage1_end -
322 		    (size_t)real_mode_stop_cpu_stage1;
323 		ASSERT(len <= RM_PLATTER_CODE_SIZE);
324 		bcopy((caddr_t)real_mode_stop_cpu_stage1,
325 		    (caddr_t)rm->rm_code, len);
326 		rm->rm_cpu_halted = 0;
327 
328 		return (cp->cpu_m.mcpu_mach_ctx_ptr);
329 	} else if (optype != MACH_CPUCONTEXT_OP_START) {
330 		return (NULL);
331 	}
332 
333 	/*
334 	 * Only need to allocate tables when starting CPU.
335 	 * Tables allocated when starting CPU will be reused when stopping CPU.
336 	 */
337 	ct = mach_cpucontext_alloc_tables(cp);
338 	if (ct == NULL) {
339 		return (NULL);
340 	}
341 
342 	/* Copy CPU startup code to rm_platter for CPU hot-add operations. */
343 	if (plat_dr_enabled()) {
344 		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
345 		    (size_t)real_mode_start_cpu_end -
346 		    (size_t)real_mode_start_cpu);
347 	}
348 
349 	/*
350 	 * Now copy all that we've set up onto the real mode platter
351 	 * for the real mode code to digest as part of starting the cpu.
352 	 */
353 	rm->rm_idt_base = cp->cpu_idt;
354 	rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
355 	rm->rm_gdt_base = cp->cpu_gdt;
356 	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
357 
358 	/*
359 	 * CPU needs to access kernel address space after powering on.
360 	 */
361 	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
362 	rm->rm_cpu = cp->cpu_id;
363 
364 	/*
365 	 * We need to mask off any bits set on our boot CPU that can't apply
366 	 * while the subject CPU is initializing.  If appropriate, they are
367 	 * enabled later on.
368 	 */
369 	rm->rm_cr4 = getcr4();
370 	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);
371 
372 	rmp_gdt_init(rm);
373 
374 	return (ct);
375 }
376 
377 void
378 mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
379 {
380 	struct cpu_tables *ct = arg;
381 
382 	ASSERT(&ct->ct_tss == cp->cpu_tss);
383 	if (optype == MACH_CPUCONTEXT_OP_START) {
384 		switch (err) {
385 		case 0:
386 			/*
387 			 * Save pointer for reuse when stopping CPU.
388 			 */
389 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
390 			break;
391 		case ETIMEDOUT:
392 			/*
393 			 * The processor was poked, but failed to start before
394 			 * we gave up waiting for it.  In case it starts later,
395 			 * don't free anything.
396 			 */
397 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
398 			break;
399 		default:
400 			/*
401 			 * Some other, passive, error occurred.
402 			 */
403 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
404 			cp->cpu_tss = NULL;
405 			break;
406 		}
407 	} else if (optype == MACH_CPUCONTEXT_OP_STOP) {
408 		switch (err) {
409 		case 0:
410 			/*
411 			 * Free resources allocated when starting CPU.
412 			 */
413 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
414 			cp->cpu_tss = NULL;
415 			cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
416 			break;
417 		default:
418 			/*
419 			 * Don't touch table pointer in case of failure.
420 			 */
421 			break;
422 		}
423 	} else {
424 		ASSERT(0);
425 	}
426 }
427 
428 void *
429 mach_cpucontext_alloc(struct cpu *cp)
430 {
431 	return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
432 }
433 
/*
 * Finish a CPU start attempt whose result is err; shorthand for
 * mach_cpucontext_xfree() with MACH_CPUCONTEXT_OP_START.
 */
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}
439 
440 /*
441  * "Enter monitor."  Called via cross-call from stop_other_cpus().
442  */
443 int
444 mach_cpu_halt(xc_arg_t arg1, xc_arg_t arg2 __unused, xc_arg_t arg3 __unused)
445 {
446 	char *msg = (char *)arg1;
447 
448 	if (msg)
449 		prom_printf("%s\n", msg);
450 
451 	/*CONSTANTCONDITION*/
452 	while (1)
453 		;
454 	return (0);
455 }
456 
/*
 * Idle the current CPU: call x86_md_clear() and then halt via i86_halt().
 * NOTE(review): x86_md_clear() presumably flushes microarchitectural
 * state before the CPU goes idle -- confirm against its definition.
 */
void
mach_cpu_idle(void)
{
	x86_md_clear();
	i86_halt();
}
463 
464 void
465 mach_cpu_pause(volatile char *safe)
466 {
467 	/*
468 	 * This cpu is now safe.
469 	 */
470 	*safe = PAUSE_WAIT;
471 	membar_enter(); /* make sure stores are flushed */
472 
473 	/*
474 	 * Now we wait.  When we are allowed to continue, safe
475 	 * will be set to PAUSE_IDLE.
476 	 */
477 	while (*safe != PAUSE_IDLE)
478 		SMT_PAUSE();
479 }
480 
481 /*
482  * Power on the target CPU.
483  */
484 int
485 mp_cpu_poweron(struct cpu *cp)
486 {
487 	int error;
488 	cpuset_t tempset;
489 	processorid_t cpuid;
490 
491 	ASSERT(cp != NULL);
492 	cpuid = cp->cpu_id;
493 	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
494 		return (ENOTSUP);
495 	} else if (cpuid < 0 || cpuid >= max_ncpus) {
496 		return (EINVAL);
497 	}
498 
499 	/*
500 	 * The currrent x86 implementaiton of mp_cpu_configure() and
501 	 * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
502 	 * be called once after calling mp_cpu_configure() for a specific CPU.
503 	 * It's because mp_cpu_poweron() will destroy data structure created
504 	 * by mp_cpu_configure(). So reject the request if the CPU has already
505 	 * been powered on once after calling mp_cpu_configure().
506 	 * This limitaiton only affects the p_online syscall and the DR driver
507 	 * won't be affected because the DR driver always invoke public CPU
508 	 * management interfaces in the predefined order:
509 	 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
510 	 */
511 	if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
512 		return (ENOTSUP);
513 	}
514 
515 	/*
516 	 * Check if there's at least a Mbyte of kmem available
517 	 * before attempting to start the cpu.
518 	 */
519 	if (kmem_avail() < 1024 * 1024) {
520 		/*
521 		 * Kick off a reap in case that helps us with
522 		 * later attempts ..
523 		 */
524 		kmem_reap();
525 		return (ENOMEM);
526 	}
527 
528 	affinity_set(CPU->cpu_id);
529 
530 	/*
531 	 * Start the target CPU. No need to call mach_cpucontext_fini()
532 	 * if mach_cpucontext_init() fails.
533 	 */
534 	if ((error = mach_cpucontext_init()) == 0) {
535 		error = mp_start_cpu_common(cp, B_FALSE);
536 		mach_cpucontext_fini();
537 	}
538 	if (error != 0) {
539 		affinity_clear();
540 		return (error);
541 	}
542 
543 	/* Wait for the target cpu to reach READY state. */
544 	tempset = cpu_ready_set;
545 	while (!CPU_IN_SET(tempset, cpuid)) {
546 		delay(1);
547 		tempset = *((volatile cpuset_t *)&cpu_ready_set);
548 	}
549 
550 	/* Mark the target CPU as available for mp operation. */
551 	CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
552 
553 	/* Free the space allocated to hold the microcode file */
554 	ucode_cleanup();
555 
556 	affinity_clear();
557 
558 	return (0);
559 }
560 
561 #define	MP_CPU_DETACH_MAX_TRIES		5
562 #define	MP_CPU_DETACH_DELAY		100
563 
564 static int
565 mp_cpu_detach_driver(dev_info_t *dip)
566 {
567 	int i;
568 	int rv = EBUSY;
569 	dev_info_t *pdip;
570 
571 	pdip = ddi_get_parent(dip);
572 	ASSERT(pdip != NULL);
573 	/*
574 	 * Check if caller holds pdip busy - can cause deadlocks in
575 	 * e_ddi_branch_unconfigure(), which calls devfs_clean().
576 	 */
577 	if (DEVI_BUSY_OWNED(pdip)) {
578 		return (EDEADLOCK);
579 	}
580 
581 	for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
582 		if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
583 			rv = 0;
584 			break;
585 		}
586 		DELAY(MP_CPU_DETACH_DELAY);
587 	}
588 
589 	return (rv);
590 }
591 
/*
 * Power off the target CPU.
 * Note: cpu_lock will be released and then reacquired.
 *
 * The CPU is expected to be offline and quiesced already. Returns 0 once
 * the CPU has reported itself halted through the real mode platter.
 * Returns ENOTSUP if MP or CPU DR is unsupported or if cp is cpu0, and
 * ENXIO if the CPU's device node cannot be found. On any later failure
 * the steps taken so far are undone and the device branch is configured
 * again; the error is then reported as ENXIO unless it is EAGAIN or
 * ETIME (so the EBUSY set when the driver cannot be detached is also
 * returned as ENXIO).
 */
int
mp_cpu_poweroff(struct cpu *cp)
{
	int rv = 0;
	void *ctx;
	dev_info_t *dip = NULL;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
	extern void cpupm_start(cpu_t *);
	extern void cpupm_stop(cpu_t *);

	ASSERT(cp != NULL);
	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);

	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	}
	/*
	 * There is no support for powering off cpu0 yet.
	 * There are many pieces of code which have a hard dependency on cpu0.
	 */
	if (cp->cpu_id == 0) {
		return (ENOTSUP);
	};

	if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
		return (ENXIO);
	}
	ASSERT(dip != NULL);
	if (mp_cpu_detach_driver(dip) != 0) {
		rv = EBUSY;
		goto out_online;
	}

	/* Allocate CPU context for stopping */
	if (mach_cpucontext_init() != 0) {
		rv = ENXIO;
		goto out_online;
	}
	ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
	if (ctx == NULL) {
		rv = ENXIO;
		goto out_context_fini;
	}

	/*
	 * Tear down the per-CPU services; each of these is set up again
	 * under out_enable_cmi if stopping the CPU fails.
	 */
	cpupm_stop(cp);
	cpu_event_fini_cpu(cp);

	if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
		cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
		cp->cpu_m.mcpu_cmi_hdl = NULL;
	}

	rv = mach_cpu_stop(cp, ctx);
	if (rv != 0) {
		goto out_enable_cmi;
	}

	/* Wait until the target CPU has been halted. */
	while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
		delay(1);
	}
	rm->rm_cpu_halted = 0xffff;

	/* CPU_READY has been cleared by mach_cpu_stop. */
	ASSERT((cp->cpu_flags & CPU_READY) == 0);
	ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
	CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);

	mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
	mach_cpucontext_fini();

	return (0);

	/*
	 * Failure paths. The labels are entered from the most to the least
	 * advanced state and fall through, undoing the steps above in
	 * reverse order.
	 */
out_enable_cmi:
	{
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
			if (is_x86_feature(x86_featureset, X86FSET_MCA))
				cmi_mca_init(hdl);
			cp->cpu_m.mcpu_cmi_hdl = hdl;
		}
	}
	cpu_event_init_cpu(cp);
	cpupm_start(cp);
	mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);

out_context_fini:
	mach_cpucontext_fini();

out_online:
	/* Bring the device branch detached above back; result is ignored. */
	(void) e_ddi_branch_configure(dip, NULL, 0);

	/* Only EAGAIN and ETIME are passed through; all else is ENXIO. */
	if (rv != EAGAIN && rv != ETIME) {
		rv = ENXIO;
	}

	return (rv);
}
698 
699 /*
700  * Return vcpu state, since this could be a virtual environment that we
701  * are unaware of, return "unknown".
702  */
703 /* ARGSUSED */
704 int
705 vcpu_on_pcpu(processorid_t cpu)
706 {
707 	return (VCPU_STATE_UNKNOWN);
708 }
709