1ae115bc7Smrj /*
2ae115bc7Smrj * CDDL HEADER START
3ae115bc7Smrj *
4ae115bc7Smrj * The contents of this file are subject to the terms of the
5ae115bc7Smrj * Common Development and Distribution License (the "License").
6ae115bc7Smrj * You may not use this file except in compliance with the License.
7ae115bc7Smrj *
8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj * See the License for the specific language governing permissions
11ae115bc7Smrj * and limitations under the License.
12ae115bc7Smrj *
13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj *
19ae115bc7Smrj * CDDL HEADER END
20ae115bc7Smrj */
21ae115bc7Smrj /*
227417cfdeSKuriakose Kuruvilla * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23ae115bc7Smrj */
24a3114836SGerry Liu /*
25a3114836SGerry Liu * Copyright (c) 2010, Intel Corporation.
26a3114836SGerry Liu * All rights reserved.
27a3114836SGerry Liu */
28f16a0f4cSRobert Mustacchi /*
29f16a0f4cSRobert Mustacchi * Copyright 2011 Joyent, Inc. All rights reserved.
30f16a0f4cSRobert Mustacchi */
31ae115bc7Smrj
32ae115bc7Smrj /*
33ae115bc7Smrj * Welcome to the world of the "real mode platter".
34ae115bc7Smrj * See also startup.c, mpcore.s and apic.c for related routines.
35ae115bc7Smrj */
36ae115bc7Smrj
37ae115bc7Smrj #include <sys/types.h>
38ae115bc7Smrj #include <sys/systm.h>
39ae115bc7Smrj #include <sys/cpuvar.h>
40a3114836SGerry Liu #include <sys/cpu_module.h>
41ae115bc7Smrj #include <sys/kmem.h>
42ae115bc7Smrj #include <sys/archsystm.h>
43ae115bc7Smrj #include <sys/machsystm.h>
44ae115bc7Smrj #include <sys/controlregs.h>
45ae115bc7Smrj #include <sys/x86_archext.h>
46ae115bc7Smrj #include <sys/smp_impldefs.h>
47ae115bc7Smrj #include <sys/sysmacros.h>
48ae115bc7Smrj #include <sys/mach_mmu.h>
49ae115bc7Smrj #include <sys/promif.h>
50ae115bc7Smrj #include <sys/cpu.h>
51a3114836SGerry Liu #include <sys/cpu_event.h>
52a3114836SGerry Liu #include <sys/sunndi.h>
53a3114836SGerry Liu #include <sys/fs/dv_node.h>
5495c0a3c8Sjosephb #include <vm/hat_i86.h>
55a3114836SGerry Liu #include <vm/as.h>
56ae115bc7Smrj
57a3114836SGerry Liu extern cpuset_t cpu_ready_set;
58a3114836SGerry Liu
59a3114836SGerry Liu extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
60a3114836SGerry Liu extern void real_mode_start_cpu(void);
61a3114836SGerry Liu extern void real_mode_start_cpu_end(void);
62a3114836SGerry Liu extern void real_mode_stop_cpu_stage1(void);
63a3114836SGerry Liu extern void real_mode_stop_cpu_stage1_end(void);
64a3114836SGerry Liu extern void real_mode_stop_cpu_stage2(void);
65a3114836SGerry Liu extern void real_mode_stop_cpu_stage2_end(void);
662df1fe9cSrandyf
672df1fe9cSrandyf void rmp_gdt_init(rm_platter_t *);
68ae115bc7Smrj
69ae115bc7Smrj /*
70ae115bc7Smrj * Fill up the real mode platter to make it easy for real mode code to
71ae115bc7Smrj * kick it off. This area should really be one passed by boot to kernel
72ae115bc7Smrj * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
73ae115bc7Smrj * have identical physical and virtual address in paged mode.
74ae115bc7Smrj */
75ae115bc7Smrj static ushort_t *warm_reset_vector = NULL;
76ae115bc7Smrj
77ae115bc7Smrj int
mach_cpucontext_init(void)78ae115bc7Smrj mach_cpucontext_init(void)
79ae115bc7Smrj {
80ae115bc7Smrj ushort_t *vec;
81a3114836SGerry Liu ulong_t addr;
82a3114836SGerry Liu struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
83ae115bc7Smrj
84ae115bc7Smrj if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
85ae115bc7Smrj sizeof (vec), PROT_READ | PROT_WRITE)))
86ae115bc7Smrj return (-1);
87a3114836SGerry Liu
88ae115bc7Smrj /*
89ae115bc7Smrj * setup secondary cpu bios boot up vector
90a3114836SGerry Liu * Write page offset to 0x467 and page frame number to 0x469.
91ae115bc7Smrj */
92a3114836SGerry Liu addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
93a3114836SGerry Liu vec[0] = (ushort_t)(addr & PAGEOFFSET);
94a3114836SGerry Liu vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
95ae115bc7Smrj warm_reset_vector = vec;
96ae115bc7Smrj
97a3114836SGerry Liu /* Map real mode platter into kas so kernel can access it. */
98a3114836SGerry Liu hat_devload(kas.a_hat,
99a3114836SGerry Liu (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
100a3114836SGerry Liu btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
101a3114836SGerry Liu HAT_LOAD_NOCONSIST);
102a3114836SGerry Liu
103a3114836SGerry Liu /* Copy CPU startup code to rm_platter if it's still during boot. */
104a3114836SGerry Liu if (!plat_dr_enabled()) {
105a3114836SGerry Liu ASSERT((size_t)real_mode_start_cpu_end -
106a3114836SGerry Liu (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
107a3114836SGerry Liu bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
108a3114836SGerry Liu (size_t)real_mode_start_cpu_end -
109a3114836SGerry Liu (size_t)real_mode_start_cpu);
110a3114836SGerry Liu }
111ae115bc7Smrj
112ae115bc7Smrj return (0);
113ae115bc7Smrj }
114ae115bc7Smrj
115ae115bc7Smrj void
mach_cpucontext_fini(void)116ae115bc7Smrj mach_cpucontext_fini(void)
117ae115bc7Smrj {
118ae115bc7Smrj if (warm_reset_vector)
119ae115bc7Smrj psm_unmap_phys((caddr_t)warm_reset_vector,
120ae115bc7Smrj sizeof (warm_reset_vector));
121ae115bc7Smrj hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
122ae115bc7Smrj HAT_UNLOAD);
123ae115bc7Smrj }
124ae115bc7Smrj
125ae115bc7Smrj #if defined(__amd64)
126ae115bc7Smrj extern void *long_mode_64(void);
127ae115bc7Smrj #endif /* __amd64 */
128ae115bc7Smrj
129a3114836SGerry Liu /*ARGSUSED*/
130a3114836SGerry Liu void
rmp_gdt_init(rm_platter_t * rm)131a3114836SGerry Liu rmp_gdt_init(rm_platter_t *rm)
132ae115bc7Smrj {
133a3114836SGerry Liu
134a3114836SGerry Liu #if defined(__amd64)
135a3114836SGerry Liu /* Use the kas address space for the CPU startup thread. */
136a3114836SGerry Liu if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
137a3114836SGerry Liu panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
138a3114836SGerry Liu "located above 4G in physical memory (@ 0x%lx)",
139a3114836SGerry Liu MAKECR3(kas.a_hat->hat_htable->ht_pfn));
140a3114836SGerry Liu
141a3114836SGerry Liu /*
142a3114836SGerry Liu * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
143a3114836SGerry Liu * by code in real_mode_start_cpu():
144a3114836SGerry Liu *
145a3114836SGerry Liu * GDT[0]: NULL selector
146a3114836SGerry Liu * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
147a3114836SGerry Liu *
148a3114836SGerry Liu * Clear the IDT as interrupts will be off and a limit of 0 will cause
149a3114836SGerry Liu * the CPU to triple fault and reset on an NMI, seemingly as reasonable
150a3114836SGerry Liu * a course of action as any other, though it may cause the entire
151a3114836SGerry Liu * platform to reset in some cases...
152a3114836SGerry Liu */
153a3114836SGerry Liu rm->rm_temp_gdt[0] = 0;
154a3114836SGerry Liu rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
155a3114836SGerry Liu
156a3114836SGerry Liu rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
157a3114836SGerry Liu rm->rm_temp_gdt_base = rm_platter_pa +
158a3114836SGerry Liu (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
159a3114836SGerry Liu rm->rm_temp_idt_lim = 0;
160a3114836SGerry Liu rm->rm_temp_idt_base = 0;
161a3114836SGerry Liu
162a3114836SGerry Liu /*
163a3114836SGerry Liu * Since the CPU needs to jump to protected mode using an identity
164a3114836SGerry Liu * mapped address, we need to calculate it here.
165a3114836SGerry Liu */
166a3114836SGerry Liu rm->rm_longmode64_addr = rm_platter_pa +
167*c909a41bSRichard Lowe (uint32_t)((uintptr_t)long_mode_64 -
168*c909a41bSRichard Lowe (uintptr_t)real_mode_start_cpu);
169a3114836SGerry Liu #endif /* __amd64 */
170a3114836SGerry Liu }
171a3114836SGerry Liu
172a3114836SGerry Liu static void *
mach_cpucontext_alloc_tables(struct cpu * cp)173a3114836SGerry Liu mach_cpucontext_alloc_tables(struct cpu *cp)
174a3114836SGerry Liu {
175f16a0f4cSRobert Mustacchi tss_t *ntss;
176a3114836SGerry Liu struct cpu_tables *ct;
177ae115bc7Smrj
178ae115bc7Smrj /*
1790cfdb603Sjosephb * Allocate space for stack, tss, gdt and idt. We round the size
180fb2caebeSRandy Fishel * allotted for cpu_tables up, so that the TSS is on a unique page.
1810cfdb603Sjosephb * This is more efficient when running in virtual machines.
182ae115bc7Smrj */
1830cfdb603Sjosephb ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
1840cfdb603Sjosephb if ((uintptr_t)ct & PAGEOFFSET)
185a3114836SGerry Liu panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
186a3114836SGerry Liu cp->cpu_id);
187ae115bc7Smrj
188ae115bc7Smrj ntss = cp->cpu_tss = &ct->ct_tss;
189ae115bc7Smrj
190ae115bc7Smrj #if defined(__amd64)
191ae115bc7Smrj
192ae115bc7Smrj /*
193ae115bc7Smrj * #DF (double fault).
194ae115bc7Smrj */
195ae115bc7Smrj ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];
196ae115bc7Smrj
197ae115bc7Smrj #elif defined(__i386)
198ae115bc7Smrj
199ae115bc7Smrj ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
200ae115bc7Smrj (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];
201ae115bc7Smrj
202ae115bc7Smrj ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
203ae115bc7Smrj
204ae115bc7Smrj ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
205ae115bc7Smrj
206ae115bc7Smrj ntss->tss_cs = KCS_SEL;
207ae115bc7Smrj ntss->tss_ds = ntss->tss_es = KDS_SEL;
208ae115bc7Smrj ntss->tss_fs = KFS_SEL;
209ae115bc7Smrj ntss->tss_gs = KGS_SEL;
210ae115bc7Smrj
211ae115bc7Smrj #endif /* __i386 */
212ae115bc7Smrj
213ae115bc7Smrj /*
214ae115bc7Smrj * Set I/O bit map offset equal to size of TSS segment limit
215ae115bc7Smrj * for no I/O permission map. This will cause all user I/O
216ae115bc7Smrj * instructions to generate #gp fault.
217ae115bc7Smrj */
218ae115bc7Smrj ntss->tss_bitmapbase = sizeof (*ntss);
219ae115bc7Smrj
220ae115bc7Smrj /*
221ae115bc7Smrj * Setup kernel tss.
222ae115bc7Smrj */
223ae115bc7Smrj set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
224ae115bc7Smrj sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
225ae115bc7Smrj
226a3114836SGerry Liu return (ct);
227a3114836SGerry Liu }
228a3114836SGerry Liu
229a3114836SGerry Liu void *
mach_cpucontext_xalloc(struct cpu * cp,int optype)230a3114836SGerry Liu mach_cpucontext_xalloc(struct cpu *cp, int optype)
231a3114836SGerry Liu {
232a3114836SGerry Liu size_t len;
233a3114836SGerry Liu struct cpu_tables *ct;
234a3114836SGerry Liu rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
235a3114836SGerry Liu static int cpu_halt_code_ready;
236a3114836SGerry Liu
237a3114836SGerry Liu if (optype == MACH_CPUCONTEXT_OP_STOP) {
238a3114836SGerry Liu ASSERT(plat_dr_enabled());
239a3114836SGerry Liu
240a3114836SGerry Liu /*
241a3114836SGerry Liu * The WARM_RESET_VECTOR has a limitation that the physical
242a3114836SGerry Liu * address written to it must be page-aligned. To work around
243a3114836SGerry Liu * this limitation, the CPU stop code has been splitted into
244a3114836SGerry Liu * two stages.
245a3114836SGerry Liu * The stage 2 code, which implements the real logic to halt
246a3114836SGerry Liu * CPUs, is copied to the rm_cpu_halt_code field in the real
247a3114836SGerry Liu * mode platter. The stage 1 code, which simply jumps to the
248a3114836SGerry Liu * stage 2 code in the rm_cpu_halt_code field, is copied to
249a3114836SGerry Liu * rm_code field in the real mode platter and it may be
250a3114836SGerry Liu * overwritten after the CPU has been stopped.
251a3114836SGerry Liu */
252a3114836SGerry Liu if (!cpu_halt_code_ready) {
253a3114836SGerry Liu /*
254a3114836SGerry Liu * The rm_cpu_halt_code field in the real mode platter
255a3114836SGerry Liu * is used by the CPU stop code only. So only copy the
256a3114836SGerry Liu * CPU stop stage 2 code into the rm_cpu_halt_code
257a3114836SGerry Liu * field on the first call.
258a3114836SGerry Liu */
259a3114836SGerry Liu len = (size_t)real_mode_stop_cpu_stage2_end -
260a3114836SGerry Liu (size_t)real_mode_stop_cpu_stage2;
261a3114836SGerry Liu ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
262a3114836SGerry Liu bcopy((caddr_t)real_mode_stop_cpu_stage2,
263a3114836SGerry Liu (caddr_t)rm->rm_cpu_halt_code, len);
264a3114836SGerry Liu cpu_halt_code_ready = 1;
265a3114836SGerry Liu }
266a3114836SGerry Liu
267a3114836SGerry Liu /*
268a3114836SGerry Liu * The rm_code field in the real mode platter is shared by
269a3114836SGerry Liu * the CPU start, CPU stop, CPR and fast reboot code. So copy
270a3114836SGerry Liu * the CPU stop stage 1 code into the rm_code field every time.
271a3114836SGerry Liu */
272a3114836SGerry Liu len = (size_t)real_mode_stop_cpu_stage1_end -
273a3114836SGerry Liu (size_t)real_mode_stop_cpu_stage1;
274a3114836SGerry Liu ASSERT(len <= RM_PLATTER_CODE_SIZE);
275a3114836SGerry Liu bcopy((caddr_t)real_mode_stop_cpu_stage1,
276a3114836SGerry Liu (caddr_t)rm->rm_code, len);
277a3114836SGerry Liu rm->rm_cpu_halted = 0;
278a3114836SGerry Liu
279a3114836SGerry Liu return (cp->cpu_m.mcpu_mach_ctx_ptr);
280a3114836SGerry Liu } else if (optype != MACH_CPUCONTEXT_OP_START) {
281a3114836SGerry Liu return (NULL);
282a3114836SGerry Liu }
283a3114836SGerry Liu
284a3114836SGerry Liu /*
285a3114836SGerry Liu * Only need to allocate tables when starting CPU.
286a3114836SGerry Liu * Tables allocated when starting CPU will be reused when stopping CPU.
287a3114836SGerry Liu */
288a3114836SGerry Liu ct = mach_cpucontext_alloc_tables(cp);
289a3114836SGerry Liu if (ct == NULL) {
290a3114836SGerry Liu return (NULL);
291a3114836SGerry Liu }
292a3114836SGerry Liu
293a3114836SGerry Liu /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
294a3114836SGerry Liu if (plat_dr_enabled()) {
295a3114836SGerry Liu bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
296a3114836SGerry Liu (size_t)real_mode_start_cpu_end -
297a3114836SGerry Liu (size_t)real_mode_start_cpu);
298a3114836SGerry Liu }
299a3114836SGerry Liu
300ae115bc7Smrj /*
301ae115bc7Smrj * Now copy all that we've set up onto the real mode platter
302ae115bc7Smrj * for the real mode code to digest as part of starting the cpu.
303ae115bc7Smrj */
304ae115bc7Smrj rm->rm_idt_base = cp->cpu_idt;
3050cfdb603Sjosephb rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
306ae115bc7Smrj rm->rm_gdt_base = cp->cpu_gdt;
3070cfdb603Sjosephb rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
308ae115bc7Smrj
309a3114836SGerry Liu /*
310a3114836SGerry Liu * CPU needs to access kernel address space after powering on.
311a3114836SGerry Liu * When hot-adding CPU at runtime, directly use top level page table
312a3114836SGerry Liu * of kas other than the return value of getcr3(). getcr3() returns
313a3114836SGerry Liu * current process's top level page table, which may be different from
314a3114836SGerry Liu * the one of kas.
315a3114836SGerry Liu */
316a3114836SGerry Liu rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
317ae115bc7Smrj rm->rm_cpu = cp->cpu_id;
318a3114836SGerry Liu
319a3114836SGerry Liu /*
320a3114836SGerry Liu * For hot-adding CPU at runtime, Machine Check and Performance Counter
321a3114836SGerry Liu * should be disabled. They will be enabled on demand after CPU powers
322a3114836SGerry Liu * on successfully
323a3114836SGerry Liu */
324ae115bc7Smrj rm->rm_cr4 = getcr4();
325a3114836SGerry Liu rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
326ae115bc7Smrj
3272df1fe9cSrandyf rmp_gdt_init(rm);
3282df1fe9cSrandyf
3292df1fe9cSrandyf return (ct);
3302df1fe9cSrandyf }
3312df1fe9cSrandyf
3322df1fe9cSrandyf void
mach_cpucontext_xfree(struct cpu * cp,void * arg,int err,int optype)333a3114836SGerry Liu mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
334ae115bc7Smrj {
335ae115bc7Smrj struct cpu_tables *ct = arg;
336ae115bc7Smrj
337ae115bc7Smrj ASSERT(&ct->ct_tss == cp->cpu_tss);
338a3114836SGerry Liu if (optype == MACH_CPUCONTEXT_OP_START) {
339ae115bc7Smrj switch (err) {
340ae115bc7Smrj case 0:
341a3114836SGerry Liu /*
342a3114836SGerry Liu * Save pointer for reuse when stopping CPU.
343a3114836SGerry Liu */
344a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = arg;
345ae115bc7Smrj break;
346ae115bc7Smrj case ETIMEDOUT:
347ae115bc7Smrj /*
348ae115bc7Smrj * The processor was poked, but failed to start before
349ae115bc7Smrj * we gave up waiting for it. In case it starts later,
350ae115bc7Smrj * don't free anything.
351ae115bc7Smrj */
352a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = arg;
353ae115bc7Smrj break;
354ae115bc7Smrj default:
355ae115bc7Smrj /*
356ae115bc7Smrj * Some other, passive, error occurred.
357ae115bc7Smrj */
3580cfdb603Sjosephb kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
359ae115bc7Smrj cp->cpu_tss = NULL;
360ae115bc7Smrj break;
361ae115bc7Smrj }
362a3114836SGerry Liu } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
363a3114836SGerry Liu switch (err) {
364a3114836SGerry Liu case 0:
365a3114836SGerry Liu /*
366a3114836SGerry Liu * Free resources allocated when starting CPU.
367a3114836SGerry Liu */
368a3114836SGerry Liu kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
369a3114836SGerry Liu cp->cpu_tss = NULL;
370a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
371a3114836SGerry Liu break;
372a3114836SGerry Liu default:
373a3114836SGerry Liu /*
374a3114836SGerry Liu * Don't touch table pointer in case of failure.
375a3114836SGerry Liu */
376a3114836SGerry Liu break;
377a3114836SGerry Liu }
378a3114836SGerry Liu } else {
379a3114836SGerry Liu ASSERT(0);
380a3114836SGerry Liu }
381a3114836SGerry Liu }
382a3114836SGerry Liu
383a3114836SGerry Liu void *
mach_cpucontext_alloc(struct cpu * cp)384a3114836SGerry Liu mach_cpucontext_alloc(struct cpu *cp)
385a3114836SGerry Liu {
386a3114836SGerry Liu return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
387a3114836SGerry Liu }
388a3114836SGerry Liu
389a3114836SGerry Liu void
mach_cpucontext_free(struct cpu * cp,void * arg,int err)390a3114836SGerry Liu mach_cpucontext_free(struct cpu *cp, void *arg, int err)
391a3114836SGerry Liu {
392a3114836SGerry Liu mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
393ae115bc7Smrj }
394ae115bc7Smrj
395ae115bc7Smrj /*
396ae115bc7Smrj * "Enter monitor." Called via cross-call from stop_other_cpus().
397ae115bc7Smrj */
398ae115bc7Smrj void
mach_cpu_halt(char * msg)399ae115bc7Smrj mach_cpu_halt(char *msg)
400ae115bc7Smrj {
401ae115bc7Smrj if (msg)
402ae115bc7Smrj prom_printf("%s\n", msg);
403ae115bc7Smrj
404ae115bc7Smrj /*CONSTANTCONDITION*/
405ae115bc7Smrj while (1)
406ae115bc7Smrj ;
407ae115bc7Smrj }
408ae115bc7Smrj
/*
 * Idle the current CPU by handing control to i86_halt().
 */
void
mach_cpu_idle(void)
{
	i86_halt();
}
414ae115bc7Smrj
415ae115bc7Smrj void
mach_cpu_pause(volatile char * safe)416ae115bc7Smrj mach_cpu_pause(volatile char *safe)
417ae115bc7Smrj {
418ae115bc7Smrj /*
419ae115bc7Smrj * This cpu is now safe.
420ae115bc7Smrj */
421ae115bc7Smrj *safe = PAUSE_WAIT;
422ae115bc7Smrj membar_enter(); /* make sure stores are flushed */
423ae115bc7Smrj
424ae115bc7Smrj /*
425ae115bc7Smrj * Now we wait. When we are allowed to continue, safe
426ae115bc7Smrj * will be set to PAUSE_IDLE.
427ae115bc7Smrj */
428ae115bc7Smrj while (*safe != PAUSE_IDLE)
429ae115bc7Smrj SMT_PAUSE();
430ae115bc7Smrj }
431ae115bc7Smrj
432ae115bc7Smrj /*
433a3114836SGerry Liu * Power on the target CPU.
434ae115bc7Smrj */
435ae115bc7Smrj int
mp_cpu_poweron(struct cpu * cp)436ae115bc7Smrj mp_cpu_poweron(struct cpu *cp)
437ae115bc7Smrj {
438a3114836SGerry Liu int error;
439a3114836SGerry Liu cpuset_t tempset;
440a3114836SGerry Liu processorid_t cpuid;
441a3114836SGerry Liu
442a3114836SGerry Liu ASSERT(cp != NULL);
443a3114836SGerry Liu cpuid = cp->cpu_id;
444a3114836SGerry Liu if (use_mp == 0 || plat_dr_support_cpu() == 0) {
445a3114836SGerry Liu return (ENOTSUP);
446a3114836SGerry Liu } else if (cpuid < 0 || cpuid >= max_ncpus) {
447a3114836SGerry Liu return (EINVAL);
448ae115bc7Smrj }
449ae115bc7Smrj
450ae115bc7Smrj /*
451a3114836SGerry Liu * The currrent x86 implementaiton of mp_cpu_configure() and
452a3114836SGerry Liu * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
453a3114836SGerry Liu * be called once after calling mp_cpu_configure() for a specific CPU.
454a3114836SGerry Liu * It's because mp_cpu_poweron() will destroy data structure created
455a3114836SGerry Liu * by mp_cpu_configure(). So reject the request if the CPU has already
456a3114836SGerry Liu * been powered on once after calling mp_cpu_configure().
457a3114836SGerry Liu * This limitaiton only affects the p_online syscall and the DR driver
458a3114836SGerry Liu * won't be affected because the DR driver always invoke public CPU
459a3114836SGerry Liu * management interfaces in the predefined order:
460a3114836SGerry Liu * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
461ae115bc7Smrj */
462a3114836SGerry Liu if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
463a3114836SGerry Liu return (ENOTSUP);
464a3114836SGerry Liu }
465a3114836SGerry Liu
466a3114836SGerry Liu /*
467a3114836SGerry Liu * Check if there's at least a Mbyte of kmem available
468a3114836SGerry Liu * before attempting to start the cpu.
469a3114836SGerry Liu */
470a3114836SGerry Liu if (kmem_avail() < 1024 * 1024) {
471a3114836SGerry Liu /*
472a3114836SGerry Liu * Kick off a reap in case that helps us with
473a3114836SGerry Liu * later attempts ..
474a3114836SGerry Liu */
475a3114836SGerry Liu kmem_reap();
476a3114836SGerry Liu return (ENOMEM);
477a3114836SGerry Liu }
478a3114836SGerry Liu
479a3114836SGerry Liu affinity_set(CPU->cpu_id);
480a3114836SGerry Liu
481a3114836SGerry Liu /*
482a3114836SGerry Liu * Start the target CPU. No need to call mach_cpucontext_fini()
483a3114836SGerry Liu * if mach_cpucontext_init() fails.
484a3114836SGerry Liu */
485a3114836SGerry Liu if ((error = mach_cpucontext_init()) == 0) {
486a3114836SGerry Liu error = mp_start_cpu_common(cp, B_FALSE);
487a3114836SGerry Liu mach_cpucontext_fini();
488a3114836SGerry Liu }
489a3114836SGerry Liu if (error != 0) {
490a3114836SGerry Liu affinity_clear();
491a3114836SGerry Liu return (error);
492a3114836SGerry Liu }
493a3114836SGerry Liu
494a3114836SGerry Liu /* Wait for the target cpu to reach READY state. */
495a3114836SGerry Liu tempset = cpu_ready_set;
496a3114836SGerry Liu while (!CPU_IN_SET(tempset, cpuid)) {
497a3114836SGerry Liu delay(1);
498a3114836SGerry Liu tempset = *((volatile cpuset_t *)&cpu_ready_set);
499a3114836SGerry Liu }
500a3114836SGerry Liu
501a3114836SGerry Liu /* Mark the target CPU as available for mp operation. */
502a3114836SGerry Liu CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
503a3114836SGerry Liu
504a3114836SGerry Liu /* Free the space allocated to hold the microcode file */
505a3114836SGerry Liu ucode_cleanup();
506a3114836SGerry Liu
507a3114836SGerry Liu affinity_clear();
508a3114836SGerry Liu
509a3114836SGerry Liu return (0);
510a3114836SGerry Liu }
511a3114836SGerry Liu
512a3114836SGerry Liu #define MP_CPU_DETACH_MAX_TRIES 5
513a3114836SGerry Liu #define MP_CPU_DETACH_DELAY 100
514a3114836SGerry Liu
515a3114836SGerry Liu static int
mp_cpu_detach_driver(dev_info_t * dip)516a3114836SGerry Liu mp_cpu_detach_driver(dev_info_t *dip)
517a3114836SGerry Liu {
518a3114836SGerry Liu int i;
519a3114836SGerry Liu int rv = EBUSY;
520a3114836SGerry Liu dev_info_t *pdip;
521a3114836SGerry Liu
522a3114836SGerry Liu pdip = ddi_get_parent(dip);
523a3114836SGerry Liu ASSERT(pdip != NULL);
524a3114836SGerry Liu /*
525a3114836SGerry Liu * Check if caller holds pdip busy - can cause deadlocks in
526a3114836SGerry Liu * e_ddi_branch_unconfigure(), which calls devfs_clean().
527a3114836SGerry Liu */
528a3114836SGerry Liu if (DEVI_BUSY_OWNED(pdip)) {
529a3114836SGerry Liu return (EDEADLOCK);
530a3114836SGerry Liu }
531a3114836SGerry Liu
532a3114836SGerry Liu for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
533a3114836SGerry Liu if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
534a3114836SGerry Liu rv = 0;
535a3114836SGerry Liu break;
536a3114836SGerry Liu }
537a3114836SGerry Liu DELAY(MP_CPU_DETACH_DELAY);
538a3114836SGerry Liu }
539a3114836SGerry Liu
540a3114836SGerry Liu return (rv);
541a3114836SGerry Liu }
542a3114836SGerry Liu
543a3114836SGerry Liu /*
544a3114836SGerry Liu * Power off the target CPU.
545a3114836SGerry Liu * Note: cpu_lock will be released and then reacquired.
546a3114836SGerry Liu */
547ae115bc7Smrj int
mp_cpu_poweroff(struct cpu * cp)548ae115bc7Smrj mp_cpu_poweroff(struct cpu *cp)
549ae115bc7Smrj {
550a3114836SGerry Liu int rv = 0;
551a3114836SGerry Liu void *ctx;
552a3114836SGerry Liu dev_info_t *dip = NULL;
553a3114836SGerry Liu rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
554a3114836SGerry Liu extern void cpupm_start(cpu_t *);
555a3114836SGerry Liu extern void cpupm_stop(cpu_t *);
556a3114836SGerry Liu
557a3114836SGerry Liu ASSERT(cp != NULL);
558a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
559a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
560a3114836SGerry Liu
561a3114836SGerry Liu if (use_mp == 0 || plat_dr_support_cpu() == 0) {
562a3114836SGerry Liu return (ENOTSUP);
563a3114836SGerry Liu }
564a3114836SGerry Liu /*
565a3114836SGerry Liu * There is no support for powering off cpu0 yet.
566a3114836SGerry Liu * There are many pieces of code which have a hard dependency on cpu0.
567a3114836SGerry Liu */
568a3114836SGerry Liu if (cp->cpu_id == 0) {
569a3114836SGerry Liu return (ENOTSUP);
570a3114836SGerry Liu };
571a3114836SGerry Liu
572a3114836SGerry Liu if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
573a3114836SGerry Liu return (ENXIO);
574a3114836SGerry Liu }
575a3114836SGerry Liu ASSERT(dip != NULL);
576a3114836SGerry Liu if (mp_cpu_detach_driver(dip) != 0) {
577a3114836SGerry Liu rv = EBUSY;
578a3114836SGerry Liu goto out_online;
579a3114836SGerry Liu }
580a3114836SGerry Liu
581a3114836SGerry Liu /* Allocate CPU context for stopping */
582a3114836SGerry Liu if (mach_cpucontext_init() != 0) {
583a3114836SGerry Liu rv = ENXIO;
584a3114836SGerry Liu goto out_online;
585a3114836SGerry Liu }
586a3114836SGerry Liu ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
587a3114836SGerry Liu if (ctx == NULL) {
588a3114836SGerry Liu rv = ENXIO;
589a3114836SGerry Liu goto out_context_fini;
590a3114836SGerry Liu }
591a3114836SGerry Liu
592a3114836SGerry Liu cpupm_stop(cp);
593a3114836SGerry Liu cpu_event_fini_cpu(cp);
594a3114836SGerry Liu
595a3114836SGerry Liu if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
596a3114836SGerry Liu cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
597a3114836SGerry Liu cp->cpu_m.mcpu_cmi_hdl = NULL;
598a3114836SGerry Liu }
599a3114836SGerry Liu
600a3114836SGerry Liu rv = mach_cpu_stop(cp, ctx);
601a3114836SGerry Liu if (rv != 0) {
602a3114836SGerry Liu goto out_enable_cmi;
603a3114836SGerry Liu }
604a3114836SGerry Liu
605a3114836SGerry Liu /* Wait until the target CPU has been halted. */
606a3114836SGerry Liu while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
607a3114836SGerry Liu delay(1);
608a3114836SGerry Liu }
609a3114836SGerry Liu rm->rm_cpu_halted = 0xffff;
610a3114836SGerry Liu
611a3114836SGerry Liu /* CPU_READY has been cleared by mach_cpu_stop. */
612a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_READY) == 0);
613a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
614a3114836SGerry Liu cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
615a3114836SGerry Liu CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
616a3114836SGerry Liu
617a3114836SGerry Liu mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
618a3114836SGerry Liu mach_cpucontext_fini();
619a3114836SGerry Liu
620a3114836SGerry Liu return (0);
621a3114836SGerry Liu
622a3114836SGerry Liu out_enable_cmi:
623a3114836SGerry Liu {
624a3114836SGerry Liu cmi_hdl_t hdl;
625a3114836SGerry Liu
626a3114836SGerry Liu if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
627a3114836SGerry Liu cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
6287417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_MCA))
629a3114836SGerry Liu cmi_mca_init(hdl);
630a3114836SGerry Liu cp->cpu_m.mcpu_cmi_hdl = hdl;
631a3114836SGerry Liu }
632a3114836SGerry Liu }
633a3114836SGerry Liu cpu_event_init_cpu(cp);
634a3114836SGerry Liu cpupm_start(cp);
635a3114836SGerry Liu mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
636a3114836SGerry Liu
637a3114836SGerry Liu out_context_fini:
638a3114836SGerry Liu mach_cpucontext_fini();
639a3114836SGerry Liu
640a3114836SGerry Liu out_online:
641a3114836SGerry Liu (void) e_ddi_branch_configure(dip, NULL, 0);
642a3114836SGerry Liu
643a3114836SGerry Liu if (rv != EAGAIN && rv != ETIME) {
644a3114836SGerry Liu rv = ENXIO;
645a3114836SGerry Liu }
646a3114836SGerry Liu
647a3114836SGerry Liu return (rv);
648ae115bc7Smrj }
649b9bc7f78Ssmaybe
650b9bc7f78Ssmaybe /*
651b9bc7f78Ssmaybe * Return vcpu state, since this could be a virtual environment that we
652b9bc7f78Ssmaybe * are unaware of, return "unknown".
653b9bc7f78Ssmaybe */
654b9bc7f78Ssmaybe /* ARGSUSED */
655b9bc7f78Ssmaybe int
vcpu_on_pcpu(processorid_t cpu)656b9bc7f78Ssmaybe vcpu_on_pcpu(processorid_t cpu)
657b9bc7f78Ssmaybe {
658b9bc7f78Ssmaybe return (VCPU_STATE_UNKNOWN);
659b9bc7f78Ssmaybe }
660