1ae115bc7Smrj /* 2ae115bc7Smrj * CDDL HEADER START 3ae115bc7Smrj * 4ae115bc7Smrj * The contents of this file are subject to the terms of the 5ae115bc7Smrj * Common Development and Distribution License (the "License"). 6ae115bc7Smrj * You may not use this file except in compliance with the License. 7ae115bc7Smrj * 8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing. 10ae115bc7Smrj * See the License for the specific language governing permissions 11ae115bc7Smrj * and limitations under the License. 12ae115bc7Smrj * 13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each 14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the 16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying 17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner] 18ae115bc7Smrj * 19ae115bc7Smrj * CDDL HEADER END 20ae115bc7Smrj */ 21ae115bc7Smrj /* 227417cfdeSKuriakose Kuruvilla * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 23ae115bc7Smrj */ 24a3114836SGerry Liu /* 25a3114836SGerry Liu * Copyright (c) 2010, Intel Corporation. 26a3114836SGerry Liu * All rights reserved. 27a3114836SGerry Liu */ 28f16a0f4cSRobert Mustacchi /* 29f16a0f4cSRobert Mustacchi * Copyright 2011 Joyent, Inc. All rights reserved. 30f16a0f4cSRobert Mustacchi */ 31ae115bc7Smrj 32ae115bc7Smrj /* 33ae115bc7Smrj * Welcome to the world of the "real mode platter". 34ae115bc7Smrj * See also startup.c, mpcore.s and apic.c for related routines. 35ae115bc7Smrj */ 36ae115bc7Smrj 37ae115bc7Smrj #include <sys/types.h> 38ae115bc7Smrj #include <sys/systm.h> 39ae115bc7Smrj #include <sys/cpuvar.h> 40a3114836SGerry Liu #include <sys/cpu_module.h> 41ae115bc7Smrj #include <sys/kmem.h> 42ae115bc7Smrj #include <sys/archsystm.h> 43ae115bc7Smrj #include <sys/machsystm.h> 44ae115bc7Smrj #include <sys/controlregs.h> 45ae115bc7Smrj #include <sys/x86_archext.h> 46ae115bc7Smrj #include <sys/smp_impldefs.h> 47ae115bc7Smrj #include <sys/sysmacros.h> 48ae115bc7Smrj #include <sys/mach_mmu.h> 49ae115bc7Smrj #include <sys/promif.h> 50ae115bc7Smrj #include <sys/cpu.h> 51a3114836SGerry Liu #include <sys/cpu_event.h> 52a3114836SGerry Liu #include <sys/sunndi.h> 53a3114836SGerry Liu #include <sys/fs/dv_node.h> 5495c0a3c8Sjosephb #include <vm/hat_i86.h> 55a3114836SGerry Liu #include <vm/as.h> 56ae115bc7Smrj 57a3114836SGerry Liu extern cpuset_t cpu_ready_set; 58a3114836SGerry Liu 59a3114836SGerry Liu extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot); 60a3114836SGerry Liu extern void real_mode_start_cpu(void); 61a3114836SGerry Liu extern void real_mode_start_cpu_end(void); 62a3114836SGerry Liu extern void real_mode_stop_cpu_stage1(void); 63a3114836SGerry Liu extern void real_mode_stop_cpu_stage1_end(void); 64a3114836SGerry Liu extern void real_mode_stop_cpu_stage2(void); 65a3114836SGerry Liu extern void real_mode_stop_cpu_stage2_end(void); 662df1fe9cSrandyf 672df1fe9cSrandyf void rmp_gdt_init(rm_platter_t *); 68ae115bc7Smrj 69ae115bc7Smrj /* 70ae115bc7Smrj * Fill up the real mode platter to make it easy for real mode code to 71ae115bc7Smrj * kick it off. This area should really be one passed by boot to kernel 72ae115bc7Smrj * and guaranteed to be below 1MB and aligned to 16 bytes. Should also 73ae115bc7Smrj * have identical physical and virtual address in paged mode. 74ae115bc7Smrj */ 75ae115bc7Smrj static ushort_t *warm_reset_vector = NULL; 76ae115bc7Smrj 77ae115bc7Smrj int 78ae115bc7Smrj mach_cpucontext_init(void) 79ae115bc7Smrj { 80ae115bc7Smrj ushort_t *vec; 81a3114836SGerry Liu ulong_t addr; 82a3114836SGerry Liu struct rm_platter *rm = (struct rm_platter *)rm_platter_va; 83ae115bc7Smrj 84ae115bc7Smrj if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR, 85ae115bc7Smrj sizeof (vec), PROT_READ | PROT_WRITE))) 86ae115bc7Smrj return (-1); 87a3114836SGerry Liu 88ae115bc7Smrj /* 89ae115bc7Smrj * setup secondary cpu bios boot up vector 90a3114836SGerry Liu * Write page offset to 0x467 and page frame number to 0x469. 91ae115bc7Smrj */ 92a3114836SGerry Liu addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa; 93a3114836SGerry Liu vec[0] = (ushort_t)(addr & PAGEOFFSET); 94a3114836SGerry Liu vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4); 95ae115bc7Smrj warm_reset_vector = vec; 96ae115bc7Smrj 97a3114836SGerry Liu /* Map real mode platter into kas so kernel can access it. */ 98a3114836SGerry Liu hat_devload(kas.a_hat, 99a3114836SGerry Liu (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE, 100a3114836SGerry Liu btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC, 101a3114836SGerry Liu HAT_LOAD_NOCONSIST); 102a3114836SGerry Liu 103a3114836SGerry Liu /* Copy CPU startup code to rm_platter if it's still during boot. */ 104a3114836SGerry Liu if (!plat_dr_enabled()) { 105a3114836SGerry Liu ASSERT((size_t)real_mode_start_cpu_end - 106a3114836SGerry Liu (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE); 107a3114836SGerry Liu bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code, 108a3114836SGerry Liu (size_t)real_mode_start_cpu_end - 109a3114836SGerry Liu (size_t)real_mode_start_cpu); 110a3114836SGerry Liu } 111ae115bc7Smrj 112ae115bc7Smrj return (0); 113ae115bc7Smrj } 114ae115bc7Smrj 115ae115bc7Smrj void 116ae115bc7Smrj mach_cpucontext_fini(void) 117ae115bc7Smrj { 118ae115bc7Smrj if (warm_reset_vector) 119ae115bc7Smrj psm_unmap_phys((caddr_t)warm_reset_vector, 120ae115bc7Smrj sizeof (warm_reset_vector)); 121ae115bc7Smrj hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE, 122ae115bc7Smrj HAT_UNLOAD); 123ae115bc7Smrj } 124ae115bc7Smrj 125ae115bc7Smrj #if defined(__amd64) 126ae115bc7Smrj extern void *long_mode_64(void); 127ae115bc7Smrj #endif /* __amd64 */ 128ae115bc7Smrj 129a3114836SGerry Liu /*ARGSUSED*/ 130a3114836SGerry Liu void 131a3114836SGerry Liu rmp_gdt_init(rm_platter_t *rm) 132ae115bc7Smrj { 133a3114836SGerry Liu 134a3114836SGerry Liu #if defined(__amd64) 135a3114836SGerry Liu /* Use the kas address space for the CPU startup thread. */ 136a3114836SGerry Liu if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) 137a3114836SGerry Liu panic("Cannot initialize CPUs; kernel's 64-bit page tables\n" 138a3114836SGerry Liu "located above 4G in physical memory (@ 0x%lx)", 139a3114836SGerry Liu MAKECR3(kas.a_hat->hat_htable->ht_pfn)); 140a3114836SGerry Liu 141a3114836SGerry Liu /* 142a3114836SGerry Liu * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY 143a3114836SGerry Liu * by code in real_mode_start_cpu(): 144a3114836SGerry Liu * 145a3114836SGerry Liu * GDT[0]: NULL selector 146a3114836SGerry Liu * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1 147a3114836SGerry Liu * 148a3114836SGerry Liu * Clear the IDT as interrupts will be off and a limit of 0 will cause 149a3114836SGerry Liu * the CPU to triple fault and reset on an NMI, seemingly as reasonable 150a3114836SGerry Liu * a course of action as any other, though it may cause the entire 151a3114836SGerry Liu * platform to reset in some cases... 152a3114836SGerry Liu */ 153a3114836SGerry Liu rm->rm_temp_gdt[0] = 0; 154a3114836SGerry Liu rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL; 155a3114836SGerry Liu 156a3114836SGerry Liu rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1); 157a3114836SGerry Liu rm->rm_temp_gdt_base = rm_platter_pa + 158a3114836SGerry Liu (uint32_t)offsetof(rm_platter_t, rm_temp_gdt); 159a3114836SGerry Liu rm->rm_temp_idt_lim = 0; 160a3114836SGerry Liu rm->rm_temp_idt_base = 0; 161a3114836SGerry Liu 162a3114836SGerry Liu /* 163a3114836SGerry Liu * Since the CPU needs to jump to protected mode using an identity 164a3114836SGerry Liu * mapped address, we need to calculate it here. 165a3114836SGerry Liu */ 166a3114836SGerry Liu rm->rm_longmode64_addr = rm_platter_pa + 167*c909a41bSRichard Lowe (uint32_t)((uintptr_t)long_mode_64 - 168*c909a41bSRichard Lowe (uintptr_t)real_mode_start_cpu); 169a3114836SGerry Liu #endif /* __amd64 */ 170a3114836SGerry Liu } 171a3114836SGerry Liu 172a3114836SGerry Liu static void * 173a3114836SGerry Liu mach_cpucontext_alloc_tables(struct cpu *cp) 174a3114836SGerry Liu { 175f16a0f4cSRobert Mustacchi tss_t *ntss; 176a3114836SGerry Liu struct cpu_tables *ct; 177ae115bc7Smrj 178ae115bc7Smrj /* 1790cfdb603Sjosephb * Allocate space for stack, tss, gdt and idt. We round the size 180fb2caebeSRandy Fishel * allotted for cpu_tables up, so that the TSS is on a unique page. 1810cfdb603Sjosephb * This is more efficient when running in virtual machines. 182ae115bc7Smrj */ 1830cfdb603Sjosephb ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP); 1840cfdb603Sjosephb if ((uintptr_t)ct & PAGEOFFSET) 185a3114836SGerry Liu panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables", 186a3114836SGerry Liu cp->cpu_id); 187ae115bc7Smrj 188ae115bc7Smrj ntss = cp->cpu_tss = &ct->ct_tss; 189ae115bc7Smrj 190ae115bc7Smrj #if defined(__amd64) 191ae115bc7Smrj 192ae115bc7Smrj /* 193ae115bc7Smrj * #DF (double fault). 194ae115bc7Smrj */ 195ae115bc7Smrj ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)]; 196ae115bc7Smrj 197ae115bc7Smrj #elif defined(__i386) 198ae115bc7Smrj 199ae115bc7Smrj ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp = 200ae115bc7Smrj (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)]; 201ae115bc7Smrj 202ae115bc7Smrj ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL; 203ae115bc7Smrj 204ae115bc7Smrj ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc; 205ae115bc7Smrj 206ae115bc7Smrj ntss->tss_cs = KCS_SEL; 207ae115bc7Smrj ntss->tss_ds = ntss->tss_es = KDS_SEL; 208ae115bc7Smrj ntss->tss_fs = KFS_SEL; 209ae115bc7Smrj ntss->tss_gs = KGS_SEL; 210ae115bc7Smrj 211ae115bc7Smrj #endif /* __i386 */ 212ae115bc7Smrj 213ae115bc7Smrj /* 214ae115bc7Smrj * Set I/O bit map offset equal to size of TSS segment limit 215ae115bc7Smrj * for no I/O permission map. This will cause all user I/O 216ae115bc7Smrj * instructions to generate #gp fault. 217ae115bc7Smrj */ 218ae115bc7Smrj ntss->tss_bitmapbase = sizeof (*ntss); 219ae115bc7Smrj 220ae115bc7Smrj /* 221ae115bc7Smrj * Setup kernel tss. 222ae115bc7Smrj */ 223ae115bc7Smrj set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss, 224ae115bc7Smrj sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL); 225ae115bc7Smrj 226a3114836SGerry Liu return (ct); 227a3114836SGerry Liu } 228a3114836SGerry Liu 229a3114836SGerry Liu void * 230a3114836SGerry Liu mach_cpucontext_xalloc(struct cpu *cp, int optype) 231a3114836SGerry Liu { 232a3114836SGerry Liu size_t len; 233a3114836SGerry Liu struct cpu_tables *ct; 234a3114836SGerry Liu rm_platter_t *rm = (rm_platter_t *)rm_platter_va; 235a3114836SGerry Liu static int cpu_halt_code_ready; 236a3114836SGerry Liu 237a3114836SGerry Liu if (optype == MACH_CPUCONTEXT_OP_STOP) { 238a3114836SGerry Liu ASSERT(plat_dr_enabled()); 239a3114836SGerry Liu 240a3114836SGerry Liu /* 241a3114836SGerry Liu * The WARM_RESET_VECTOR has a limitation that the physical 242a3114836SGerry Liu * address written to it must be page-aligned. To work around 243a3114836SGerry Liu * this limitation, the CPU stop code has been splitted into 244a3114836SGerry Liu * two stages. 245a3114836SGerry Liu * The stage 2 code, which implements the real logic to halt 246a3114836SGerry Liu * CPUs, is copied to the rm_cpu_halt_code field in the real 247a3114836SGerry Liu * mode platter. The stage 1 code, which simply jumps to the 248a3114836SGerry Liu * stage 2 code in the rm_cpu_halt_code field, is copied to 249a3114836SGerry Liu * rm_code field in the real mode platter and it may be 250a3114836SGerry Liu * overwritten after the CPU has been stopped. 251a3114836SGerry Liu */ 252a3114836SGerry Liu if (!cpu_halt_code_ready) { 253a3114836SGerry Liu /* 254a3114836SGerry Liu * The rm_cpu_halt_code field in the real mode platter 255a3114836SGerry Liu * is used by the CPU stop code only. So only copy the 256a3114836SGerry Liu * CPU stop stage 2 code into the rm_cpu_halt_code 257a3114836SGerry Liu * field on the first call. 258a3114836SGerry Liu */ 259a3114836SGerry Liu len = (size_t)real_mode_stop_cpu_stage2_end - 260a3114836SGerry Liu (size_t)real_mode_stop_cpu_stage2; 261a3114836SGerry Liu ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE); 262a3114836SGerry Liu bcopy((caddr_t)real_mode_stop_cpu_stage2, 263a3114836SGerry Liu (caddr_t)rm->rm_cpu_halt_code, len); 264a3114836SGerry Liu cpu_halt_code_ready = 1; 265a3114836SGerry Liu } 266a3114836SGerry Liu 267a3114836SGerry Liu /* 268a3114836SGerry Liu * The rm_code field in the real mode platter is shared by 269a3114836SGerry Liu * the CPU start, CPU stop, CPR and fast reboot code. So copy 270a3114836SGerry Liu * the CPU stop stage 1 code into the rm_code field every time. 271a3114836SGerry Liu */ 272a3114836SGerry Liu len = (size_t)real_mode_stop_cpu_stage1_end - 273a3114836SGerry Liu (size_t)real_mode_stop_cpu_stage1; 274a3114836SGerry Liu ASSERT(len <= RM_PLATTER_CODE_SIZE); 275a3114836SGerry Liu bcopy((caddr_t)real_mode_stop_cpu_stage1, 276a3114836SGerry Liu (caddr_t)rm->rm_code, len); 277a3114836SGerry Liu rm->rm_cpu_halted = 0; 278a3114836SGerry Liu 279a3114836SGerry Liu return (cp->cpu_m.mcpu_mach_ctx_ptr); 280a3114836SGerry Liu } else if (optype != MACH_CPUCONTEXT_OP_START) { 281a3114836SGerry Liu return (NULL); 282a3114836SGerry Liu } 283a3114836SGerry Liu 284a3114836SGerry Liu /* 285a3114836SGerry Liu * Only need to allocate tables when starting CPU. 286a3114836SGerry Liu * Tables allocated when starting CPU will be reused when stopping CPU. 287a3114836SGerry Liu */ 288a3114836SGerry Liu ct = mach_cpucontext_alloc_tables(cp); 289a3114836SGerry Liu if (ct == NULL) { 290a3114836SGerry Liu return (NULL); 291a3114836SGerry Liu } 292a3114836SGerry Liu 293a3114836SGerry Liu /* Copy CPU startup code to rm_platter for CPU hot-add operations. */ 294a3114836SGerry Liu if (plat_dr_enabled()) { 295a3114836SGerry Liu bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code, 296a3114836SGerry Liu (size_t)real_mode_start_cpu_end - 297a3114836SGerry Liu (size_t)real_mode_start_cpu); 298a3114836SGerry Liu } 299a3114836SGerry Liu 300ae115bc7Smrj /* 301ae115bc7Smrj * Now copy all that we've set up onto the real mode platter 302ae115bc7Smrj * for the real mode code to digest as part of starting the cpu. 303ae115bc7Smrj */ 304ae115bc7Smrj rm->rm_idt_base = cp->cpu_idt; 3050cfdb603Sjosephb rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1; 306ae115bc7Smrj rm->rm_gdt_base = cp->cpu_gdt; 3070cfdb603Sjosephb rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1; 308ae115bc7Smrj 309a3114836SGerry Liu /* 310a3114836SGerry Liu * CPU needs to access kernel address space after powering on. 311a3114836SGerry Liu * When hot-adding CPU at runtime, directly use top level page table 312a3114836SGerry Liu * of kas other than the return value of getcr3(). getcr3() returns 313a3114836SGerry Liu * current process's top level page table, which may be different from 314a3114836SGerry Liu * the one of kas. 315a3114836SGerry Liu */ 316a3114836SGerry Liu rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn); 317ae115bc7Smrj rm->rm_cpu = cp->cpu_id; 318a3114836SGerry Liu 319a3114836SGerry Liu /* 320a3114836SGerry Liu * For hot-adding CPU at runtime, Machine Check and Performance Counter 321a3114836SGerry Liu * should be disabled. They will be enabled on demand after CPU powers 322a3114836SGerry Liu * on successfully 323a3114836SGerry Liu */ 324ae115bc7Smrj rm->rm_cr4 = getcr4(); 325a3114836SGerry Liu rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE); 326ae115bc7Smrj 3272df1fe9cSrandyf rmp_gdt_init(rm); 3282df1fe9cSrandyf 3292df1fe9cSrandyf return (ct); 3302df1fe9cSrandyf } 3312df1fe9cSrandyf 3322df1fe9cSrandyf void 333a3114836SGerry Liu mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype) 334ae115bc7Smrj { 335ae115bc7Smrj struct cpu_tables *ct = arg; 336ae115bc7Smrj 337ae115bc7Smrj ASSERT(&ct->ct_tss == cp->cpu_tss); 338a3114836SGerry Liu if (optype == MACH_CPUCONTEXT_OP_START) { 339ae115bc7Smrj switch (err) { 340ae115bc7Smrj case 0: 341a3114836SGerry Liu /* 342a3114836SGerry Liu * Save pointer for reuse when stopping CPU. 343a3114836SGerry Liu */ 344a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = arg; 345ae115bc7Smrj break; 346ae115bc7Smrj case ETIMEDOUT: 347ae115bc7Smrj /* 348ae115bc7Smrj * The processor was poked, but failed to start before 349ae115bc7Smrj * we gave up waiting for it. In case it starts later, 350ae115bc7Smrj * don't free anything. 351ae115bc7Smrj */ 352a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = arg; 353ae115bc7Smrj break; 354ae115bc7Smrj default: 355ae115bc7Smrj /* 356ae115bc7Smrj * Some other, passive, error occurred. 357ae115bc7Smrj */ 3580cfdb603Sjosephb kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE)); 359ae115bc7Smrj cp->cpu_tss = NULL; 360ae115bc7Smrj break; 361ae115bc7Smrj } 362a3114836SGerry Liu } else if (optype == MACH_CPUCONTEXT_OP_STOP) { 363a3114836SGerry Liu switch (err) { 364a3114836SGerry Liu case 0: 365a3114836SGerry Liu /* 366a3114836SGerry Liu * Free resources allocated when starting CPU. 367a3114836SGerry Liu */ 368a3114836SGerry Liu kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE)); 369a3114836SGerry Liu cp->cpu_tss = NULL; 370a3114836SGerry Liu cp->cpu_m.mcpu_mach_ctx_ptr = NULL; 371a3114836SGerry Liu break; 372a3114836SGerry Liu default: 373a3114836SGerry Liu /* 374a3114836SGerry Liu * Don't touch table pointer in case of failure. 375a3114836SGerry Liu */ 376a3114836SGerry Liu break; 377a3114836SGerry Liu } 378a3114836SGerry Liu } else { 379a3114836SGerry Liu ASSERT(0); 380a3114836SGerry Liu } 381a3114836SGerry Liu } 382a3114836SGerry Liu 383a3114836SGerry Liu void * 384a3114836SGerry Liu mach_cpucontext_alloc(struct cpu *cp) 385a3114836SGerry Liu { 386a3114836SGerry Liu return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START)); 387a3114836SGerry Liu } 388a3114836SGerry Liu 389a3114836SGerry Liu void 390a3114836SGerry Liu mach_cpucontext_free(struct cpu *cp, void *arg, int err) 391a3114836SGerry Liu { 392a3114836SGerry Liu mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START); 393ae115bc7Smrj } 394ae115bc7Smrj 395ae115bc7Smrj /* 396ae115bc7Smrj * "Enter monitor." Called via cross-call from stop_other_cpus(). 397ae115bc7Smrj */ 398ae115bc7Smrj void 399ae115bc7Smrj mach_cpu_halt(char *msg) 400ae115bc7Smrj { 401ae115bc7Smrj if (msg) 402ae115bc7Smrj prom_printf("%s\n", msg); 403ae115bc7Smrj 404ae115bc7Smrj /*CONSTANTCONDITION*/ 405ae115bc7Smrj while (1) 406ae115bc7Smrj ; 407ae115bc7Smrj } 408ae115bc7Smrj 409ae115bc7Smrj void 410ae115bc7Smrj mach_cpu_idle(void) 411ae115bc7Smrj { 412ae115bc7Smrj i86_halt(); 413ae115bc7Smrj } 414ae115bc7Smrj 415ae115bc7Smrj void 416ae115bc7Smrj mach_cpu_pause(volatile char *safe) 417ae115bc7Smrj { 418ae115bc7Smrj /* 419ae115bc7Smrj * This cpu is now safe. 420ae115bc7Smrj */ 421ae115bc7Smrj *safe = PAUSE_WAIT; 422ae115bc7Smrj membar_enter(); /* make sure stores are flushed */ 423ae115bc7Smrj 424ae115bc7Smrj /* 425ae115bc7Smrj * Now we wait. When we are allowed to continue, safe 426ae115bc7Smrj * will be set to PAUSE_IDLE. 427ae115bc7Smrj */ 428ae115bc7Smrj while (*safe != PAUSE_IDLE) 429ae115bc7Smrj SMT_PAUSE(); 430ae115bc7Smrj } 431ae115bc7Smrj 432ae115bc7Smrj /* 433a3114836SGerry Liu * Power on the target CPU. 434ae115bc7Smrj */ 435ae115bc7Smrj int 436ae115bc7Smrj mp_cpu_poweron(struct cpu *cp) 437ae115bc7Smrj { 438a3114836SGerry Liu int error; 439a3114836SGerry Liu cpuset_t tempset; 440a3114836SGerry Liu processorid_t cpuid; 441a3114836SGerry Liu 442a3114836SGerry Liu ASSERT(cp != NULL); 443a3114836SGerry Liu cpuid = cp->cpu_id; 444a3114836SGerry Liu if (use_mp == 0 || plat_dr_support_cpu() == 0) { 445a3114836SGerry Liu return (ENOTSUP); 446a3114836SGerry Liu } else if (cpuid < 0 || cpuid >= max_ncpus) { 447a3114836SGerry Liu return (EINVAL); 448ae115bc7Smrj } 449ae115bc7Smrj 450ae115bc7Smrj /* 451a3114836SGerry Liu * The currrent x86 implementaiton of mp_cpu_configure() and 452a3114836SGerry Liu * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only 453a3114836SGerry Liu * be called once after calling mp_cpu_configure() for a specific CPU. 454a3114836SGerry Liu * It's because mp_cpu_poweron() will destroy data structure created 455a3114836SGerry Liu * by mp_cpu_configure(). So reject the request if the CPU has already 456a3114836SGerry Liu * been powered on once after calling mp_cpu_configure(). 457a3114836SGerry Liu * This limitaiton only affects the p_online syscall and the DR driver 458a3114836SGerry Liu * won't be affected because the DR driver always invoke public CPU 459a3114836SGerry Liu * management interfaces in the predefined order: 460a3114836SGerry Liu * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure() 461ae115bc7Smrj */ 462a3114836SGerry Liu if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) { 463a3114836SGerry Liu return (ENOTSUP); 464a3114836SGerry Liu } 465a3114836SGerry Liu 466a3114836SGerry Liu /* 467a3114836SGerry Liu * Check if there's at least a Mbyte of kmem available 468a3114836SGerry Liu * before attempting to start the cpu. 469a3114836SGerry Liu */ 470a3114836SGerry Liu if (kmem_avail() < 1024 * 1024) { 471a3114836SGerry Liu /* 472a3114836SGerry Liu * Kick off a reap in case that helps us with 473a3114836SGerry Liu * later attempts .. 474a3114836SGerry Liu */ 475a3114836SGerry Liu kmem_reap(); 476a3114836SGerry Liu return (ENOMEM); 477a3114836SGerry Liu } 478a3114836SGerry Liu 479a3114836SGerry Liu affinity_set(CPU->cpu_id); 480a3114836SGerry Liu 481a3114836SGerry Liu /* 482a3114836SGerry Liu * Start the target CPU. No need to call mach_cpucontext_fini() 483a3114836SGerry Liu * if mach_cpucontext_init() fails. 484a3114836SGerry Liu */ 485a3114836SGerry Liu if ((error = mach_cpucontext_init()) == 0) { 486a3114836SGerry Liu error = mp_start_cpu_common(cp, B_FALSE); 487a3114836SGerry Liu mach_cpucontext_fini(); 488a3114836SGerry Liu } 489a3114836SGerry Liu if (error != 0) { 490a3114836SGerry Liu affinity_clear(); 491a3114836SGerry Liu return (error); 492a3114836SGerry Liu } 493a3114836SGerry Liu 494a3114836SGerry Liu /* Wait for the target cpu to reach READY state. */ 495a3114836SGerry Liu tempset = cpu_ready_set; 496a3114836SGerry Liu while (!CPU_IN_SET(tempset, cpuid)) { 497a3114836SGerry Liu delay(1); 498a3114836SGerry Liu tempset = *((volatile cpuset_t *)&cpu_ready_set); 499a3114836SGerry Liu } 500a3114836SGerry Liu 501a3114836SGerry Liu /* Mark the target CPU as available for mp operation. */ 502a3114836SGerry Liu CPUSET_ATOMIC_ADD(mp_cpus, cpuid); 503a3114836SGerry Liu 504a3114836SGerry Liu /* Free the space allocated to hold the microcode file */ 505a3114836SGerry Liu ucode_cleanup(); 506a3114836SGerry Liu 507a3114836SGerry Liu affinity_clear(); 508a3114836SGerry Liu 509a3114836SGerry Liu return (0); 510a3114836SGerry Liu } 511a3114836SGerry Liu 512a3114836SGerry Liu #define MP_CPU_DETACH_MAX_TRIES 5 513a3114836SGerry Liu #define MP_CPU_DETACH_DELAY 100 514a3114836SGerry Liu 515a3114836SGerry Liu static int 516a3114836SGerry Liu mp_cpu_detach_driver(dev_info_t *dip) 517a3114836SGerry Liu { 518a3114836SGerry Liu int i; 519a3114836SGerry Liu int rv = EBUSY; 520a3114836SGerry Liu dev_info_t *pdip; 521a3114836SGerry Liu 522a3114836SGerry Liu pdip = ddi_get_parent(dip); 523a3114836SGerry Liu ASSERT(pdip != NULL); 524a3114836SGerry Liu /* 525a3114836SGerry Liu * Check if caller holds pdip busy - can cause deadlocks in 526a3114836SGerry Liu * e_ddi_branch_unconfigure(), which calls devfs_clean(). 527a3114836SGerry Liu */ 528a3114836SGerry Liu if (DEVI_BUSY_OWNED(pdip)) { 529a3114836SGerry Liu return (EDEADLOCK); 530a3114836SGerry Liu } 531a3114836SGerry Liu 532a3114836SGerry Liu for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) { 533a3114836SGerry Liu if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) { 534a3114836SGerry Liu rv = 0; 535a3114836SGerry Liu break; 536a3114836SGerry Liu } 537a3114836SGerry Liu DELAY(MP_CPU_DETACH_DELAY); 538a3114836SGerry Liu } 539a3114836SGerry Liu 540a3114836SGerry Liu return (rv); 541a3114836SGerry Liu } 542a3114836SGerry Liu 543a3114836SGerry Liu /* 544a3114836SGerry Liu * Power off the target CPU. 545a3114836SGerry Liu * Note: cpu_lock will be released and then reacquired. 546a3114836SGerry Liu */ 547ae115bc7Smrj int 548ae115bc7Smrj mp_cpu_poweroff(struct cpu *cp) 549ae115bc7Smrj { 550a3114836SGerry Liu int rv = 0; 551a3114836SGerry Liu void *ctx; 552a3114836SGerry Liu dev_info_t *dip = NULL; 553a3114836SGerry Liu rm_platter_t *rm = (rm_platter_t *)rm_platter_va; 554a3114836SGerry Liu extern void cpupm_start(cpu_t *); 555a3114836SGerry Liu extern void cpupm_stop(cpu_t *); 556a3114836SGerry Liu 557a3114836SGerry Liu ASSERT(cp != NULL); 558a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0); 559a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0); 560a3114836SGerry Liu 561a3114836SGerry Liu if (use_mp == 0 || plat_dr_support_cpu() == 0) { 562a3114836SGerry Liu return (ENOTSUP); 563a3114836SGerry Liu } 564a3114836SGerry Liu /* 565a3114836SGerry Liu * There is no support for powering off cpu0 yet. 566a3114836SGerry Liu * There are many pieces of code which have a hard dependency on cpu0. 567a3114836SGerry Liu */ 568a3114836SGerry Liu if (cp->cpu_id == 0) { 569a3114836SGerry Liu return (ENOTSUP); 570a3114836SGerry Liu }; 571a3114836SGerry Liu 572a3114836SGerry Liu if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) { 573a3114836SGerry Liu return (ENXIO); 574a3114836SGerry Liu } 575a3114836SGerry Liu ASSERT(dip != NULL); 576a3114836SGerry Liu if (mp_cpu_detach_driver(dip) != 0) { 577a3114836SGerry Liu rv = EBUSY; 578a3114836SGerry Liu goto out_online; 579a3114836SGerry Liu } 580a3114836SGerry Liu 581a3114836SGerry Liu /* Allocate CPU context for stopping */ 582a3114836SGerry Liu if (mach_cpucontext_init() != 0) { 583a3114836SGerry Liu rv = ENXIO; 584a3114836SGerry Liu goto out_online; 585a3114836SGerry Liu } 586a3114836SGerry Liu ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP); 587a3114836SGerry Liu if (ctx == NULL) { 588a3114836SGerry Liu rv = ENXIO; 589a3114836SGerry Liu goto out_context_fini; 590a3114836SGerry Liu } 591a3114836SGerry Liu 592a3114836SGerry Liu cpupm_stop(cp); 593a3114836SGerry Liu cpu_event_fini_cpu(cp); 594a3114836SGerry Liu 595a3114836SGerry Liu if (cp->cpu_m.mcpu_cmi_hdl != NULL) { 596a3114836SGerry Liu cmi_fini(cp->cpu_m.mcpu_cmi_hdl); 597a3114836SGerry Liu cp->cpu_m.mcpu_cmi_hdl = NULL; 598a3114836SGerry Liu } 599a3114836SGerry Liu 600a3114836SGerry Liu rv = mach_cpu_stop(cp, ctx); 601a3114836SGerry Liu if (rv != 0) { 602a3114836SGerry Liu goto out_enable_cmi; 603a3114836SGerry Liu } 604a3114836SGerry Liu 605a3114836SGerry Liu /* Wait until the target CPU has been halted. */ 606a3114836SGerry Liu while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) { 607a3114836SGerry Liu delay(1); 608a3114836SGerry Liu } 609a3114836SGerry Liu rm->rm_cpu_halted = 0xffff; 610a3114836SGerry Liu 611a3114836SGerry Liu /* CPU_READY has been cleared by mach_cpu_stop. */ 612a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_READY) == 0); 613a3114836SGerry Liu ASSERT((cp->cpu_flags & CPU_RUNNING) == 0); 614a3114836SGerry Liu cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF; 615a3114836SGerry Liu CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id); 616a3114836SGerry Liu 617a3114836SGerry Liu mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP); 618a3114836SGerry Liu mach_cpucontext_fini(); 619a3114836SGerry Liu 620a3114836SGerry Liu return (0); 621a3114836SGerry Liu 622a3114836SGerry Liu out_enable_cmi: 623a3114836SGerry Liu { 624a3114836SGerry Liu cmi_hdl_t hdl; 625a3114836SGerry Liu 626a3114836SGerry Liu if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), 627a3114836SGerry Liu cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) { 6287417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_MCA)) 629a3114836SGerry Liu cmi_mca_init(hdl); 630a3114836SGerry Liu cp->cpu_m.mcpu_cmi_hdl = hdl; 631a3114836SGerry Liu } 632a3114836SGerry Liu } 633a3114836SGerry Liu cpu_event_init_cpu(cp); 634a3114836SGerry Liu cpupm_start(cp); 635a3114836SGerry Liu mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP); 636a3114836SGerry Liu 637a3114836SGerry Liu out_context_fini: 638a3114836SGerry Liu mach_cpucontext_fini(); 639a3114836SGerry Liu 640a3114836SGerry Liu out_online: 641a3114836SGerry Liu (void) e_ddi_branch_configure(dip, NULL, 0); 642a3114836SGerry Liu 643a3114836SGerry Liu if (rv != EAGAIN && rv != ETIME) { 644a3114836SGerry Liu rv = ENXIO; 645a3114836SGerry Liu } 646a3114836SGerry Liu 647a3114836SGerry Liu return (rv); 648ae115bc7Smrj } 649b9bc7f78Ssmaybe 650b9bc7f78Ssmaybe /* 651b9bc7f78Ssmaybe * Return vcpu state, since this could be a virtual environment that we 652b9bc7f78Ssmaybe * are unaware of, return "unknown". 653b9bc7f78Ssmaybe */ 654b9bc7f78Ssmaybe /* ARGSUSED */ 655b9bc7f78Ssmaybe int 656b9bc7f78Ssmaybe vcpu_on_pcpu(processorid_t cpu) 657b9bc7f78Ssmaybe { 658b9bc7f78Ssmaybe return (VCPU_STATE_UNKNOWN); 659b9bc7f78Ssmaybe } 660