xref: /illumos-gate/usr/src/uts/i86pc/os/mp_startup.c (revision 618b6b99eb6eee4272ca949f5ac45efb4425f02c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/thread.h>
31 #include <sys/cpuvar.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/proc.h>
35 #include <sys/disp.h>
36 #include <sys/class.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/asm_linkage.h>
40 #include <sys/x_call.h>
41 #include <sys/systm.h>
42 #include <sys/var.h>
43 #include <sys/vtrace.h>
44 #include <vm/hat.h>
45 #include <vm/as.h>
46 #include <vm/seg_kmem.h>
47 #include <vm/seg_kp.h>
48 #include <sys/segments.h>
49 #include <sys/kmem.h>
50 #include <sys/stack.h>
51 #include <sys/smp_impldefs.h>
52 #include <sys/x86_archext.h>
53 #include <sys/machsystm.h>
54 #include <sys/traptrace.h>
55 #include <sys/clock.h>
56 #include <sys/cpc_impl.h>
57 #include <sys/pg.h>
58 #include <sys/cmt.h>
59 #include <sys/dtrace.h>
60 #include <sys/archsystm.h>
61 #include <sys/fp.h>
62 #include <sys/reboot.h>
63 #include <sys/kdi_machimpl.h>
64 #include <vm/hat_i86.h>
65 #include <sys/memnode.h>
66 #include <sys/pci_cfgspace.h>
67 #include <sys/mach_mmu.h>
68 #include <sys/sysmacros.h>
69 #include <sys/cpu_module.h>
70 
/*
 * Boot CPU storage and the global CPU tables.  Only cpu[0] is populated
 * statically (it points at cpus[0]); mp_startup_init() below asserts that
 * the slot for any CPU it is asked to set up is still NULL and allocates
 * that CPU's structure dynamically.
 */
71 struct cpu	cpus[1];			/* CPU data */
72 struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
73 cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */
74 
75 /*
76  * Useful for disabling MP bring-up on a MP capable system.
 * start_other_cpus() skips starting the other CPUs when this is 0.
77  */
78 int use_mp = 1;
79 
80 /*
81  * to be set by a PSM to indicate what cpus
82  * are sitting around on the system.
 * start_other_cpus() removes the boot CPU from this set, and removes any
 * CPU for which start_cpu() returns an error.
83  */
84 cpuset_t mp_cpus;
85 
86 /*
87  * This variable is used by the hat layer to decide whether or not
88  * critical sections are needed to prevent race conditions.  For sun4m,
89  * this variable is set once enough MP initialization has been done in
90  * order to allow cross calls.
 *
 * In this file it is set to 1 by start_other_cpus(), after xc_init() has
 * been called and mach_cpucontext_init() has succeeded.
 *
 * cpu_ready_set holds the CPUs that have finished starting; start_cpu()
 * waits for a CPU to appear in it before returning.  The initial value of
 * 1 presumably marks CPU 0 as ready -- TODO confirm against the cpuset_t
 * representation.
91  */
92 int flushes_require_xcalls;
93 cpuset_t cpu_ready_set = 1;
94 
/* Entry point of each slave CPU's startup thread; see mp_startup_init(). */
95 static 	void	mp_startup(void);
96 
/* Enable/disable helpers for the sysenter (SEP) and syscall (ASYSC) paths. */
97 static void cpu_sep_enable(void);
98 static void cpu_sep_disable(void);
99 static void cpu_asysc_enable(void);
100 static void cpu_asysc_disable(void);
101 
/* When non-zero, start_cpu() runs tsc_sync_master() for each new CPU. */
102 extern int tsc_gethrtime_enable;
103 
104 /*
105  * Init CPU info - get CPU type info for processor_info system call.
106  */
107 void
108 init_cpu_info(struct cpu *cp)
109 {
110 	processor_info_t *pi = &cp->cpu_type_info;
111 	char buf[CPU_IDSTRLEN];
112 
113 	/*
114 	 * Get clock-frequency property for the CPU.
115 	 */
116 	pi->pi_clock = cpu_freq;
117 
118 	/*
119 	 * Current frequency in Hz.
120 	 */
121 	cp->cpu_curr_clock = cpu_freq_hz;
122 
123 	(void) strcpy(pi->pi_processor_type, "i386");
124 	if (fpu_exists)
125 		(void) strcpy(pi->pi_fputypes, "i387 compatible");
126 
127 	(void) cpuid_getidstr(cp, buf, sizeof (buf));
128 
129 	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
130 	(void) strcpy(cp->cpu_idstr, buf);
131 
132 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
133 
134 	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
135 	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
136 	(void) strcpy(cp->cpu_brandstr, buf);
137 
138 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
139 }
140 
141 /*
142  * Configure syscall support on this CPU.
143  */
144 /*ARGSUSED*/
145 static void
146 init_cpu_syscall(struct cpu *cp)
147 {
148 	kpreempt_disable();
149 
150 #if defined(__amd64)
151 	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC)) {
152 
153 #if !defined(__lint)
154 		/*
155 		 * The syscall instruction imposes a certain ordering on
156 		 * segment selectors, so we double-check that ordering
157 		 * here.
158 		 */
159 		ASSERT(KDS_SEL == KCS_SEL + 8);
160 		ASSERT(UDS_SEL == U32CS_SEL + 8);
161 		ASSERT(UCS_SEL == U32CS_SEL + 16);
162 #endif
163 		/*
164 		 * Turn syscall/sysret extensions on.
165 		 */
166 		cpu_asysc_enable();
167 
168 		/*
169 		 * Program the magic registers ..
170 		 */
171 		wrmsr(MSR_AMD_STAR,
172 		    ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
173 		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
174 		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
175 
176 		/*
177 		 * This list of flags is masked off the incoming
178 		 * %rfl when we enter the kernel.
179 		 */
180 		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
181 	}
182 #endif
183 
184 	/*
185 	 * On 32-bit kernels, we use sysenter/sysexit because it's too
186 	 * hard to use syscall/sysret, and it is more portable anyway.
187 	 *
188 	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
189 	 * variant isn't available to 32-bit applications, but sysenter is.
190 	 */
191 	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP)) {
192 
193 #if !defined(__lint)
194 		/*
195 		 * The sysenter instruction imposes a certain ordering on
196 		 * segment selectors, so we double-check that ordering
197 		 * here. See "sysenter" in Intel document 245471-012, "IA-32
198 		 * Intel Architecture Software Developer's Manual Volume 2:
199 		 * Instruction Set Reference"
200 		 */
201 		ASSERT(KDS_SEL == KCS_SEL + 8);
202 
203 		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
204 		ASSERT32(UDS_SEL == UCS_SEL + 8);
205 
206 		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
207 		ASSERT64(UDS_SEL == U32CS_SEL + 8);
208 #endif
209 
210 		cpu_sep_enable();
211 
212 		/*
213 		 * resume() sets this value to the base of the threads stack
214 		 * via a context handler.
215 		 */
216 		wrmsr(MSR_INTC_SEP_ESP, 0);
217 		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
218 	}
219 
220 	kpreempt_enable();
221 }
222 
223 /*
224  * Multiprocessor initialization.
225  *
226  * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
227  * startup and idle threads for the specified CPU.
228  */
229 struct cpu *
230 mp_startup_init(int cpun)
231 {
232 	struct cpu *cp;
233 	kthread_id_t tp;
234 	caddr_t	sp;
235 	proc_t *procp;
236 	extern void idle();
237 
238 #ifdef TRAPTRACE
239 	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
240 #endif
241 
242 	ASSERT(cpun < NCPU && cpu[cpun] == NULL);
243 
244 	cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
245 	if (x86_feature & X86_MWAIT)
246 		cp->cpu_m.mcpu_mwait = mach_alloc_mwait(CPU);
247 
248 	procp = curthread->t_procp;
249 
250 	mutex_enter(&cpu_lock);
251 	/*
252 	 * Initialize the dispatcher first.
253 	 */
254 	disp_cpu_init(cp);
255 	mutex_exit(&cpu_lock);
256 
257 	cpu_vm_data_init(cp);
258 
259 	/*
260 	 * Allocate and initialize the startup thread for this CPU.
261 	 * Interrupt and process switch stacks get allocated later
262 	 * when the CPU starts running.
263 	 */
264 	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
265 	    TS_STOPPED, maxclsyspri);
266 
267 	/*
268 	 * Set state to TS_ONPROC since this thread will start running
269 	 * as soon as the CPU comes online.
270 	 *
271 	 * All the other fields of the thread structure are setup by
272 	 * thread_create().
273 	 */
274 	THREAD_ONPROC(tp, cp);
275 	tp->t_preempt = 1;
276 	tp->t_bound_cpu = cp;
277 	tp->t_affinitycnt = 1;
278 	tp->t_cpu = cp;
279 	tp->t_disp_queue = cp->cpu_disp;
280 
281 	/*
282 	 * Setup thread to start in mp_startup.
283 	 */
284 	sp = tp->t_stk;
285 	tp->t_pc = (uintptr_t)mp_startup;
286 	tp->t_sp = (uintptr_t)(sp - MINFRAME);
287 #if defined(__amd64)
288 	tp->t_sp -= STACK_ENTRY_ALIGN;		/* fake a call */
289 #endif
290 
291 	cp->cpu_id = cpun;
292 	cp->cpu_self = cp;
293 	cp->cpu_thread = tp;
294 	cp->cpu_lwp = NULL;
295 	cp->cpu_dispthread = tp;
296 	cp->cpu_dispatch_pri = DISP_PRIO(tp);
297 
298 	/*
299 	 * cpu_base_spl must be set explicitly here to prevent any blocking
300 	 * operations in mp_startup from causing the spl of the cpu to drop
301 	 * to 0 (allowing device interrupts before we're ready) in resume().
302 	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
303 	 * As an extra bit of security on DEBUG kernels, this is enforced with
304 	 * an assertion in mp_startup() -- before cpu_base_spl is set to its
305 	 * proper value.
306 	 */
307 	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);
308 
309 	/*
310 	 * Now, initialize per-CPU idle thread for this CPU.
311 	 */
312 	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);
313 
314 	cp->cpu_idle_thread = tp;
315 
316 	tp->t_preempt = 1;
317 	tp->t_bound_cpu = cp;
318 	tp->t_affinitycnt = 1;
319 	tp->t_cpu = cp;
320 	tp->t_disp_queue = cp->cpu_disp;
321 
322 	/*
323 	 * Bootstrap the CPU's PG data
324 	 */
325 	pg_cpu_bootstrap(cp);
326 
327 	/*
328 	 * Perform CPC initialization on the new CPU.
329 	 */
330 	kcpc_hw_init(cp);
331 
332 	/*
333 	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
334 	 * for each CPU.
335 	 */
336 	setup_vaddr_for_ppcopy(cp);
337 
338 	/*
339 	 * Allocate page for new GDT and initialize from current GDT.
340 	 */
341 #if !defined(__lint)
342 	ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
343 #endif
344 	cp->cpu_m.mcpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
345 	bcopy(CPU->cpu_m.mcpu_gdt, cp->cpu_m.mcpu_gdt,
346 	    (sizeof (*cp->cpu_m.mcpu_gdt) * NGDT));
347 
348 #if defined(__i386)
349 	/*
350 	 * setup kernel %gs.
351 	 */
352 	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
353 	    SEL_KPL, 0, 1);
354 #endif
355 
356 	/*
357 	 * If we have more than one node, each cpu gets a copy of IDT
358 	 * local to its node. If this is a Pentium box, we use cpu 0's
359 	 * IDT. cpu 0's IDT has been made read-only to workaround the
360 	 * cmpxchgl register bug
361 	 */
362 	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
363 		struct machcpu *mcpu = &cp->cpu_m;
364 
365 		mcpu->mcpu_idt = kmem_alloc(sizeof (idt0), KM_SLEEP);
366 		bcopy(idt0, mcpu->mcpu_idt, sizeof (idt0));
367 	} else {
368 		cp->cpu_m.mcpu_idt = CPU->cpu_m.mcpu_idt;
369 	}
370 
371 	/*
372 	 * Get interrupt priority data from cpu 0.
373 	 */
374 	cp->cpu_pri_data = CPU->cpu_pri_data;
375 
376 	/*
377 	 * alloc space for cpuid info
378 	 */
379 	cpuid_alloc_space(cp);
380 
381 	/*
382 	 * alloc space for ucode_info
383 	 */
384 	ucode_alloc_space(cp);
385 
386 	hat_cpu_online(cp);
387 
388 #ifdef TRAPTRACE
389 	/*
390 	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers
391 	 */
392 	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
393 	ttc->ttc_next = ttc->ttc_first;
394 	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
395 #endif
396 	/*
397 	 * Record that we have another CPU.
398 	 */
399 	mutex_enter(&cpu_lock);
400 	/*
401 	 * Initialize the interrupt threads for this CPU
402 	 */
403 	cpu_intr_alloc(cp, NINTR_THREADS);
404 	/*
405 	 * Add CPU to list of available CPUs.  It'll be on the active list
406 	 * after mp_startup().
407 	 */
408 	cpu_add_unit(cp);
409 	mutex_exit(&cpu_lock);
410 
411 	return (cp);
412 }
413 
414 /*
415  * Undo what was done in mp_startup_init
416  */
417 static void
418 mp_startup_fini(struct cpu *cp, int error)
419 {
420 	mutex_enter(&cpu_lock);
421 
422 	/*
423 	 * Remove the CPU from the list of available CPUs.
424 	 */
425 	cpu_del_unit(cp->cpu_id);
426 
427 	if (error == ETIMEDOUT) {
428 		/*
429 		 * The cpu was started, but never *seemed* to run any
430 		 * code in the kernel; it's probably off spinning in its
431 		 * own private world, though with potential references to
432 		 * our kmem-allocated IDTs and GDTs (for example).
433 		 *
434 		 * Worse still, it may actually wake up some time later,
435 		 * so rather than guess what it might or might not do, we
436 		 * leave the fundamental data structures intact.
437 		 */
438 		cp->cpu_flags = 0;
439 		mutex_exit(&cpu_lock);
440 		return;
441 	}
442 
443 	/*
444 	 * At this point, the only threads bound to this CPU should
445 	 * special per-cpu threads: it's idle thread, it's pause threads,
446 	 * and it's interrupt threads.  Clean these up.
447 	 */
448 	cpu_destroy_bound_threads(cp);
449 	cp->cpu_idle_thread = NULL;
450 
451 	/*
452 	 * Free the interrupt stack.
453 	 */
454 	segkp_release(segkp,
455 	    cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
456 
457 	mutex_exit(&cpu_lock);
458 
459 #ifdef TRAPTRACE
460 	/*
461 	 * Discard the trap trace buffer
462 	 */
463 	{
464 		trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];
465 
466 		kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
467 		ttc->ttc_first = NULL;
468 	}
469 #endif
470 
471 	hat_cpu_offline(cp);
472 
473 	cpuid_free_space(cp);
474 
475 	ucode_free_space(cp);
476 
477 	if (cp->cpu_m.mcpu_idt != CPU->cpu_m.mcpu_idt)
478 		kmem_free(cp->cpu_m.mcpu_idt, sizeof (idt0));
479 	cp->cpu_m.mcpu_idt = NULL;
480 
481 	kmem_free(cp->cpu_m.mcpu_gdt, PAGESIZE);
482 	cp->cpu_m.mcpu_gdt = NULL;
483 
484 	teardown_vaddr_for_ppcopy(cp);
485 
486 	kcpc_hw_fini(cp);
487 
488 	cp->cpu_dispthread = NULL;
489 	cp->cpu_thread = NULL;	/* discarded by cpu_destroy_bound_threads() */
490 
491 	cpu_vm_data_destroy(cp);
492 
493 	mutex_enter(&cpu_lock);
494 	disp_cpu_fini(cp);
495 	mutex_exit(&cpu_lock);
496 
497 	kmem_free(cp, sizeof (*cp));
498 }
499 
500 /*
501  * Apply workarounds for known errata, and warn about those that are absent.
502  *
503  * System vendors occasionally create configurations which contain different
504  * revisions of the CPUs that are almost but not exactly the same.  At the
505  * time of writing, this meant that their clock rates were the same, their
506  * feature sets were the same, but the required workarounds were -not-
507  * necessarily the same.  So, this routine is invoked on -every- CPU soon
508  * after starting to make sure that the resulting system contains the most
509  * pessimal set of workarounds needed to cope with *any* of the CPUs in the
510  * system.
511  *
512  * workaround_errata is invoked early in mlsetup() for CPU 0, and in
513  * mp_startup() for all slave CPUs. Slaves process workaround_errata prior
514  * to acknowledging their readiness to the master, so this routine will
515  * never be executed by multiple CPUs in parallel, thus making updates to
516  * global data safe.
517  *
518  * These workarounds are based on Rev 3.57 of the Revision Guide for
519  * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
520  */
521 
/*
 * Per-erratum counters.  Each one exists only when the matching
 * OPTERON_ERRATUM_* / OPTERON_WORKAROUND_* macro is defined for the build.
 * workaround_errata() increments them as it examines each CPU, and
 * workaround_errata_end() reports on the ones that are non-zero.
 *
 * Two things are worth knowing when reading the code below:
 *  - the 109, 123 and 131 counters are bumped when the BIOS-provided fix
 *    is found to be absent, and result in a warning rather than a
 *    "workaround applied" message;
 *  - opteron_erratum_108 is never incremented by workaround_errata().
 */
522 #if defined(OPTERON_ERRATUM_88)
523 int opteron_erratum_88;		/* if non-zero -> at least one cpu has it */
524 #endif
525 
526 #if defined(OPTERON_ERRATUM_91)
527 int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
528 #endif
529 
530 #if defined(OPTERON_ERRATUM_93)
531 int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
532 #endif
533 
534 #if defined(OPTERON_ERRATUM_95)
535 int opteron_erratum_95;		/* if non-zero -> at least one cpu has it */
536 #endif
537 
538 #if defined(OPTERON_ERRATUM_100)
539 int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
540 #endif
541 
542 #if defined(OPTERON_ERRATUM_108)
543 int opteron_erratum_108;	/* if non-zero -> at least one cpu has it */
544 #endif
545 
546 #if defined(OPTERON_ERRATUM_109)
547 int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
548 #endif
549 
550 #if defined(OPTERON_ERRATUM_121)
551 int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
552 #endif
553 
554 #if defined(OPTERON_ERRATUM_122)
555 int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
556 #endif
557 
558 #if defined(OPTERON_ERRATUM_123)
559 int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
560 #endif
561 
562 #if defined(OPTERON_ERRATUM_131)
563 int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
564 #endif
565 
566 #if defined(OPTERON_WORKAROUND_6336786)
567 int opteron_workaround_6336786;	/* non-zero -> WA relevant and applied */
/* Set non-zero to apply workaround 6336786 even with a single core. */
568 int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
569 #endif
570 
571 #if defined(OPTERON_WORKAROUND_6323525)
572 int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
573 #endif
574 
575 static void
576 workaround_warning(cpu_t *cp, uint_t erratum)
577 {
578 	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
579 	    cp->cpu_id, erratum);
580 }
581 
582 static void
583 workaround_applied(uint_t erratum)
584 {
585 	if (erratum > 1000000)
586 		cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
587 		    erratum);
588 	else
589 		cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
590 		    erratum);
591 }
592 
593 static void
594 msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
595 {
596 	cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
597 	    cp->cpu_id, rw, msr, error);
598 }
599 
600 uint_t
601 workaround_errata(struct cpu *cpu)
602 {
603 	uint_t missing = 0;
604 
605 	ASSERT(cpu == CPU);
606 
607 	/*LINTED*/
608 	if (cpuid_opteron_erratum(cpu, 88) > 0) {
609 		/*
610 		 * SWAPGS May Fail To Read Correct GS Base
611 		 */
612 #if defined(OPTERON_ERRATUM_88)
613 		/*
614 		 * The workaround is an mfence in the relevant assembler code
615 		 */
616 		opteron_erratum_88++;
617 #else
618 		workaround_warning(cpu, 88);
619 		missing++;
620 #endif
621 	}
622 
623 	if (cpuid_opteron_erratum(cpu, 91) > 0) {
624 		/*
625 		 * Software Prefetches May Report A Page Fault
626 		 */
627 #if defined(OPTERON_ERRATUM_91)
628 		/*
629 		 * fix is in trap.c
630 		 */
631 		opteron_erratum_91++;
632 #else
633 		workaround_warning(cpu, 91);
634 		missing++;
635 #endif
636 	}
637 
638 	if (cpuid_opteron_erratum(cpu, 93) > 0) {
639 		/*
640 		 * RSM Auto-Halt Restart Returns to Incorrect RIP
641 		 */
642 #if defined(OPTERON_ERRATUM_93)
643 		/*
644 		 * fix is in trap.c
645 		 */
646 		opteron_erratum_93++;
647 #else
648 		workaround_warning(cpu, 93);
649 		missing++;
650 #endif
651 	}
652 
653 	/*LINTED*/
654 	if (cpuid_opteron_erratum(cpu, 95) > 0) {
655 		/*
656 		 * RET Instruction May Return to Incorrect EIP
657 		 */
658 #if defined(OPTERON_ERRATUM_95)
659 #if defined(_LP64)
660 		/*
661 		 * Workaround this by ensuring that 32-bit user code and
662 		 * 64-bit kernel code never occupy the same address
663 		 * range mod 4G.
664 		 */
665 		if (_userlimit32 > 0xc0000000ul)
666 			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;
667 
668 		/*LINTED*/
669 		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
670 		opteron_erratum_95++;
671 #endif	/* _LP64 */
672 #else
673 		workaround_warning(cpu, 95);
674 		missing++;
675 #endif
676 	}
677 
678 	if (cpuid_opteron_erratum(cpu, 100) > 0) {
679 		/*
680 		 * Compatibility Mode Branches Transfer to Illegal Address
681 		 */
682 #if defined(OPTERON_ERRATUM_100)
683 		/*
684 		 * fix is in trap.c
685 		 */
686 		opteron_erratum_100++;
687 #else
688 		workaround_warning(cpu, 100);
689 		missing++;
690 #endif
691 	}
692 
693 	/*LINTED*/
694 	if (cpuid_opteron_erratum(cpu, 108) > 0) {
695 		/*
696 		 * CPUID Instruction May Return Incorrect Model Number In
697 		 * Some Processors
698 		 */
699 #if defined(OPTERON_ERRATUM_108)
700 		/*
701 		 * (Our cpuid-handling code corrects the model number on
702 		 * those processors)
703 		 */
704 #else
705 		workaround_warning(cpu, 108);
706 		missing++;
707 #endif
708 	}
709 
710 	/*LINTED*/
711 	if (cpuid_opteron_erratum(cpu, 109) > 0) do {
712 		/*
713 		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
714 		 */
715 #if defined(OPTERON_ERRATUM_109)
716 		/*
717 		 * The "workaround" is to print a warning to upgrade the BIOS
718 		 */
719 		uint64_t value;
720 		const uint_t msr = MSR_AMD_PATCHLEVEL;
721 		int err;
722 
723 		if ((err = checked_rdmsr(msr, &value)) != 0) {
724 			msr_warning(cpu, "rd", msr, err);
725 			workaround_warning(cpu, 109);
726 			missing++;
727 		}
728 		if (value == 0)
729 			opteron_erratum_109++;
730 #else
731 		workaround_warning(cpu, 109);
732 		missing++;
733 #endif
734 	/*CONSTANTCONDITION*/
735 	} while (0);
736 
737 	/*LINTED*/
738 	if (cpuid_opteron_erratum(cpu, 121) > 0) {
739 		/*
740 		 * Sequential Execution Across Non_Canonical Boundary Caused
741 		 * Processor Hang
742 		 */
743 #if defined(OPTERON_ERRATUM_121)
744 #if defined(_LP64)
745 		/*
746 		 * Erratum 121 is only present in long (64 bit) mode.
747 		 * Workaround is to include the page immediately before the
748 		 * va hole to eliminate the possibility of system hangs due to
749 		 * sequential execution across the va hole boundary.
750 		 */
751 		if (opteron_erratum_121)
752 			opteron_erratum_121++;
753 		else {
754 			if (hole_start) {
755 				hole_start -= PAGESIZE;
756 			} else {
757 				/*
758 				 * hole_start not yet initialized by
759 				 * mmu_init. Initialize hole_start
760 				 * with value to be subtracted.
761 				 */
762 				hole_start = PAGESIZE;
763 			}
764 			opteron_erratum_121++;
765 		}
766 #endif	/* _LP64 */
767 #else
768 		workaround_warning(cpu, 121);
769 		missing++;
770 #endif
771 	}
772 
773 	/*LINTED*/
774 	if (cpuid_opteron_erratum(cpu, 122) > 0) do {
775 		/*
776 		 * TLB Flush Filter May Cause Coherency Problem in
777 		 * Multiprocessor Systems
778 		 */
779 #if defined(OPTERON_ERRATUM_122)
780 		uint64_t value;
781 		const uint_t msr = MSR_AMD_HWCR;
782 		int error;
783 
784 		/*
785 		 * Erratum 122 is only present in MP configurations (multi-core
786 		 * or multi-processor).
787 		 */
788 		if (!opteron_erratum_122 && lgrp_plat_node_cnt == 1 &&
789 		    cpuid_get_ncpu_per_chip(cpu) == 1)
790 			break;
791 
792 		/* disable TLB Flush Filter */
793 
794 		if ((error = checked_rdmsr(msr, &value)) != 0) {
795 			msr_warning(cpu, "rd", msr, error);
796 			workaround_warning(cpu, 122);
797 			missing++;
798 		} else {
799 			value |= (uint64_t)AMD_HWCR_FFDIS;
800 			if ((error = checked_wrmsr(msr, value)) != 0) {
801 				msr_warning(cpu, "wr", msr, error);
802 				workaround_warning(cpu, 122);
803 				missing++;
804 			}
805 		}
806 		opteron_erratum_122++;
807 #else
808 		workaround_warning(cpu, 122);
809 		missing++;
810 #endif
811 	/*CONSTANTCONDITION*/
812 	} while (0);
813 
814 	/*LINTED*/
815 	if (cpuid_opteron_erratum(cpu, 123) > 0) do {
816 		/*
817 		 * Bypassed Reads May Cause Data Corruption of System Hang in
818 		 * Dual Core Processors
819 		 */
820 #if defined(OPTERON_ERRATUM_123)
821 		uint64_t value;
822 		const uint_t msr = MSR_AMD_PATCHLEVEL;
823 		int err;
824 
825 		/*
826 		 * Erratum 123 applies only to multi-core cpus.
827 		 */
828 		if (cpuid_get_ncpu_per_chip(cpu) < 2)
829 			break;
830 
831 		/*
832 		 * The "workaround" is to print a warning to upgrade the BIOS
833 		 */
834 		if ((err = checked_rdmsr(msr, &value)) != 0) {
835 			msr_warning(cpu, "rd", msr, err);
836 			workaround_warning(cpu, 123);
837 			missing++;
838 		}
839 		if (value == 0)
840 			opteron_erratum_123++;
841 #else
842 		workaround_warning(cpu, 123);
843 		missing++;
844 
845 #endif
846 	/*CONSTANTCONDITION*/
847 	} while (0);
848 
849 	/*LINTED*/
850 	if (cpuid_opteron_erratum(cpu, 131) > 0) do {
851 		/*
852 		 * Multiprocessor Systems with Four or More Cores May Deadlock
853 		 * Waiting for a Probe Response
854 		 */
855 #if defined(OPTERON_ERRATUM_131)
856 		uint64_t nbcfg;
857 		const uint_t msr = MSR_AMD_NB_CFG;
858 		const uint64_t wabits =
859 		    AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
860 		int error;
861 
862 		/*
863 		 * Erratum 131 applies to any system with four or more cores.
864 		 */
865 		if (opteron_erratum_131)
866 			break;
867 
868 		if (lgrp_plat_node_cnt * cpuid_get_ncpu_per_chip(cpu) < 4)
869 			break;
870 
871 		/*
872 		 * Print a warning if neither of the workarounds for
873 		 * erratum 131 is present.
874 		 */
875 		if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
876 			msr_warning(cpu, "rd", msr, error);
877 			workaround_warning(cpu, 131);
878 			missing++;
879 		} else if ((nbcfg & wabits) == 0) {
880 			opteron_erratum_131++;
881 		} else {
882 			/* cannot have both workarounds set */
883 			ASSERT((nbcfg & wabits) != wabits);
884 		}
885 #else
886 		workaround_warning(cpu, 131);
887 		missing++;
888 #endif
889 	/*CONSTANTCONDITION*/
890 	} while (0);
891 
892 	/*
893 	 * This isn't really an erratum, but for convenience the
894 	 * detection/workaround code lives here and in cpuid_opteron_erratum.
895 	 */
896 	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
897 #if defined(OPTERON_WORKAROUND_6336786)
898 		/*
899 		 * Disable C1-Clock ramping on multi-core/multi-processor
900 		 * K8 platforms to guard against TSC drift.
901 		 */
902 		if (opteron_workaround_6336786) {
903 			opteron_workaround_6336786++;
904 		} else if ((lgrp_plat_node_cnt *
905 		    cpuid_get_ncpu_per_chip(cpu) > 1) ||
906 		    opteron_workaround_6336786_UP) {
907 			int	node;
908 			uint8_t data;
909 
910 			for (node = 0; node < lgrp_plat_node_cnt; node++) {
911 				/*
912 				 * Clear PMM7[1:0] (function 3, offset 0x87)
913 				 * Northbridge device is the node id + 24.
914 				 */
915 				data = pci_getb_func(0, node + 24, 3, 0x87);
916 				data &= 0xFC;
917 				pci_putb_func(0, node + 24, 3, 0x87, data);
918 			}
919 			opteron_workaround_6336786++;
920 		}
921 #else
922 		workaround_warning(cpu, 6336786);
923 		missing++;
924 #endif
925 	}
926 
927 	/*LINTED*/
928 	/*
929 	 * Mutex primitives don't work as expected.
930 	 */
931 	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
932 #if defined(OPTERON_WORKAROUND_6323525)
933 		/*
934 		 * This problem only occurs with 2 or more cores. If bit in
935 		 * MSR_BU_CFG set, then not applicable. The workaround
936 		 * is to patch the semaphone routines with the lfence
937 		 * instruction to provide necessary load memory barrier with
938 		 * possible subsequent read-modify-write ops.
939 		 *
940 		 * It is too early in boot to call the patch routine so
941 		 * set erratum variable to be done in startup_end().
942 		 */
943 		if (opteron_workaround_6323525) {
944 			opteron_workaround_6323525++;
945 		} else if ((x86_feature & X86_SSE2) && ((lgrp_plat_node_cnt *
946 		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
947 			if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0)
948 				opteron_workaround_6323525++;
949 		}
950 #else
951 		workaround_warning(cpu, 6323525);
952 		missing++;
953 #endif
954 	}
955 
956 	return (missing);
957 }
958 
#if defined(OPTERON_ERRATUM_109) || defined(OPTERON_ERRATUM_123) || \
	defined(OPTERON_ERRATUM_131)
/*
 * Warn that the BIOS-supplied fix for `erratum' was not detected.  The
 * same text is used for every erratum handled this way; only the erratum
 * number varies.
 */
static void
workaround_bios_warning(uint_t erratum)
{
	cmn_err(CE_WARN,
	    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
	    " processor\nerratum %u was not detected; updating your"
	    " system's BIOS to a version\ncontaining this"
	    " microcode patch is HIGHLY recommended or erroneous"
	    " system\noperation may occur.\n", erratum);
}
#endif

/*
 * Report the outcome of the workaround_errata() passes: one "workaround
 * applied" message for each erratum whose counter is non-zero, and a BIOS
 * upgrade warning for errata 109, 123 and 131 when their counters are
 * non-zero.  Each report is compiled in only when the matching
 * OPTERON_* macro is defined.
 */
void
workaround_errata_end(void)
{
#if defined(OPTERON_ERRATUM_88)
	if (opteron_erratum_88)
		workaround_applied(88);
#endif
#if defined(OPTERON_ERRATUM_91)
	if (opteron_erratum_91)
		workaround_applied(91);
#endif
#if defined(OPTERON_ERRATUM_93)
	if (opteron_erratum_93)
		workaround_applied(93);
#endif
#if defined(OPTERON_ERRATUM_95)
	if (opteron_erratum_95)
		workaround_applied(95);
#endif
#if defined(OPTERON_ERRATUM_100)
	if (opteron_erratum_100)
		workaround_applied(100);
#endif
#if defined(OPTERON_ERRATUM_108)
	if (opteron_erratum_108)
		workaround_applied(108);
#endif
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109)
		workaround_bios_warning(109);
#endif
#if defined(OPTERON_ERRATUM_121)
	if (opteron_erratum_121)
		workaround_applied(121);
#endif
#if defined(OPTERON_ERRATUM_122)
	if (opteron_erratum_122)
		workaround_applied(122);
#endif
#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123)
		workaround_bios_warning(123);
#endif
#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131)
		workaround_bios_warning(131);
#endif
#if defined(OPTERON_WORKAROUND_6336786)
	if (opteron_workaround_6336786)
		workaround_applied(6336786);
#endif
#if defined(OPTERON_WORKAROUND_6323525)
	if (opteron_workaround_6323525)
		workaround_applied(6323525);
#endif
}
1033 
/*
 * Set of CPUs that have begun running kernel code.  start_cpu() polls
 * this set, waiting for the bit of the CPU it has just started to appear.
 * NOTE(review): the code that adds a CPU to the set is not in this part
 * of the file -- presumably mp_startup(); confirm.
 */
1034 static cpuset_t procset;
1035 
1036 /*
1037  * Start a single cpu, assuming that the kernel context is available
1038  * to successfully start another cpu.
1039  *
1040  * (For example, real mode code is mapped into the right place
1041  * in memory and is ready to be run.)
1042  */
1043 int
1044 start_cpu(processorid_t who)
1045 {
1046 	void *ctx;
1047 	cpu_t *cp;
1048 	int delays;
1049 	int error = 0;
1050 
1051 	ASSERT(who != 0);
1052 
1053 	/*
1054 	 * Check if there's at least a Mbyte of kmem available
1055 	 * before attempting to start the cpu.
1056 	 */
1057 	if (kmem_avail() < 1024 * 1024) {
1058 		/*
1059 		 * Kick off a reap in case that helps us with
1060 		 * later attempts ..
1061 		 */
1062 		kmem_reap();
1063 		return (ENOMEM);
1064 	}
1065 
1066 	cp = mp_startup_init(who);
1067 	if ((ctx = mach_cpucontext_alloc(cp)) == NULL ||
1068 	    (error = mach_cpu_start(cp, ctx)) != 0) {
1069 
1070 		/*
1071 		 * Something went wrong before we even started it
1072 		 */
1073 		if (ctx)
1074 			cmn_err(CE_WARN,
1075 			    "cpu%d: failed to start error %d",
1076 			    cp->cpu_id, error);
1077 		else
1078 			cmn_err(CE_WARN,
1079 			    "cpu%d: failed to allocate context", cp->cpu_id);
1080 
1081 		if (ctx)
1082 			mach_cpucontext_free(cp, ctx, error);
1083 		else
1084 			error = EAGAIN;		/* hmm. */
1085 		mp_startup_fini(cp, error);
1086 		return (error);
1087 	}
1088 
1089 	for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
1090 		if (delays == 500) {
1091 			/*
1092 			 * After five seconds, things are probably looking
1093 			 * a bit bleak - explain the hang.
1094 			 */
1095 			cmn_err(CE_NOTE, "cpu%d: started, "
1096 			    "but not running in the kernel yet", who);
1097 		} else if (delays > 2000) {
1098 			/*
1099 			 * We waited at least 20 seconds, bail ..
1100 			 */
1101 			error = ETIMEDOUT;
1102 			cmn_err(CE_WARN, "cpu%d: timed out", who);
1103 			mach_cpucontext_free(cp, ctx, error);
1104 			mp_startup_fini(cp, error);
1105 			return (error);
1106 		}
1107 
1108 		/*
1109 		 * wait at least 10ms, then check again..
1110 		 */
1111 		delay(USEC_TO_TICK_ROUNDUP(10000));
1112 	}
1113 
1114 	mach_cpucontext_free(cp, ctx, 0);
1115 
1116 	if (tsc_gethrtime_enable)
1117 		tsc_sync_master(who);
1118 
1119 	if (dtrace_cpu_init != NULL) {
1120 		/*
1121 		 * DTrace CPU initialization expects cpu_lock to be held.
1122 		 */
1123 		mutex_enter(&cpu_lock);
1124 		(*dtrace_cpu_init)(who);
1125 		mutex_exit(&cpu_lock);
1126 	}
1127 
1128 	while (!CPU_IN_SET(cpu_ready_set, who))
1129 		delay(1);
1130 
1131 	return (0);
1132 }
1133 
1134 
1135 /*ARGSUSED*/
1136 void
1137 start_other_cpus(int cprboot)
1138 {
1139 	uint_t who;
1140 	uint_t skipped = 0;
1141 	uint_t bootcpuid = 0;
1142 
1143 	/*
1144 	 * Initialize our own cpu_info.
1145 	 */
1146 	init_cpu_info(CPU);
1147 
1148 	/*
1149 	 * Initialize our syscall handlers
1150 	 */
1151 	init_cpu_syscall(CPU);
1152 
1153 	/*
1154 	 * Take the boot cpu out of the mp_cpus set because we know
1155 	 * it's already running.  Add it to the cpu_ready_set for
1156 	 * precisely the same reason.
1157 	 */
1158 	CPUSET_DEL(mp_cpus, bootcpuid);
1159 	CPUSET_ADD(cpu_ready_set, bootcpuid);
1160 
1161 	/*
1162 	 * if only 1 cpu or not using MP, skip the rest of this
1163 	 */
1164 	if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
1165 		if (use_mp == 0)
1166 			cmn_err(CE_CONT, "?***** Not in MP mode\n");
1167 		goto done;
1168 	}
1169 
1170 	/*
1171 	 * perform such initialization as is needed
1172 	 * to be able to take CPUs on- and off-line.
1173 	 */
1174 	cpu_pause_init();
1175 
1176 	xc_init();		/* initialize processor crosscalls */
1177 
1178 	if (mach_cpucontext_init() != 0)
1179 		goto done;
1180 
1181 	flushes_require_xcalls = 1;
1182 
1183 	/*
1184 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
1185 	 * do their TSC syncs with the same CPU.
1186 	 */
1187 	affinity_set(CPU_CURRENT);
1188 
1189 	for (who = 0; who < NCPU; who++) {
1190 
1191 		if (!CPU_IN_SET(mp_cpus, who))
1192 			continue;
1193 		ASSERT(who != bootcpuid);
1194 		if (ncpus >= max_ncpus) {
1195 			skipped = who;
1196 			continue;
1197 		}
1198 		if (start_cpu(who) != 0)
1199 			CPUSET_DEL(mp_cpus, who);
1200 	}
1201 
1202 	/* Free the space allocated to hold the microcode file */
1203 	ucode_free();
1204 
1205 	affinity_clear();
1206 
1207 	if (skipped) {
1208 		cmn_err(CE_NOTE,
1209 		    "System detected %d cpus, but "
1210 		    "only %d cpu(s) were enabled during boot.",
1211 		    skipped + 1, ncpus);
1212 		cmn_err(CE_NOTE,
1213 		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
1214 		    "See eeprom(1M).");
1215 	}
1216 
1217 done:
1218 	workaround_errata_end();
1219 	mach_cpucontext_fini();
1220 
1221 	cmi_post_mpstartup();
1222 }
1223 
/*
 * No i86pc platform supports dynamic cpu allocation, so the
 * configure/unconfigure entry points are stubs.
 *
 * mp_cpu_configure() ignores cpuid and always returns ENOTSUP.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	/* dynamic configuration is not supported */
	return (ENOTSUP);
}
1233 
/*
 * Stub: ignores cpuid and always returns ENOTSUP.
 */
/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	/* dynamic unconfiguration is not supported */
	return (ENOTSUP);
}
1240 
1241 /*
1242  * Startup function for 'other' CPUs (besides boot cpu).
1243  * Called from real_mode_start.
1244  *
1245  * WARNING: until CPU_READY is set, mp_startup and routines called by
1246  * mp_startup should not call routines (e.g. kmem_free) that could call
1247  * hat_unload which requires CPU_READY to be set.
1248  */
1249 void
1250 mp_startup(void)
1251 {
1252 	struct cpu *cp = CPU;
1253 	uint_t new_x86_feature;
1254 
1255 	/*
1256 	 * We need to get TSC on this proc synced (i.e., any delta
1257 	 * from cpu0 accounted for) as soon as we can, because many
1258 	 * many things use gethrtime/pc_gethrestime, including
1259 	 * interrupts, cmn_err, etc.
1260 	 */
1261 
1262 	/* Let cpu0 continue into tsc_sync_master() */
1263 	CPUSET_ATOMIC_ADD(procset, cp->cpu_id);
1264 
1265 	if (tsc_gethrtime_enable)
1266 		tsc_sync_slave();
1267 
1268 	/*
1269 	 * Once this was done from assembly, but it's safer here; if
1270 	 * it blocks, we need to be able to swtch() to and from, and
1271 	 * since we get here by calling t_pc, we need to do that call
1272 	 * before swtch() overwrites it.
1273 	 */
1274 
1275 	(void) (*ap_mlsetup)();
1276 
1277 	new_x86_feature = cpuid_pass1(cp);
1278 
1279 	/*
1280 	 * We need to Sync MTRR with cpu0's MTRR. We have to do
1281 	 * this with interrupts disabled.
1282 	 */
1283 	if (x86_feature & X86_MTRR)
1284 		mtrr_sync();
1285 
1286 	/*
1287 	 * Set up TSC_AUX to contain the cpuid for this processor
1288 	 * for the rdtscp instruction.
1289 	 */
1290 	if (x86_feature & X86_TSCP)
1291 		(void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);
1292 
1293 	/*
1294 	 * Initialize this CPU's syscall handlers
1295 	 */
1296 	init_cpu_syscall(cp);
1297 
1298 	/*
1299 	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
1300 	 * highest level at which a routine is permitted to block on
1301 	 * an adaptive mutex (allows for cpu poke interrupt in case
1302 	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
1303 	 * device interrupts that may end up in the hat layer issuing cross
1304 	 * calls before CPU_READY is set.
1305 	 */
1306 	splx(ipltospl(LOCK_LEVEL));
1307 	sti();
1308 
1309 	/*
1310 	 * Do a sanity check to make sure this new CPU is a sane thing
1311 	 * to add to the collection of processors running this system.
1312 	 *
1313 	 * XXX	Clearly this needs to get more sophisticated, if x86
1314 	 * systems start to get built out of heterogenous CPUs; as is
1315 	 * likely to happen once the number of processors in a configuration
1316 	 * gets large enough.
1317 	 */
1318 	if ((x86_feature & new_x86_feature) != x86_feature) {
1319 		cmn_err(CE_CONT, "?cpu%d: %b\n",
1320 		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
1321 		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
1322 	}
1323 
1324 	/*
1325 	 * We do not support cpus with mixed monitor/mwait support if the
1326 	 * boot cpu supports monitor/mwait.
1327 	 */
1328 	if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
1329 		panic("unsupported mixed cpu monitor/mwait support detected");
1330 
1331 	/*
1332 	 * We could be more sophisticated here, and just mark the CPU
1333 	 * as "faulted" but at this point we'll opt for the easier
1334 	 * answer of dieing horribly.  Provided the boot cpu is ok,
1335 	 * the system can be recovered by booting with use_mp set to zero.
1336 	 */
1337 	if (workaround_errata(cp) != 0)
1338 		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
1339 
1340 	cpuid_pass2(cp);
1341 	cpuid_pass3(cp);
1342 	(void) cpuid_pass4(cp);
1343 
1344 	init_cpu_info(cp);
1345 
1346 	mutex_enter(&cpu_lock);
1347 	/*
1348 	 * Processor group initialization for this CPU is dependent on the
1349 	 * cpuid probing, which must be done in the context of the current
1350 	 * CPU.
1351 	 */
1352 	pghw_physid_create(cp);
1353 	pg_cpu_init(cp);
1354 	pg_cmt_cpu_startup(cp);
1355 
1356 	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
1357 	cpu_add_active(cp);
1358 
1359 	if (dtrace_cpu_init != NULL) {
1360 		(*dtrace_cpu_init)(cp->cpu_id);
1361 	}
1362 
1363 	/*
1364 	 * Fill out cpu_ucode_info.  Update microcode if necessary.
1365 	 */
1366 	ucode_check(cp);
1367 
1368 	mutex_exit(&cpu_lock);
1369 
1370 	/*
1371 	 * Enable preemption here so that contention for any locks acquired
1372 	 * later in mp_startup may be preempted if the thread owning those
1373 	 * locks is continously executing on other CPUs (for example, this
1374 	 * CPU must be preemptible to allow other CPUs to pause it during their
1375 	 * startup phases).  It's safe to enable preemption here because the
1376 	 * CPU state is pretty-much fully constructed.
1377 	 */
1378 	curthread->t_preempt = 0;
1379 
1380 	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);
1381 
1382 	/* The base spl should still be at LOCK LEVEL here */
1383 	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
1384 	set_base_spl();		/* Restore the spl to its proper value */
1385 
1386 	(void) spl0();				/* enable interrupts */
1387 
1388 	/*
1389 	 * Set up the CPU module for this CPU.  This can't be done before
1390 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
1391 	 * need to go load another CPU module.  The act of attempting to load
1392 	 * a module may trigger a cross-call, which will ASSERT unless this
1393 	 * cpu is CPU_READY.
1394 	 */
1395 	cmi_init();
1396 
1397 	if (x86_feature & X86_MCA)
1398 		cmi_mca_init();
1399 
1400 	if (boothowto & RB_DEBUG)
1401 		kdi_cpu_init();
1402 
1403 	/*
1404 	 * Setting the bit in cpu_ready_set must be the last operation in
1405 	 * processor initialization; the boot CPU will continue to boot once
1406 	 * it sees this bit set for all active CPUs.
1407 	 */
1408 	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
1409 
1410 	/*
1411 	 * Because mp_startup() gets fired off after init() starts, we
1412 	 * can't use the '?' trick to do 'boot -v' printing - so we
1413 	 * always direct the 'cpu .. online' messages to the log.
1414 	 */
1415 	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
1416 	    cp->cpu_id);
1417 
1418 	/*
1419 	 * Now we are done with the startup thread, so free it up.
1420 	 */
1421 	thread_exit();
1422 	panic("mp_startup: cannot return");
1423 	/*NOTREACHED*/
1424 }
1425 
1426 
1427 /*
1428  * Start CPU on user request.
1429  */
1430 /* ARGSUSED */
1431 int
1432 mp_cpu_start(struct cpu *cp)
1433 {
1434 	ASSERT(MUTEX_HELD(&cpu_lock));
1435 	return (0);
1436 }
1437 
1438 /*
1439  * Stop CPU on user request.
1440  */
1441 /* ARGSUSED */
1442 int
1443 mp_cpu_stop(struct cpu *cp)
1444 {
1445 	extern int cbe_psm_timer_mode;
1446 	ASSERT(MUTEX_HELD(&cpu_lock));
1447 
1448 	/*
1449 	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
1450 	 * can't stop it.  (This is true only for machines with no TSC.)
1451 	 */
1452 
1453 	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
1454 		return (1);
1455 
1456 	return (0);
1457 }
1458 
1459 /*
1460  * Take the specified CPU out of participation in interrupts.
1461  */
1462 int
1463 cpu_disable_intr(struct cpu *cp)
1464 {
1465 	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
1466 		return (EBUSY);
1467 
1468 	cp->cpu_flags &= ~CPU_ENABLE;
1469 	return (0);
1470 }
1471 
1472 /*
1473  * Allow the specified CPU to participate in interrupts.
1474  */
1475 void
1476 cpu_enable_intr(struct cpu *cp)
1477 {
1478 	ASSERT(MUTEX_HELD(&cpu_lock));
1479 	cp->cpu_flags |= CPU_ENABLE;
1480 	psm_enable_intr(cp->cpu_id);
1481 }
1482 
1483 
1484 
/*
 * Thin wrapper: forwards the cpu to cmi_faulted_enter().
 */
void
mp_cpu_faulted_enter(struct cpu *cp)
{
	cmi_faulted_enter(cp);
}
1490 
/*
 * Thin wrapper: forwards the cpu to cmi_faulted_exit().
 */
void
mp_cpu_faulted_exit(struct cpu *cp)
{
	cmi_faulted_exit(cp);
}
1496 
1497 /*
1498  * The following two routines are used as context operators on threads belonging
1499  * to processes with a private LDT (see sysi86).  Due to the rarity of such
1500  * processes, these routines are currently written for best code readability and
1501  * organization rather than speed.  We could avoid checking x86_feature at every
1502  * context switch by installing different context ops, depending on the
1503  * x86_feature flags, at LDT creation time -- one for each combination of fast
1504  * syscall feature flags.
1505  */
1506 
1507 /*ARGSUSED*/
1508 void
1509 cpu_fast_syscall_disable(void *arg)
1510 {
1511 	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
1512 		cpu_sep_disable();
1513 	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
1514 		cpu_asysc_disable();
1515 }
1516 
1517 /*ARGSUSED*/
1518 void
1519 cpu_fast_syscall_enable(void *arg)
1520 {
1521 	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
1522 		cpu_sep_enable();
1523 	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
1524 		cpu_asysc_enable();
1525 }
1526 
1527 static void
1528 cpu_sep_enable(void)
1529 {
1530 	ASSERT(x86_feature & X86_SEP);
1531 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1532 
1533 	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
1534 }
1535 
1536 static void
1537 cpu_sep_disable(void)
1538 {
1539 	ASSERT(x86_feature & X86_SEP);
1540 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1541 
1542 	/*
1543 	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
1544 	 * the sysenter or sysexit instruction to trigger a #gp fault.
1545 	 */
1546 	wrmsr(MSR_INTC_SEP_CS, 0);
1547 }
1548 
1549 static void
1550 cpu_asysc_enable(void)
1551 {
1552 	ASSERT(x86_feature & X86_ASYSC);
1553 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1554 
1555 	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
1556 	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
1557 }
1558 
1559 static void
1560 cpu_asysc_disable(void)
1561 {
1562 	ASSERT(x86_feature & X86_ASYSC);
1563 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1564 
1565 	/*
1566 	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
1567 	 * executing syscall or sysret with this bit off will incur a #ud trap.
1568 	 */
1569 	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
1570 	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
1571 }
1572