xref: /illumos-gate/usr/src/uts/i86pc/os/mp_startup.c (revision d4476ccb08e9498c2013971c4212dc6362fcec46)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/thread.h>
31 #include <sys/cpuvar.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/proc.h>
35 #include <sys/disp.h>
36 #include <sys/mmu.h>
37 #include <sys/class.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/asm_linkage.h>
41 #include <sys/x_call.h>
42 #include <sys/systm.h>
43 #include <sys/var.h>
44 #include <sys/vtrace.h>
45 #include <vm/hat.h>
46 #include <sys/mmu.h>
47 #include <vm/as.h>
48 #include <vm/seg_kmem.h>
49 #include <sys/segments.h>
50 #include <sys/kmem.h>
51 #include <sys/stack.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/x86_archext.h>
54 #include <sys/machsystm.h>
55 #include <sys/traptrace.h>
56 #include <sys/clock.h>
57 #include <sys/cpc_impl.h>
58 #include <sys/chip.h>
59 #include <sys/dtrace.h>
60 #include <sys/archsystm.h>
61 #include <sys/fp.h>
62 #include <sys/reboot.h>
63 #include <sys/kdi.h>
64 #include <vm/hat_i86.h>
65 #include <sys/memnode.h>
66 
/*
 * Statically allocated cpu structure for the boot CPU.  cpu[] holds one
 * pointer per possible CPU; only slot 0 is populated here, the remaining
 * slots are expected to be NULL until mp_startup_init() sets up a CPU
 * (it asserts cpu[cpun] == NULL on entry).
 */
struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * Useful for disabling MP bring-up for an MP capable kernel
 * (a kernel that was built with MP defined).  start_other_cpus() skips
 * starting any additional CPU when this is zero.
 */
int use_mp = 1;

/*
 * Bitmask of CPUs to start, indexed by cpu id (bit 0 is the only one set
 * by default).  start_other_cpus() tests it with (1 << who).
 */
int mp_cpus = 0x1;	/* to be set by platform specific module	*/

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  In this
 * file it is set by start_other_cpus() once cross calls have been
 * initialized (xc_init()) and before any other CPU is started.
 */
int flushes_require_xcalls = 0;

/*
 * One bit per cpu id; a CPU adds its own bit as the final step of
 * mp_startup().  The initial value has bit 0 set, which matches the id
 * returned by getbootcpuid().
 */
ulong_t	cpu_ready_set = 1;

/*
 * real_mode_start/real_mode_end delimit the code that start_other_cpus()
 * copies into the real mode platter page.
 */
extern	void	real_mode_start(void);
extern	void	real_mode_end(void);
static 	void	mp_startup(void);

/* Helpers used by init_cpu_syscall(); defined elsewhere in this file. */
static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/* Non-zero when the TSC synchronization steps should be performed. */
extern int tsc_gethrtime_enable;
98 
99 /*
100  * Init CPU info - get CPU type info for processor_info system call.
101  */
102 void
103 init_cpu_info(struct cpu *cp)
104 {
105 	processor_info_t *pi = &cp->cpu_type_info;
106 	char buf[CPU_IDSTRLEN];
107 
108 	/*
109 	 * Get clock-frequency property for the CPU.
110 	 */
111 	pi->pi_clock = cpu_freq;
112 
113 	(void) strcpy(pi->pi_processor_type, "i386");
114 	if (fpu_exists)
115 		(void) strcpy(pi->pi_fputypes, "i387 compatible");
116 
117 	(void) cpuid_getidstr(cp, buf, sizeof (buf));
118 
119 	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
120 	(void) strcpy(cp->cpu_idstr, buf);
121 
122 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
123 
124 	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
125 	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
126 	(void) strcpy(cp->cpu_brandstr, buf);
127 
128 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
129 }
130 
131 /*
132  * Configure syscall support on this CPU.
133  */
134 /*ARGSUSED*/
135 static void
136 init_cpu_syscall(struct cpu *cp)
137 {
138 	kpreempt_disable();
139 
140 #if defined(__amd64)
141 	if (x86_feature & X86_ASYSC) {
142 
143 #if !defined(__lint)
144 		/*
145 		 * The syscall instruction imposes a certain ordering on
146 		 * segment selectors, so we double-check that ordering
147 		 * here.
148 		 */
149 		ASSERT(KDS_SEL == KCS_SEL + 8);
150 		ASSERT(UDS_SEL == U32CS_SEL + 8);
151 		ASSERT(UCS_SEL == U32CS_SEL + 16);
152 #endif
153 		/*
154 		 * Turn syscall/sysret extensions on.
155 		 */
156 		cpu_asysc_enable();
157 
158 		/*
159 		 * Program the magic registers ..
160 		 */
161 		wrmsr(MSR_AMD_STAR, ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) <<
162 		    32);
163 		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
164 		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
165 
166 		/*
167 		 * This list of flags is masked off the incoming
168 		 * %rfl when we enter the kernel.
169 		 */
170 		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
171 	}
172 #endif
173 
174 	/*
175 	 * On 32-bit kernels, we use sysenter/sysexit because it's too
176 	 * hard to use syscall/sysret, and it is more portable anyway.
177 	 *
178 	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
179 	 * variant isn't available to 32-bit applications, but sysenter is.
180 	 */
181 	if (x86_feature & X86_SEP) {
182 
183 #if !defined(__lint)
184 		/*
185 		 * The sysenter instruction imposes a certain ordering on
186 		 * segment selectors, so we double-check that ordering
187 		 * here. See "sysenter" in Intel document 245471-012, "IA-32
188 		 * Intel Architecture Software Developer's Manual Volume 2:
189 		 * Instruction Set Reference"
190 		 */
191 		ASSERT(KDS_SEL == KCS_SEL + 8);
192 
193 		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
194 		ASSERT32(UDS_SEL == UCS_SEL + 8);
195 
196 		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
197 		ASSERT64(UDS_SEL == U32CS_SEL + 8);
198 #endif
199 
200 		cpu_sep_enable();
201 
202 		/*
203 		 * resume() sets this value to the base of the threads stack
204 		 * via a context handler.
205 		 */
206 		wrmsr(MSR_INTC_SEP_ESP, 0ULL);
207 		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
208 	}
209 
210 	kpreempt_enable();
211 }
212 
213 /*
214  * Multiprocessor initialization.
215  *
216  * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
217  * startup and idle threads for the specified CPU.
218  */
219 static void
220 mp_startup_init(int cpun)
221 {
222 #if defined(__amd64)
223 extern void *long_mode_64(void);
224 #endif	/* __amd64 */
225 
226 	struct cpu *cp;
227 	struct tss *ntss;
228 	kthread_id_t tp;
229 	caddr_t	sp;
230 	int size;
231 	proc_t *procp;
232 	extern void idle();
233 	extern void init_intr_threads(struct cpu *);
234 
235 	struct cpu_tables *tablesp;
236 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
237 
238 #ifdef TRAPTRACE
239 	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
240 #endif
241 
242 	ASSERT(cpun < NCPU && cpu[cpun] == NULL);
243 
244 	if ((cp = kmem_zalloc(sizeof (*cp), KM_NOSLEEP)) == NULL) {
245 		panic("mp_startup_init: cpu%d: "
246 		    "no memory for cpu structure", cpun);
247 		/*NOTREACHED*/
248 	}
249 	procp = curthread->t_procp;
250 
251 	mutex_enter(&cpu_lock);
252 	/*
253 	 * Initialize the dispatcher first.
254 	 */
255 	disp_cpu_init(cp);
256 	mutex_exit(&cpu_lock);
257 
258 	cpu_vm_data_init(cp);
259 
260 	/*
261 	 * Allocate and initialize the startup thread for this CPU.
262 	 * Interrupt and process switch stacks get allocated later
263 	 * when the CPU starts running.
264 	 */
265 	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
266 	    TS_STOPPED, maxclsyspri);
267 
268 	/*
269 	 * Set state to TS_ONPROC since this thread will start running
270 	 * as soon as the CPU comes online.
271 	 *
272 	 * All the other fields of the thread structure are setup by
273 	 * thread_create().
274 	 */
275 	THREAD_ONPROC(tp, cp);
276 	tp->t_preempt = 1;
277 	tp->t_bound_cpu = cp;
278 	tp->t_affinitycnt = 1;
279 	tp->t_cpu = cp;
280 	tp->t_disp_queue = cp->cpu_disp;
281 
282 	/*
283 	 * Setup thread to start in mp_startup.
284 	 */
285 	sp = tp->t_stk;
286 	tp->t_pc = (uintptr_t)mp_startup;
287 	tp->t_sp = (uintptr_t)(sp - MINFRAME);
288 
289 	cp->cpu_id = cpun;
290 	cp->cpu_self = cp;
291 	cp->cpu_mask = 1 << cpun;
292 	cp->cpu_thread = tp;
293 	cp->cpu_lwp = NULL;
294 	cp->cpu_dispthread = tp;
295 	cp->cpu_dispatch_pri = DISP_PRIO(tp);
296 
297 	/*
298 	 * Now, initialize per-CPU idle thread for this CPU.
299 	 */
300 	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);
301 
302 	cp->cpu_idle_thread = tp;
303 
304 	tp->t_preempt = 1;
305 	tp->t_bound_cpu = cp;
306 	tp->t_affinitycnt = 1;
307 	tp->t_cpu = cp;
308 	tp->t_disp_queue = cp->cpu_disp;
309 
310 	/*
311 	 * Bootstrap the CPU for CMT aware scheduling
312 	 * The rest of the initialization will happen from
313 	 * mp_startup()
314 	 */
315 	chip_bootstrap_cpu(cp);
316 
317 	/*
318 	 * Perform CPC intialization on the new CPU.
319 	 */
320 	kcpc_hw_init(cp);
321 
322 	/*
323 	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
324 	 * for each CPU.
325 	 */
326 
327 	setup_vaddr_for_ppcopy(cp);
328 
329 	/*
330 	 * Allocate space for page directory, stack, tss, gdt and idt.
331 	 * This assumes that kmem_alloc will return memory which is aligned
332 	 * to the next higher power of 2 or a page(if size > MAXABIG)
333 	 * If this assumption goes wrong at any time due to change in
334 	 * kmem alloc, things may not work as the page directory has to be
335 	 * page aligned
336 	 */
337 	if ((tablesp = kmem_zalloc(sizeof (*tablesp), KM_NOSLEEP)) == NULL)
338 		panic("mp_startup_init: cpu%d cannot allocate tables", cpun);
339 
340 	if ((uintptr_t)tablesp & ~MMU_STD_PAGEMASK) {
341 		kmem_free(tablesp, sizeof (struct cpu_tables));
342 		size = sizeof (struct cpu_tables) + MMU_STD_PAGESIZE;
343 		tablesp = kmem_zalloc(size, KM_NOSLEEP);
344 		tablesp = (struct cpu_tables *)
345 		    (((uintptr_t)tablesp + MMU_STD_PAGESIZE) &
346 		    MMU_STD_PAGEMASK);
347 	}
348 
349 	ntss = cp->cpu_tss = &tablesp->ct_tss;
350 	cp->cpu_gdt = tablesp->ct_gdt;
351 	bcopy(CPU->cpu_gdt, cp->cpu_gdt, NGDT * (sizeof (user_desc_t)));
352 
353 #if defined(__amd64)
354 
355 	/*
356 	 * #DF (double fault).
357 	 */
358 	ntss->tss_ist1 =
359 	    (uint64_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
360 
361 #elif defined(__i386)
362 
363 	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
364 	    (uint32_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
365 
366 	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
367 
368 	ntss->tss_eip = (uint32_t)mp_startup;
369 
370 	ntss->tss_cs = KCS_SEL;
371 	ntss->tss_fs = KFS_SEL;
372 	ntss->tss_gs = KGS_SEL;
373 
374 	/*
375 	 * setup kernel %gs.
376 	 */
377 	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
378 	    SEL_KPL, 0, 1);
379 
380 #endif	/* __i386 */
381 
382 	/*
383 	 * Set I/O bit map offset equal to size of TSS segment limit
384 	 * for no I/O permission map. This will cause all user I/O
385 	 * instructions to generate #gp fault.
386 	 */
387 	ntss->tss_bitmapbase = sizeof (*ntss);
388 
389 	/*
390 	 * setup kernel tss.
391 	 */
392 	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
393 	    sizeof (*cp->cpu_tss) -1, SDT_SYSTSS, SEL_KPL);
394 
395 	/*
396 	 * If we have more than one node, each cpu gets a copy of IDT
397 	 * local to its node. If this is a Pentium box, we use cpu 0's
398 	 * IDT. cpu 0's IDT has been made read-only to workaround the
399 	 * cmpxchgl register bug
400 	 */
401 	cp->cpu_idt = CPU->cpu_idt;
402 	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
403 		cp->cpu_idt = kmem_alloc(sizeof (idt0), KM_SLEEP);
404 		bcopy(idt0, cp->cpu_idt, sizeof (idt0));
405 	}
406 
407 	/*
408 	 * Get interrupt priority data from cpu 0
409 	 */
410 	cp->cpu_pri_data = CPU->cpu_pri_data;
411 
412 	hat_cpu_online(cp);
413 
414 	/* Should remove all entries for the current process/thread here */
415 
416 	/*
417 	 * Fill up the real mode platter to make it easy for real mode code to
418 	 * kick it off. This area should really be one passed by boot to kernel
419 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
420 	 * have identical physical and virtual address in paged mode.
421 	 */
422 	real_mode_platter->rm_idt_base = cp->cpu_idt;
423 	real_mode_platter->rm_idt_lim = sizeof (idt0) - 1;
424 	real_mode_platter->rm_gdt_base = cp->cpu_gdt;
425 	real_mode_platter->rm_gdt_lim = sizeof (gdt0) -1;
426 	real_mode_platter->rm_pdbr = getcr3();
427 	real_mode_platter->rm_cpu = cpun;
428 	real_mode_platter->rm_x86feature = x86_feature;
429 	real_mode_platter->rm_cr4 = cr4_value;
430 
431 #if defined(__amd64)
432 	if (getcr3() > 0xffffffffUL)
433 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
434 			"located above 4G in physical memory (@ 0x%llx).",
435 			(unsigned long long)getcr3());
436 
437 	/*
438 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
439 	 * by code in real_mode_start():
440 	 *
441 	 * GDT[0]:  NULL selector
442 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
443 	 *
444 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
445 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
446 	 * a course of action as any other, though it may cause the entire
447 	 * platform to reset in some cases...
448 	 */
449 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
450 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
451 
452 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
453 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
454 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
455 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
456 
457 	real_mode_platter->rm_temp_idt_lim = 0;
458 	real_mode_platter->rm_temp_idt_base = 0;
459 
460 	/*
461 	 * Since the CPU needs to jump to protected mode using an identity
462 	 * mapped address, we need to calculate it here.
463 	 */
464 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
465 	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
466 #endif	/* __amd64 */
467 
468 #ifdef TRAPTRACE
469 	/*
470 	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers for this
471 	 * CPU.
472 	 */
473 	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
474 	ttc->ttc_next = ttc->ttc_first;
475 	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
476 #endif
477 
478 	/*
479 	 * Record that we have another CPU.
480 	 */
481 	mutex_enter(&cpu_lock);
482 	/*
483 	 * Initialize the interrupt threads for this CPU
484 	 */
485 	init_intr_threads(cp);
486 	/*
487 	 * Add CPU to list of available CPUs.  It'll be on the active list
488 	 * after mp_startup().
489 	 */
490 	cpu_add_unit(cp);
491 	mutex_exit(&cpu_lock);
492 }
493 
/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */

/*
 * Per-erratum counters.  Each exists only when the matching
 * OPTERON_ERRATUM_<n> workaround is compiled in, and is incremented by
 * workaround_errata() when a CPU is found to need that workaround.
 */
#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

/*
 * Log a warning that erratum number n applies to the given cpu but no
 * workaround for it was compiled into this kernel.
 */
#define	WARNING(cpu, n)						\
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %d",	\
	    (cpu)->cpu_id, (n))
545 
546 uint_t
547 workaround_errata(struct cpu *cpu)
548 {
549 	uint_t missing = 0;
550 
551 	ASSERT(cpu == CPU);
552 
553 	/*LINTED*/
554 	if (cpuid_opteron_erratum(cpu, 88) > 0) {
555 		/*
556 		 * SWAPGS May Fail To Read Correct GS Base
557 		 */
558 #if defined(OPTERON_ERRATUM_88)
559 		/*
560 		 * The workaround is an mfence in the relevant assembler code
561 		 */
562 #else
563 		WARNING(cpu, 88);
564 		missing++;
565 #endif
566 	}
567 
568 	if (cpuid_opteron_erratum(cpu, 91) > 0) {
569 		/*
570 		 * Software Prefetches May Report A Page Fault
571 		 */
572 #if defined(OPTERON_ERRATUM_91)
573 		/*
574 		 * fix is in trap.c
575 		 */
576 		opteron_erratum_91++;
577 #else
578 		WARNING(cpu, 91);
579 		missing++;
580 #endif
581 	}
582 
583 	if (cpuid_opteron_erratum(cpu, 93) > 0) {
584 		/*
585 		 * RSM Auto-Halt Restart Returns to Incorrect RIP
586 		 */
587 #if defined(OPTERON_ERRATUM_93)
588 		/*
589 		 * fix is in trap.c
590 		 */
591 		opteron_erratum_93++;
592 #else
593 		WARNING(cpu, 93);
594 		missing++;
595 #endif
596 	}
597 
598 	/*LINTED*/
599 	if (cpuid_opteron_erratum(cpu, 95) > 0) {
600 		/*
601 		 * RET Instruction May Return to Incorrect EIP
602 		 */
603 #if defined(OPTERON_ERRATUM_95)
604 #if defined(_LP64)
605 		/*
606 		 * Workaround this by ensuring that 32-bit user code and
607 		 * 64-bit kernel code never occupy the same address
608 		 * range mod 4G.
609 		 */
610 		if (_userlimit32 > 0xc0000000ul)
611 			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;
612 
613 		/*LINTED*/
614 		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
615 #endif	/* _LP64 */
616 #else
617 		WARNING(cpu, 95);
618 		missing++;
619 #endif	/* OPTERON_ERRATUM_95 */
620 	}
621 
622 	if (cpuid_opteron_erratum(cpu, 100) > 0) {
623 		/*
624 		 * Compatibility Mode Branches Transfer to Illegal Address
625 		 */
626 #if defined(OPTERON_ERRATUM_100)
627 		/*
628 		 * fix is in trap.c
629 		 */
630 		opteron_erratum_100++;
631 #else
632 		WARNING(cpu, 100);
633 		missing++;
634 #endif
635 	}
636 
637 	/*LINTED*/
638 	if (cpuid_opteron_erratum(cpu, 108) > 0) {
639 		/*
640 		 * CPUID Instruction May Return Incorrect Model Number In
641 		 * Some Processors
642 		 */
643 #if defined(OPTERON_ERRATUM_108)
644 		/*
645 		 * (Our cpuid-handling code corrects the model number on
646 		 * those processors)
647 		 */
648 #else
649 		WARNING(cpu, 108);
650 		missing++;
651 #endif
652 	}
653 
654 	/*LINTED*/
655 	if (cpuid_opteron_erratum(cpu, 109) > 0) {
656 		/*
657 		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
658 		 */
659 #if defined(OPTERON_ERRATUM_109)
660 
661 		/* workaround is to print a warning to upgrade BIOS */
662 		if (rdmsr(MSR_AMD_PATCHLEVEL) == 0)
663 			opteron_erratum_109++;
664 #else
665 		WARNING(cpu, 109);
666 		missing++;
667 #endif
668 	}
669 	/*LINTED*/
670 	if (cpuid_opteron_erratum(cpu, 121) > 0) {
671 		/*
672 		 * Sequential Execution Across Non_Canonical Boundary Caused
673 		 * Processor Hang
674 		 */
675 #if defined(OPTERON_ERRATUM_121)
676 		static int	lma;
677 
678 		if (opteron_erratum_121)
679 			opteron_erratum_121++;
680 
681 		/*
682 		 * Erratum 121 is only present in long (64 bit) mode.
683 		 * Workaround is to include the page immediately before the
684 		 * va hole to eliminate the possibility of system hangs due to
685 		 * sequential execution across the va hole boundary.
686 		 */
687 		if (lma == 0) {
688 			/*
689 			 * check LMA once: assume all cpus are in long mode
690 			 * or not.
691 			 */
692 			lma = 1;
693 
694 			if (rdmsr(MSR_AMD_EFER) & AMD_EFER_LMA) {
695 				if (hole_start) {
696 					hole_start -= PAGESIZE;
697 				} else {
698 					/*
699 					 * hole_start not yet initialized by
700 					 * mmu_init. Initialize hole_start
701 					 * with value to be subtracted.
702 					 */
703 					hole_start = PAGESIZE;
704 				}
705 				opteron_erratum_121++;
706 			}
707 		}
708 #else
709 		WARNING(cpu, 121);
710 		missing++;
711 #endif
712 	}
713 
714 	/*LINTED*/
715 	if (cpuid_opteron_erratum(cpu, 122) > 0) {
716 		/*
717 		 * TLB Flush Filter May Cause Cohenrency Problem in
718 		 * Multiprocessor Systems
719 		 */
720 #if defined(OPTERON_ERRATUM_122)
721 		/*
722 		 * Erratum 122 is only present in MP configurations (multi-core
723 		 * or multi-processor).
724 		 */
725 
726 		if (opteron_erratum_122 || lgrp_plat_node_cnt > 1 ||
727 		    cpuid_get_ncpu_per_chip(cpu) > 1) {
728 			/* disable TLB Flush Filter */
729 			wrmsr(MSR_AMD_HWCR, rdmsr(MSR_AMD_HWCR) |
730 			    (uint64_t)(uintptr_t)AMD_HWCR_FFDIS);
731 			opteron_erratum_122++;
732 		}
733 
734 #else
735 		WARNING(cpu, 122);
736 		missing++;
737 #endif
738 	}
739 
740 #if defined(OPTERON_ERRATUM_123)
741 	/*LINTED*/
742 	if (cpuid_opteron_erratum(cpu, 123) > 0) {
743 		/*
744 		 * Bypassed Reads May Cause Data Corruption of System Hang in
745 		 * Dual Core Processors
746 		 */
747 		/*
748 		 * Erratum 123 applies only to multi-core cpus.
749 		 */
750 
751 		if (cpuid_get_ncpu_per_chip(cpu) > 1) {
752 			/* workaround is to print a warning to upgrade BIOS */
753 			if (rdmsr(MSR_AMD_PATCHLEVEL) == 0)
754 				opteron_erratum_123++;
755 		}
756 	}
757 #endif
758 
759 #if defined(OPTERON_ERRATUM_131)
760 	/*LINTED*/
761 	if (cpuid_opteron_erratum(cpu, 131) > 0) {
762 		/*
763 		 * Multiprocessor Systems with Four or More Cores May Deadlock
764 		 * Waiting for a Probe Response
765 		 */
766 		/*
767 		 * Erratum 131 applies to any system with four or more cores.
768 		 */
769 		if ((opteron_erratum_131 == 0) && ((lgrp_plat_node_cnt *
770 		    cpuid_get_ncpu_per_chip(cpu)) >= 4)) {
771 			/*
772 			 * Workaround is to print a warning to upgrade
773 			 * the BIOS
774 			 */
775 			if (!(rdmsr(MSR_AMD_NB_CFG) & AMD_NB_CFG_SRQ_HEARTBEAT))
776 				opteron_erratum_131++;
777 		}
778 #endif
779 	}
780 	return (missing);
781 }
782 
/*
 * Emit the one-time BIOS upgrade warnings for errata 109, 123 and 131.
 *
 * Each warning is printed only if the corresponding workaround is
 * compiled in and workaround_errata() left its counter non-zero.
 */
void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 109 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_109 */

#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 123 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_123 */

#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm) "
		    "processor\n"
		    "erratum 131 was not detected; updating your system's "
		    "BIOS to a version\n"
		    "containing this microcode patch is HIGHLY recommended "
		    "or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_131 */
}
817 
/*
 * Helpers used by start_other_cpus() to map the warm reset vector before
 * starting the other CPUs and to unmap it again afterwards.
 */
static ushort_t *mp_map_warm_reset_vector();
static void mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector);
820 
821 /*ARGSUSED*/
822 void
823 start_other_cpus(int cprboot)
824 {
825 	unsigned who;
826 	int cpuid = getbootcpuid();
827 	int delays = 0;
828 	int started_cpu;
829 	ushort_t *warm_reset_vector = NULL;
830 	extern int procset;
831 
832 	/*
833 	 * Initialize our own cpu_info.
834 	 */
835 	init_cpu_info(CPU);
836 
837 	/*
838 	 * Initialize our syscall handlers
839 	 */
840 	init_cpu_syscall(CPU);
841 
842 	/*
843 	 * if only 1 cpu or not using MP, skip the rest of this
844 	 */
845 	if (!(mp_cpus & ~(1 << cpuid)) || use_mp == 0) {
846 		if (use_mp == 0)
847 			cmn_err(CE_CONT, "?***** Not in MP mode\n");
848 		goto done;
849 	}
850 
851 	/*
852 	 * perform such initialization as is needed
853 	 * to be able to take CPUs on- and off-line.
854 	 */
855 	cpu_pause_init();
856 
857 	xc_init();		/* initialize processor crosscalls */
858 
859 	/*
860 	 * Copy the real mode code at "real_mode_start" to the
861 	 * page at rm_platter_va.
862 	 */
863 	warm_reset_vector = mp_map_warm_reset_vector();
864 	if (warm_reset_vector == NULL)
865 		goto done;
866 
867 	bcopy((caddr_t)real_mode_start,
868 	    (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
869 	    (size_t)real_mode_end - (size_t)real_mode_start);
870 
871 	flushes_require_xcalls = 1;
872 
873 	affinity_set(CPU_CURRENT);
874 
875 	for (who = 0; who < NCPU; who++) {
876 		if (who == cpuid)
877 			continue;
878 
879 		if ((mp_cpus & (1 << who)) == 0)
880 			continue;
881 
882 		mp_startup_init(who);
883 		started_cpu = 1;
884 		(*cpu_startf)(who, rm_platter_pa);
885 
886 		while ((procset & (1 << who)) == 0) {
887 
888 			delay(1);
889 			if (++delays > (20 * hz)) {
890 
891 				cmn_err(CE_WARN,
892 				    "cpu%d failed to start", who);
893 
894 				mutex_enter(&cpu_lock);
895 				cpu[who]->cpu_flags = 0;
896 				cpu_vm_data_destroy(cpu[who]);
897 				cpu_del_unit(who);
898 				mutex_exit(&cpu_lock);
899 
900 				started_cpu = 0;
901 				break;
902 			}
903 		}
904 		if (!started_cpu)
905 			continue;
906 		if (tsc_gethrtime_enable)
907 			tsc_sync_master(who);
908 
909 
910 		if (dtrace_cpu_init != NULL) {
911 			/*
912 			 * DTrace CPU initialization expects cpu_lock
913 			 * to be held.
914 			 */
915 			mutex_enter(&cpu_lock);
916 			(*dtrace_cpu_init)(who);
917 			mutex_exit(&cpu_lock);
918 		}
919 	}
920 
921 	affinity_clear();
922 
923 	for (who = 0; who < NCPU; who++) {
924 		if (who == cpuid)
925 			continue;
926 
927 		if (!(procset & (1 << who)))
928 			continue;
929 
930 		while (!(cpu_ready_set & (1 << who)))
931 			delay(1);
932 	}
933 
934 done:
935 	workaround_errata_end();
936 
937 	if (warm_reset_vector != NULL)
938 		mp_unmap_warm_reset_vector(warm_reset_vector);
939 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
940 	    HAT_UNLOAD);
941 }
942 
/*
 * Dynamic CPU configuration hook.  This is a placeholder: no i86pc
 * platform supports dynamic cpu allocation, so the request is always
 * refused with ENOTSUP.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	int err = ENOTSUP;		/* not supported */

	return (err);
}
952 
/*
 * Dynamic CPU unconfiguration hook.  Always refused with ENOTSUP; the
 * cpuid argument is ignored.
 */
/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	int err = ENOTSUP;		/* not supported */

	return (err);
}
959 
960 /*
961  * Startup function for 'other' CPUs (besides boot cpu).
962  * Resumed from cpu_startup.
963  */
964 void
965 mp_startup(void)
966 {
967 	struct cpu *cp = CPU;
968 	extern int procset;
969 	uint_t new_x86_feature;
970 
971 	new_x86_feature = cpuid_pass1(cp);
972 
973 	/*
974 	 * We need to Sync MTRR with cpu0's MTRR. We have to do
975 	 * this with interrupts disabled.
976 	 */
977 	if (x86_feature & X86_MTRR)
978 		mtrr_sync();
979 	/*
980 	 * Enable machine check architecture
981 	 */
982 	if (x86_feature & X86_MCA)
983 		setup_mca();
984 
985 	/*
986 	 * Initialize this CPU's syscall handlers
987 	 */
988 	init_cpu_syscall(cp);
989 
990 	/*
991 	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
992 	 * highest level at which a routine is permitted to block on
993 	 * an adaptive mutex (allows for cpu poke interrupt in case
994 	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
995 	 * device interrupts that may end up in the hat layer issuing cross
996 	 * calls before CPU_READY is set.
997 	 */
998 	(void) splx(ipltospl(LOCK_LEVEL));
999 
1000 	/*
1001 	 * Do a sanity check to make sure this new CPU is a sane thing
1002 	 * to add to the collection of processors running this system.
1003 	 *
1004 	 * XXX	Clearly this needs to get more sophisticated, if x86
1005 	 * systems start to get built out of heterogenous CPUs; as is
1006 	 * likely to happen once the number of processors in a configuration
1007 	 * gets large enough.
1008 	 */
1009 	if ((x86_feature & new_x86_feature) != x86_feature) {
1010 		cmn_err(CE_CONT, "?cpu%d: %b\n",
1011 		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
1012 		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
1013 	}
1014 
1015 	/*
1016 	 * We could be more sophisticated here, and just mark the CPU
1017 	 * as "faulted" but at this point we'll opt for the easier
1018 	 * answer of dieing horribly.  Provided the boot cpu is ok,
1019 	 * the system can be recovered by booting with use_mp set to zero.
1020 	 */
1021 	if (workaround_errata(cp) != 0)
1022 		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
1023 
1024 	cpuid_pass2(cp);
1025 	cpuid_pass3(cp);
1026 	(void) cpuid_pass4(cp);
1027 
1028 	init_cpu_info(cp);
1029 
1030 	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);
1031 
1032 	mutex_enter(&cpu_lock);
1033 	procset |= 1 << cp->cpu_id;
1034 	mutex_exit(&cpu_lock);
1035 
1036 	if (tsc_gethrtime_enable)
1037 		tsc_sync_slave();
1038 
1039 	mutex_enter(&cpu_lock);
1040 	/*
1041 	 * It's unfortunate that chip_cpu_init() has to be called here.
1042 	 * It really belongs in cpu_add_unit(), but unfortunately it is
1043 	 * dependent on the cpuid probing, which must be done in the
1044 	 * context of the current CPU. Care must be taken on x86 to ensure
1045 	 * that mp_startup can safely block even though chip_cpu_init() and
1046 	 * cpu_add_active() have not yet been called.
1047 	 */
1048 	chip_cpu_init(cp);
1049 	chip_cpu_startup(cp);
1050 
1051 	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
1052 	cpu_add_active(cp);
1053 	mutex_exit(&cpu_lock);
1054 
1055 	(void) spl0();				/* enable interrupts */
1056 
1057 	if (boothowto & RB_DEBUG)
1058 		kdi_dvec_cpu_init(cp);
1059 
1060 	/*
1061 	 * Setting the bit in cpu_ready_set must be the last operation in
1062 	 * processor initialization; the boot CPU will continue to boot once
1063 	 * it sees this bit set for all active CPUs.
1064 	 */
1065 	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
1066 
1067 	/*
1068 	 * Because mp_startup() gets fired off after init() starts, we
1069 	 * can't use the '?' trick to do 'boot -v' printing - so we
1070 	 * always direct the 'cpu .. online' messages to the log.
1071 	 */
1072 	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
1073 	    cp->cpu_id);
1074 
1075 	/*
1076 	 * Now we are done with the startup thread, so free it up.
1077 	 */
1078 	thread_exit();
1079 	panic("mp_startup: cannot return");
1080 	/*NOTREACHED*/
1081 }
1082 
1083 
1084 /*
1085  * Start CPU on user request.
1086  */
1087 /* ARGSUSED */
1088 int
1089 mp_cpu_start(struct cpu *cp)
1090 {
1091 	ASSERT(MUTEX_HELD(&cpu_lock));
1092 	if (cp->cpu_id == getbootcpuid())
1093 		return (EBUSY); 	/* Cannot start boot CPU */
1094 	return (0);
1095 }
1096 
1097 /*
1098  * Stop CPU on user request.
1099  */
1100 /* ARGSUSED */
1101 int
1102 mp_cpu_stop(struct cpu *cp)
1103 {
1104 	ASSERT(MUTEX_HELD(&cpu_lock));
1105 	if (cp->cpu_id == getbootcpuid())
1106 		return (EBUSY); 	/* Cannot stop boot CPU */
1107 
1108 	return (0);
1109 }
1110 
1111 /*
1112  * Power on CPU.
1113  */
1114 /* ARGSUSED */
1115 int
1116 mp_cpu_poweron(struct cpu *cp)
1117 {
1118 	ASSERT(MUTEX_HELD(&cpu_lock));
1119 	return (ENOTSUP);		/* not supported */
1120 }
1121 
1122 /*
1123  * Power off CPU.
1124  */
1125 /* ARGSUSED */
1126 int
1127 mp_cpu_poweroff(struct cpu *cp)
1128 {
1129 	ASSERT(MUTEX_HELD(&cpu_lock));
1130 	return (ENOTSUP);		/* not supported */
1131 }
1132 
1133 
1134 /*
1135  * Take the specified CPU out of participation in interrupts.
1136  */
1137 int
1138 cpu_disable_intr(struct cpu *cp)
1139 {
1140 	/*
1141 	 * cannot disable interrupts on boot cpu
1142 	 */
1143 	if (cp == cpu[getbootcpuid()])
1144 		return (EBUSY);
1145 
1146 	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
1147 		return (EBUSY);
1148 
1149 	cp->cpu_flags &= ~CPU_ENABLE;
1150 	return (0);
1151 }
1152 
1153 /*
1154  * Allow the specified CPU to participate in interrupts.
1155  */
1156 void
1157 cpu_enable_intr(struct cpu *cp)
1158 {
1159 	ASSERT(MUTEX_HELD(&cpu_lock));
1160 	if (cp == cpu[getbootcpuid()])
1161 		return;
1162 
1163 	cp->cpu_flags |= CPU_ENABLE;
1164 	psm_enable_intr(cp->cpu_id);
1165 }
1166 
1167 
1168 /*
1169  * return the cpu id of the initial startup cpu
1170  */
1171 processorid_t
1172 getbootcpuid(void)
1173 {
1174 	return (0);
1175 }
1176 
1177 static ushort_t *
1178 mp_map_warm_reset_vector()
1179 {
1180 	ushort_t *warm_reset_vector;
1181 
1182 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
1183 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
1184 		return (NULL);
1185 
1186 	/*
1187 	 * setup secondary cpu bios boot up vector
1188 	 */
1189 	*warm_reset_vector = (ushort_t)((caddr_t)
1190 		((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
1191 		+ ((ulong_t)rm_platter_va & 0xf));
1192 	warm_reset_vector++;
1193 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
1194 
1195 	--warm_reset_vector;
1196 	return (warm_reset_vector);
1197 }
1198 
1199 static void
1200 mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector)
1201 {
1202 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
1203 }
1204 
/*
 * Intentionally empty: this implementation performs no work and ignores
 * cp.  Judging by the name, it is the hook for a CPU entering the
 * faulted state -- confirm against the callers.
 */
/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
}
1209 
/*
 * Intentionally empty: this implementation performs no work and ignores
 * cp.  Judging by the name, it is the hook for a CPU leaving the
 * faulted state -- confirm against the callers.
 */
/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
}
1214 
1215 /*
1216  * The following two routines are used as context operators on threads belonging
1217  * to processes with a private LDT (see sysi86).  Due to the rarity of such
1218  * processes, these routines are currently written for best code readability and
1219  * organization rather than speed.  We could avoid checking x86_feature at every
1220  * context switch by installing different context ops, depending on the
1221  * x86_feature flags, at LDT creation time -- one for each combination of fast
1222  * syscall feature flags.
1223  */
1224 
1225 /*ARGSUSED*/
1226 void
1227 cpu_fast_syscall_disable(void *arg)
1228 {
1229 	if (x86_feature & X86_SEP)
1230 		cpu_sep_disable();
1231 	if (x86_feature & X86_ASYSC)
1232 		cpu_asysc_disable();
1233 }
1234 
1235 /*ARGSUSED*/
1236 void
1237 cpu_fast_syscall_enable(void *arg)
1238 {
1239 	if (x86_feature & X86_SEP)
1240 		cpu_sep_enable();
1241 	if (x86_feature & X86_ASYSC)
1242 		cpu_asysc_enable();
1243 }
1244 
1245 static void
1246 cpu_sep_enable(void)
1247 {
1248 	ASSERT(x86_feature & X86_SEP);
1249 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1250 
1251 	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
1252 }
1253 
1254 static void
1255 cpu_sep_disable(void)
1256 {
1257 	ASSERT(x86_feature & X86_SEP);
1258 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1259 
1260 	/*
1261 	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
1262 	 * the sysenter or sysexit instruction to trigger a #gp fault.
1263 	 */
1264 	wrmsr(MSR_INTC_SEP_CS, 0ULL);
1265 }
1266 
1267 static void
1268 cpu_asysc_enable(void)
1269 {
1270 	ASSERT(x86_feature & X86_ASYSC);
1271 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1272 
1273 	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
1274 	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
1275 }
1276 
1277 static void
1278 cpu_asysc_disable(void)
1279 {
1280 	ASSERT(x86_feature & X86_ASYSC);
1281 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1282 
1283 	/*
1284 	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
1285 	 * executing syscall or sysret with this bit off will incur a #ud trap.
1286 	 */
1287 	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
1288 	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
1289 }
1290