xref: /titanic_50/usr/src/uts/i86pc/os/mp_startup.c (revision 965005c81e0f731867d47892b9fb677030b102df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/thread.h>
31 #include <sys/cpuvar.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/proc.h>
35 #include <sys/disp.h>
36 #include <sys/mmu.h>
37 #include <sys/class.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/asm_linkage.h>
41 #include <sys/x_call.h>
42 #include <sys/systm.h>
43 #include <sys/var.h>
44 #include <sys/vtrace.h>
45 #include <vm/hat.h>
46 #include <sys/mmu.h>
47 #include <vm/as.h>
48 #include <vm/seg_kmem.h>
49 #include <sys/segments.h>
50 #include <sys/kmem.h>
51 #include <sys/stack.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/x86_archext.h>
54 #include <sys/machsystm.h>
55 #include <sys/traptrace.h>
56 #include <sys/clock.h>
57 #include <sys/cpc_impl.h>
58 #include <sys/chip.h>
59 #include <sys/dtrace.h>
60 #include <sys/archsystm.h>
61 #include <sys/fp.h>
62 #include <sys/reboot.h>
63 #include <sys/kdi.h>
64 #include <vm/hat_i86.h>
65 #include <sys/memnode.h>
66 
/*
 * cpus[0] is the statically allocated cpu structure that cpu[0] points at;
 * cpu id 0 is what getbootcpuid() returns.  mp_startup_init() asserts that
 * the cpu[] slot of any CPU it is about to set up is still NULL.
 */
struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * Useful for disabling MP bring-up for an MP capable kernel
 * (a kernel that was built with MP defined).  When zero,
 * start_other_cpus() reports "Not in MP mode" and starts no other CPU.
 */
int use_mp = 1;

/*
 * Bitmask of CPUs to bring up: start_other_cpus() starts cpu N only if
 * bit N is set.  To be set by the platform specific module.
 */
int mp_cpus = 0x1;	/* to be set by platform specific module	*/

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  In this file
 * it is set by start_other_cpus() after xc_init() has initialized cross
 * calls and before the first additional CPU is started.
 */
int flushes_require_xcalls = 0;

/*
 * Bitmask of CPUs that have finished initialization.  Bit 0 is set
 * statically; every other CPU adds its own bit as the last step of
 * mp_startup(), and start_other_cpus() waits for those bits.
 */
ulong_t	cpu_ready_set = 1;

/*
 * real_mode_start and real_mode_end bracket the real mode startup code
 * that start_other_cpus() copies into the real mode platter.
 */
extern	void	real_mode_start(void);
extern	void	real_mode_end(void);
static 	void	mp_startup(void);

/* Fast system call enable/disable helpers; used by init_cpu_syscall(). */
static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/* When non-zero, the TSC is synchronized as each CPU is started. */
extern int tsc_gethrtime_enable;
98 
99 /*
100  * Init CPU info - get CPU type info for processor_info system call.
101  */
102 void
103 init_cpu_info(struct cpu *cp)
104 {
105 	processor_info_t *pi = &cp->cpu_type_info;
106 	char buf[CPU_IDSTRLEN];
107 
108 	/*
109 	 * Get clock-frequency property for the CPU.
110 	 */
111 	pi->pi_clock = cpu_freq;
112 
113 	(void) strcpy(pi->pi_processor_type, "i386");
114 	if (fpu_exists)
115 		(void) strcpy(pi->pi_fputypes, "i387 compatible");
116 
117 	(void) cpuid_getidstr(cp, buf, sizeof (buf));
118 
119 	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
120 	(void) strcpy(cp->cpu_idstr, buf);
121 
122 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
123 
124 	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
125 	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
126 	(void) strcpy(cp->cpu_brandstr, buf);
127 
128 	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
129 }
130 
131 /*
132  * Configure syscall support on this CPU.
133  */
134 /*ARGSUSED*/
135 static void
136 init_cpu_syscall(struct cpu *cp)
137 {
138 	uint64_t value;
139 
140 	kpreempt_disable();
141 
142 #if defined(__amd64)
143 	if (x86_feature & X86_ASYSC) {
144 
145 #if !defined(__lint)
146 		/*
147 		 * The syscall instruction imposes a certain ordering on
148 		 * segment selectors, so we double-check that ordering
149 		 * here.
150 		 */
151 		ASSERT(KDS_SEL == KCS_SEL + 8);
152 		ASSERT(UDS_SEL == U32CS_SEL + 8);
153 		ASSERT(UCS_SEL == U32CS_SEL + 16);
154 #endif
155 		/*
156 		 * Turn syscall/sysret extensions on.
157 		 */
158 		cpu_asysc_enable();
159 
160 		/*
161 		 * Program the magic registers ..
162 		 */
163 		value = ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32;
164 		wrmsr(MSR_AMD_STAR, &value);
165 		value = (uintptr_t)sys_syscall;
166 		wrmsr(MSR_AMD_LSTAR, &value);
167 		value = (uintptr_t)sys_syscall32;
168 		wrmsr(MSR_AMD_CSTAR, &value);
169 
170 		/*
171 		 * This list of flags is masked off the incoming
172 		 * %rfl when we enter the kernel.
173 		 */
174 		value = PS_IE | PS_T;
175 		wrmsr(MSR_AMD_SFMASK, &value);
176 	}
177 #endif
178 
179 	/*
180 	 * On 32-bit kernels, we use sysenter/sysexit because it's too
181 	 * hard to use syscall/sysret, and it is more portable anyway.
182 	 *
183 	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
184 	 * variant isn't available to 32-bit applications, but sysenter is.
185 	 */
186 	if (x86_feature & X86_SEP) {
187 
188 #if !defined(__lint)
189 		/*
190 		 * The sysenter instruction imposes a certain ordering on
191 		 * segment selectors, so we double-check that ordering
192 		 * here. See "sysenter" in Intel document 245471-012, "IA-32
193 		 * Intel Architecture Software Developer's Manual Volume 2:
194 		 * Instruction Set Reference"
195 		 */
196 		ASSERT(KDS_SEL == KCS_SEL + 8);
197 
198 		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
199 		ASSERT32(UDS_SEL == UCS_SEL + 8);
200 
201 		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
202 		ASSERT64(UDS_SEL == U32CS_SEL + 8);
203 #endif
204 
205 		cpu_sep_enable();
206 
207 		/*
208 		 * resume() sets this value to the base of the threads stack
209 		 * via a context handler.
210 		 */
211 		value = 0;
212 		wrmsr(MSR_INTC_SEP_ESP, &value);
213 
214 		value = (uintptr_t)sys_sysenter;
215 		wrmsr(MSR_INTC_SEP_EIP, &value);
216 	}
217 
218 	kpreempt_enable();
219 }
220 
221 /*
222  * Multiprocessor initialization.
223  *
224  * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
225  * startup and idle threads for the specified CPU.
226  */
227 static void
228 mp_startup_init(int cpun)
229 {
230 #if defined(__amd64)
231 extern void *long_mode_64(void);
232 #endif	/* __amd64 */
233 
234 	struct cpu *cp;
235 	struct tss *ntss;
236 	kthread_id_t tp;
237 	caddr_t	sp;
238 	int size;
239 	proc_t *procp;
240 	extern void idle();
241 	extern void init_intr_threads(struct cpu *);
242 
243 	struct cpu_tables *tablesp;
244 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
245 
246 #ifdef TRAPTRACE
247 	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
248 #endif
249 
250 	ASSERT(cpun < NCPU && cpu[cpun] == NULL);
251 
252 	if ((cp = kmem_zalloc(sizeof (*cp), KM_NOSLEEP)) == NULL) {
253 		panic("mp_startup_init: cpu%d: "
254 		    "no memory for cpu structure", cpun);
255 		/*NOTREACHED*/
256 	}
257 	procp = curthread->t_procp;
258 
259 	mutex_enter(&cpu_lock);
260 	/*
261 	 * Initialize the dispatcher first.
262 	 */
263 	disp_cpu_init(cp);
264 	mutex_exit(&cpu_lock);
265 
266 	cpu_vm_data_init(cp);
267 
268 	/*
269 	 * Allocate and initialize the startup thread for this CPU.
270 	 * Interrupt and process switch stacks get allocated later
271 	 * when the CPU starts running.
272 	 */
273 	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
274 	    TS_STOPPED, maxclsyspri);
275 
276 	/*
277 	 * Set state to TS_ONPROC since this thread will start running
278 	 * as soon as the CPU comes online.
279 	 *
280 	 * All the other fields of the thread structure are setup by
281 	 * thread_create().
282 	 */
283 	THREAD_ONPROC(tp, cp);
284 	tp->t_preempt = 1;
285 	tp->t_bound_cpu = cp;
286 	tp->t_affinitycnt = 1;
287 	tp->t_cpu = cp;
288 	tp->t_disp_queue = cp->cpu_disp;
289 
290 	/*
291 	 * Setup thread to start in mp_startup.
292 	 */
293 	sp = tp->t_stk;
294 	tp->t_pc = (uintptr_t)mp_startup;
295 	tp->t_sp = (uintptr_t)(sp - MINFRAME);
296 
297 	cp->cpu_id = cpun;
298 	cp->cpu_self = cp;
299 	cp->cpu_mask = 1 << cpun;
300 	cp->cpu_thread = tp;
301 	cp->cpu_lwp = NULL;
302 	cp->cpu_dispthread = tp;
303 	cp->cpu_dispatch_pri = DISP_PRIO(tp);
304 
305 	/*
306 	 * Now, initialize per-CPU idle thread for this CPU.
307 	 */
308 	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);
309 
310 	cp->cpu_idle_thread = tp;
311 
312 	tp->t_preempt = 1;
313 	tp->t_bound_cpu = cp;
314 	tp->t_affinitycnt = 1;
315 	tp->t_cpu = cp;
316 	tp->t_disp_queue = cp->cpu_disp;
317 
318 	/*
319 	 * Bootstrap the CPU for CMT aware scheduling
320 	 * The rest of the initialization will happen from
321 	 * mp_startup()
322 	 */
323 	chip_bootstrap_cpu(cp);
324 
325 	/*
326 	 * Perform CPC intialization on the new CPU.
327 	 */
328 	kcpc_hw_init(cp);
329 
330 	/*
331 	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
332 	 * for each CPU.
333 	 */
334 
335 	setup_vaddr_for_ppcopy(cp);
336 
337 	/*
338 	 * Allocate space for page directory, stack, tss, gdt and idt.
339 	 * This assumes that kmem_alloc will return memory which is aligned
340 	 * to the next higher power of 2 or a page(if size > MAXABIG)
341 	 * If this assumption goes wrong at any time due to change in
342 	 * kmem alloc, things may not work as the page directory has to be
343 	 * page aligned
344 	 */
345 	if ((tablesp = kmem_zalloc(sizeof (*tablesp), KM_NOSLEEP)) == NULL)
346 		panic("mp_startup_init: cpu%d cannot allocate tables", cpun);
347 
348 	if ((uintptr_t)tablesp & ~MMU_STD_PAGEMASK) {
349 		kmem_free(tablesp, sizeof (struct cpu_tables));
350 		size = sizeof (struct cpu_tables) + MMU_STD_PAGESIZE;
351 		tablesp = kmem_zalloc(size, KM_NOSLEEP);
352 		tablesp = (struct cpu_tables *)
353 		    (((uintptr_t)tablesp + MMU_STD_PAGESIZE) &
354 		    MMU_STD_PAGEMASK);
355 	}
356 
357 	ntss = cp->cpu_tss = &tablesp->ct_tss;
358 	cp->cpu_gdt = tablesp->ct_gdt;
359 	bcopy(CPU->cpu_gdt, cp->cpu_gdt, NGDT * (sizeof (user_desc_t)));
360 
361 #if defined(__amd64)
362 
363 	/*
364 	 * #DF (double fault).
365 	 */
366 	ntss->tss_ist1 =
367 	    (uint64_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
368 
369 #elif defined(__i386)
370 
371 	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
372 	    (uint32_t)&tablesp->ct_stack[sizeof (tablesp->ct_stack)];
373 
374 	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
375 
376 	ntss->tss_eip = (uint32_t)mp_startup;
377 
378 	ntss->tss_cs = KCS_SEL;
379 	ntss->tss_fs = KFS_SEL;
380 	ntss->tss_gs = KGS_SEL;
381 
382 	/*
383 	 * setup kernel %gs.
384 	 */
385 	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
386 	    SEL_KPL, 0, 1);
387 
388 #endif	/* __i386 */
389 
390 	/*
391 	 * Set I/O bit map offset equal to size of TSS segment limit
392 	 * for no I/O permission map. This will cause all user I/O
393 	 * instructions to generate #gp fault.
394 	 */
395 	ntss->tss_bitmapbase = sizeof (*ntss);
396 
397 	/*
398 	 * setup kernel tss.
399 	 */
400 	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
401 	    sizeof (*cp->cpu_tss) -1, SDT_SYSTSS, SEL_KPL);
402 
403 	/*
404 	 * If we have more than one node, each cpu gets a copy of IDT
405 	 * local to its node. If this is a Pentium box, we use cpu 0's
406 	 * IDT. cpu 0's IDT has been made read-only to workaround the
407 	 * cmpxchgl register bug
408 	 */
409 	cp->cpu_idt = CPU->cpu_idt;
410 	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
411 		cp->cpu_idt = kmem_alloc(sizeof (idt0), KM_SLEEP);
412 		bcopy(idt0, cp->cpu_idt, sizeof (idt0));
413 	}
414 
415 	/*
416 	 * Get interrupt priority data from cpu 0
417 	 */
418 	cp->cpu_pri_data = CPU->cpu_pri_data;
419 
420 	hat_cpu_online(cp);
421 
422 	/* Should remove all entries for the current process/thread here */
423 
424 	/*
425 	 * Fill up the real mode platter to make it easy for real mode code to
426 	 * kick it off. This area should really be one passed by boot to kernel
427 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
428 	 * have identical physical and virtual address in paged mode.
429 	 */
430 	real_mode_platter->rm_idt_base = cp->cpu_idt;
431 	real_mode_platter->rm_idt_lim = sizeof (idt0) - 1;
432 	real_mode_platter->rm_gdt_base = cp->cpu_gdt;
433 	real_mode_platter->rm_gdt_lim = sizeof (gdt0) -1;
434 	real_mode_platter->rm_pdbr = getcr3();
435 	real_mode_platter->rm_cpu = cpun;
436 	real_mode_platter->rm_x86feature = x86_feature;
437 	real_mode_platter->rm_cr4 = cr4_value;
438 
439 #if defined(__amd64)
440 	if (getcr3() > 0xffffffffUL)
441 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
442 			"located above 4G in physical memory (@ 0x%llx).",
443 			(unsigned long long)getcr3());
444 
445 	/*
446 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
447 	 * by code in real_mode_start():
448 	 *
449 	 * GDT[0]:  NULL selector
450 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
451 	 *
452 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
453 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
454 	 * a course of action as any other, though it may cause the entire
455 	 * platform to reset in some cases...
456 	 */
457 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
458 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
459 
460 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
461 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
462 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
463 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
464 
465 	real_mode_platter->rm_temp_idt_lim = 0;
466 	real_mode_platter->rm_temp_idt_base = 0;
467 
468 	/*
469 	 * Since the CPU needs to jump to protected mode using an identity
470 	 * mapped address, we need to calculate it here.
471 	 */
472 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
473 	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
474 #endif	/* __amd64 */
475 
476 #ifdef TRAPTRACE
477 	/*
478 	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers for this
479 	 * CPU.
480 	 */
481 	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
482 	ttc->ttc_next = ttc->ttc_first;
483 	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
484 #endif
485 
486 	/*
487 	 * Record that we have another CPU.
488 	 */
489 	mutex_enter(&cpu_lock);
490 	/*
491 	 * Initialize the interrupt threads for this CPU
492 	 */
493 	init_intr_threads(cp);
494 	/*
495 	 * Add CPU to list of available CPUs.  It'll be on the active list
496 	 * after mp_startup().
497 	 */
498 	cpu_add_unit(cp);
499 	mutex_exit(&cpu_lock);
500 }
501 
502 /*
503  * Apply workarounds for known errata, and warn about those that are absent.
504  *
505  * System vendors occasionally create configurations which contain different
506  * revisions of the CPUs that are almost but not exactly the same.  At the
507  * time of writing, this meant that their clock rates were the same, their
508  * feature sets were the same, but the required workaround were -not-
509  * necessarily the same.  So, this routine is invoked on -every- CPU soon
510  * after starting to make sure that the resulting system contains the most
511  * pessimal set of workarounds needed to cope with *any* of the CPUs in the
512  * system.
513  *
514  * These workarounds are based on Rev 3.57 of the Revision Guide for
515  * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
516  */
517 
518 #if defined(OPTERON_ERRATUM_91)
519 int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
520 #endif
521 
522 #if defined(OPTERON_ERRATUM_93)
523 int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
524 #endif
525 
526 #if defined(OPTERON_ERRATUM_100)
527 int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
528 #endif
529 
530 #if defined(OPTERON_ERRATUM_109)
531 int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
532 #endif
533 
534 #if defined(OPTERON_ERRATUM_121)
535 int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
536 #endif
537 
538 #if defined(OPTERON_ERRATUM_122)
539 int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
540 #endif
541 
542 #if defined(OPTERON_ERRATUM_123)
543 int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
544 #endif
545 
546 #if defined(OPTERON_ERRATUM_131)
547 int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
548 #endif
549 
550 #define	WARNING(cpu, n)						\
551 	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %d",	\
552 	    (cpu)->cpu_id, (n))
553 
554 uint_t
555 workaround_errata(struct cpu *cpu)
556 {
557 	uint_t missing = 0;
558 
559 	ASSERT(cpu == CPU);
560 
561 	/*LINTED*/
562 	if (cpuid_opteron_erratum(cpu, 88) > 0) {
563 		/*
564 		 * SWAPGS May Fail To Read Correct GS Base
565 		 */
566 #if defined(OPTERON_ERRATUM_88)
567 		/*
568 		 * The workaround is an mfence in the relevant assembler code
569 		 */
570 #else
571 		WARNING(cpu, 88);
572 		missing++;
573 #endif
574 	}
575 
576 	if (cpuid_opteron_erratum(cpu, 91) > 0) {
577 		/*
578 		 * Software Prefetches May Report A Page Fault
579 		 */
580 #if defined(OPTERON_ERRATUM_91)
581 		/*
582 		 * fix is in trap.c
583 		 */
584 		opteron_erratum_91++;
585 #else
586 		WARNING(cpu, 91);
587 		missing++;
588 #endif
589 	}
590 
591 	if (cpuid_opteron_erratum(cpu, 93) > 0) {
592 		/*
593 		 * RSM Auto-Halt Restart Returns to Incorrect RIP
594 		 */
595 #if defined(OPTERON_ERRATUM_93)
596 		/*
597 		 * fix is in trap.c
598 		 */
599 		opteron_erratum_93++;
600 #else
601 		WARNING(cpu, 93);
602 		missing++;
603 #endif
604 	}
605 
606 	/*LINTED*/
607 	if (cpuid_opteron_erratum(cpu, 95) > 0) {
608 		/*
609 		 * RET Instruction May Return to Incorrect EIP
610 		 */
611 #if defined(OPTERON_ERRATUM_95)
612 #if defined(_LP64)
613 		/*
614 		 * Workaround this by ensuring that 32-bit user code and
615 		 * 64-bit kernel code never occupy the same address
616 		 * range mod 4G.
617 		 */
618 		if (_userlimit32 > 0xc0000000ul)
619 			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;
620 
621 		/*LINTED*/
622 		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
623 #endif	/* _LP64 */
624 #else
625 		WARNING(cpu, 95);
626 		missing++;
627 #endif	/* OPTERON_ERRATUM_95 */
628 	}
629 
630 	if (cpuid_opteron_erratum(cpu, 100) > 0) {
631 		/*
632 		 * Compatibility Mode Branches Transfer to Illegal Address
633 		 */
634 #if defined(OPTERON_ERRATUM_100)
635 		/*
636 		 * fix is in trap.c
637 		 */
638 		opteron_erratum_100++;
639 #else
640 		WARNING(cpu, 100);
641 		missing++;
642 #endif
643 	}
644 
645 	/*LINTED*/
646 	if (cpuid_opteron_erratum(cpu, 108) > 0) {
647 		/*
648 		 * CPUID Instruction May Return Incorrect Model Number In
649 		 * Some Processors
650 		 */
651 #if defined(OPTERON_ERRATUM_108)
652 		/*
653 		 * (Our cpuid-handling code corrects the model number on
654 		 * those processors)
655 		 */
656 #else
657 		WARNING(cpu, 108);
658 		missing++;
659 #endif
660 	}
661 
662 	/*LINTED*/
663 	if (cpuid_opteron_erratum(cpu, 109) > 0) {
664 		/*
665 		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
666 		 */
667 #if defined(OPTERON_ERRATUM_109)
668 		uint64_t	patchlevel;
669 
670 		(void) rdmsr(MSR_AMD_PATCHLEVEL, &patchlevel);
671 		/* workaround is to print a warning to upgrade BIOS */
672 		if (patchlevel == 0)
673 			opteron_erratum_109++;
674 #else
675 		WARNING(cpu, 109);
676 		missing++;
677 #endif
678 	}
679 	/*LINTED*/
680 	if (cpuid_opteron_erratum(cpu, 121) > 0) {
681 		/*
682 		 * Sequential Execution Across Non_Canonical Boundary Caused
683 		 * Processor Hang
684 		 */
685 #if defined(OPTERON_ERRATUM_121)
686 		static int	lma;
687 
688 		if (opteron_erratum_121)
689 			opteron_erratum_121++;
690 
691 		/*
692 		 * Erratum 121 is only present in long (64 bit) mode.
693 		 * Workaround is to include the page immediately before the
694 		 * va hole to eliminate the possibility of system hangs due to
695 		 * sequential execution across the va hole boundary.
696 		 */
697 		if (lma == 0) {
698 			uint64_t	efer;
699 
700 			/*
701 			 * check LMA once: assume all cpus are in long mode
702 			 * or not.
703 			 */
704 			lma = 1;
705 
706 			(void) rdmsr(MSR_AMD_EFER, &efer);
707 			if (efer & AMD_EFER_LMA) {
708 				if (hole_start) {
709 					hole_start -= PAGESIZE;
710 				} else {
711 					/*
712 					 * hole_start not yet initialized by
713 					 * mmu_init. Initialize hole_start
714 					 * with value to be subtracted.
715 					 */
716 					hole_start = PAGESIZE;
717 				}
718 				opteron_erratum_121++;
719 			}
720 		}
721 #else
722 		WARNING(cpu, 121);
723 		missing++;
724 #endif
725 	}
726 
727 	/*LINTED*/
728 	if (cpuid_opteron_erratum(cpu, 122) > 0) {
729 		/*
730 		 * TLB Flush Filter May Cause Cohenrency Problem in
731 		 * Multiprocessor Systems
732 		 */
733 #if defined(OPTERON_ERRATUM_122)
734 		/*
735 		 * Erratum 122 is only present in MP configurations (multi-core
736 		 * or multi-processor).
737 		 */
738 
739 		if (opteron_erratum_122 || lgrp_plat_node_cnt > 1 ||
740 		    cpuid_get_ncpu_per_chip(cpu) > 1) {
741 			uint64_t	hwcrval;
742 
743 			/* disable TLB Flush Filter */
744 			(void) rdmsr(MSR_AMD_HWCR, &hwcrval);
745 			hwcrval |= AMD_HWCR_FFDIS;
746 			wrmsr(MSR_AMD_HWCR, &hwcrval);
747 			opteron_erratum_122++;
748 		}
749 
750 #else
751 		WARNING(cpu, 122);
752 		missing++;
753 #endif
754 	}
755 
756 #if defined(OPTERON_ERRATUM_123)
757 	/*LINTED*/
758 	if (cpuid_opteron_erratum(cpu, 123) > 0) {
759 		/*
760 		 * Bypassed Reads May Cause Data Corruption of System Hang in
761 		 * Dual Core Processors
762 		 */
763 		/*
764 		 * Erratum 123 applies only to multi-core cpus.
765 		 */
766 
767 		if (cpuid_get_ncpu_per_chip(cpu) > 1) {
768 			uint64_t	patchlevel;
769 
770 			(void) rdmsr(MSR_AMD_PATCHLEVEL, &patchlevel);
771 			/* workaround is to print a warning to upgrade BIOS */
772 			if (patchlevel == 0)
773 				opteron_erratum_123++;
774 		}
775 	}
776 #endif
777 
778 #if defined(OPTERON_ERRATUM_131)
779 	/*LINTED*/
780 	if (cpuid_opteron_erratum(cpu, 131) > 0) {
781 		/*
782 		 * Multiprocessor Systems with Four or More Cores May Deadlock
783 		 * Waiting for a Probe Response
784 		 */
785 		/*
786 		 * Erratum 131 applies to any system with four or more cores.
787 		 */
788 		if ((opteron_erratum_131 == 0) && ((lgrp_plat_node_cnt *
789 		    cpuid_get_ncpu_per_chip(cpu)) >= 4)) {
790 			uint64_t nbcfg;
791 
792 			/*
793 			 * Workaround is to print a warning to upgrade
794 			 * the BIOS
795 			 */
796 			(void) rdmsr(MSR_AMD_NB_CFG, &nbcfg);
797 			if (!(nbcfg & AMD_NB_CFG_SRQ_HEARTBEAT))
798 				opteron_erratum_131++;
799 		}
800 #endif
801 	}
802 	return (missing);
803 }
804 
/*
 * Report, once all CPUs have been examined, the errata whose only
 * workaround is a BIOS microcode patch that was found to be missing.
 * One warning is logged for each of errata 109, 123 and 131 whose
 * counter is non-zero (and whose support is compiled in).
 */
void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) "
		    "64/Opteron(tm) processor\n"
		    "erratum 109 was not detected; updating your "
		    "system's BIOS to a version\n"
		    "containing this microcode patch is HIGHLY "
		    "recommended or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_109 */

#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) "
		    "64/Opteron(tm) processor\n"
		    "erratum 123 was not detected; updating your "
		    "system's BIOS to a version\n"
		    "containing this microcode patch is HIGHLY "
		    "recommended or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_123 */

#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131 != 0) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) "
		    "64/Opteron(tm) processor\n"
		    "erratum 131 was not detected; updating your "
		    "system's BIOS to a version\n"
		    "containing this microcode patch is HIGHLY "
		    "recommended or erroneous system\n"
		    "operation may occur.\n");
	}
#endif	/* OPTERON_ERRATUM_131 */
}
839 
840 static ushort_t *mp_map_warm_reset_vector();
841 static void mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector);
842 
843 /*ARGSUSED*/
844 void
845 start_other_cpus(int cprboot)
846 {
847 	unsigned who;
848 	int cpuid = getbootcpuid();
849 	int delays = 0;
850 	int started_cpu;
851 	ushort_t *warm_reset_vector = NULL;
852 	extern int procset;
853 
854 	/*
855 	 * Initialize our own cpu_info.
856 	 */
857 	init_cpu_info(CPU);
858 
859 	/*
860 	 * Initialize our syscall handlers
861 	 */
862 	init_cpu_syscall(CPU);
863 
864 	/*
865 	 * if only 1 cpu or not using MP, skip the rest of this
866 	 */
867 	if (!(mp_cpus & ~(1 << cpuid)) || use_mp == 0) {
868 		if (use_mp == 0)
869 			cmn_err(CE_CONT, "?***** Not in MP mode\n");
870 		goto done;
871 	}
872 
873 	/*
874 	 * perform such initialization as is needed
875 	 * to be able to take CPUs on- and off-line.
876 	 */
877 	cpu_pause_init();
878 
879 	xc_init();		/* initialize processor crosscalls */
880 
881 	/*
882 	 * Copy the real mode code at "real_mode_start" to the
883 	 * page at rm_platter_va.
884 	 */
885 	warm_reset_vector = mp_map_warm_reset_vector();
886 	if (warm_reset_vector == NULL)
887 		goto done;
888 
889 	bcopy((caddr_t)real_mode_start,
890 	    (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
891 	    (size_t)real_mode_end - (size_t)real_mode_start);
892 
893 	flushes_require_xcalls = 1;
894 
895 	affinity_set(CPU_CURRENT);
896 
897 	for (who = 0; who < NCPU; who++) {
898 		if (who == cpuid)
899 			continue;
900 
901 		if ((mp_cpus & (1 << who)) == 0)
902 			continue;
903 
904 		mp_startup_init(who);
905 		started_cpu = 1;
906 		(*cpu_startf)(who, rm_platter_pa);
907 
908 		while ((procset & (1 << who)) == 0) {
909 
910 			delay(1);
911 			if (++delays > (20 * hz)) {
912 
913 				cmn_err(CE_WARN,
914 				    "cpu%d failed to start", who);
915 
916 				mutex_enter(&cpu_lock);
917 				cpu[who]->cpu_flags = 0;
918 				cpu_vm_data_destroy(cpu[who]);
919 				cpu_del_unit(who);
920 				mutex_exit(&cpu_lock);
921 
922 				started_cpu = 0;
923 				break;
924 			}
925 		}
926 		if (!started_cpu)
927 			continue;
928 		if (tsc_gethrtime_enable)
929 			tsc_sync_master(who);
930 
931 
932 		if (dtrace_cpu_init != NULL) {
933 			/*
934 			 * DTrace CPU initialization expects cpu_lock
935 			 * to be held.
936 			 */
937 			mutex_enter(&cpu_lock);
938 			(*dtrace_cpu_init)(who);
939 			mutex_exit(&cpu_lock);
940 		}
941 	}
942 
943 	affinity_clear();
944 
945 	for (who = 0; who < NCPU; who++) {
946 		if (who == cpuid)
947 			continue;
948 
949 		if (!(procset & (1 << who)))
950 			continue;
951 
952 		while (!(cpu_ready_set & (1 << who)))
953 			delay(1);
954 	}
955 
956 done:
957 	workaround_errata_end();
958 
959 	if (warm_reset_vector != NULL)
960 		mp_unmap_warm_reset_vector(warm_reset_vector);
961 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
962 	    HAT_UNLOAD);
963 }
964 
965 /*
966  * Dummy functions - no i86pc platforms support dynamic cpu allocation.
967  */
/*
 * Request to configure a CPU.  The request is always refused with
 * ENOTSUP; cpuid is ignored.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	const int status = ENOTSUP;	/* not supported */

	return (status);
}
974 
/*
 * Request to unconfigure a CPU.  The request is always refused with
 * ENOTSUP; cpuid is ignored.
 */
/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	const int status = ENOTSUP;	/* not supported */

	return (status);
}
981 
982 /*
983  * Startup function for 'other' CPUs (besides boot cpu).
984  * Resumed from cpu_startup.
985  */
986 void
987 mp_startup(void)
988 {
989 	struct cpu *cp = CPU;
990 	extern int procset;
991 	uint_t new_x86_feature;
992 
993 	new_x86_feature = cpuid_pass1(cp);
994 
995 	/*
996 	 * We need to Sync MTRR with cpu0's MTRR. We have to do
997 	 * this with interrupts disabled.
998 	 */
999 	if (x86_feature & X86_MTRR)
1000 		mtrr_sync();
1001 	/*
1002 	 * Enable machine check architecture
1003 	 */
1004 	if (x86_feature & X86_MCA)
1005 		setup_mca();
1006 
1007 	/*
1008 	 * Initialize this CPU's syscall handlers
1009 	 */
1010 	init_cpu_syscall(cp);
1011 
1012 	/*
1013 	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
1014 	 * highest level at which a routine is permitted to block on
1015 	 * an adaptive mutex (allows for cpu poke interrupt in case
1016 	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
1017 	 * device interrupts that may end up in the hat layer issuing cross
1018 	 * calls before CPU_READY is set.
1019 	 */
1020 	(void) splx(ipltospl(LOCK_LEVEL));
1021 
1022 	/*
1023 	 * Do a sanity check to make sure this new CPU is a sane thing
1024 	 * to add to the collection of processors running this system.
1025 	 *
1026 	 * XXX	Clearly this needs to get more sophisticated, if x86
1027 	 * systems start to get built out of heterogenous CPUs; as is
1028 	 * likely to happen once the number of processors in a configuration
1029 	 * gets large enough.
1030 	 */
1031 	if ((x86_feature & new_x86_feature) != x86_feature) {
1032 		cmn_err(CE_CONT, "?cpu%d: %b\n",
1033 		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
1034 		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
1035 	}
1036 
1037 	/*
1038 	 * We could be more sophisticated here, and just mark the CPU
1039 	 * as "faulted" but at this point we'll opt for the easier
1040 	 * answer of dieing horribly.  Provided the boot cpu is ok,
1041 	 * the system can be recovered by booting with use_mp set to zero.
1042 	 */
1043 	if (workaround_errata(cp) != 0)
1044 		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
1045 
1046 	cpuid_pass2(cp);
1047 	cpuid_pass3(cp);
1048 	(void) cpuid_pass4(cp);
1049 
1050 	init_cpu_info(cp);
1051 
1052 	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);
1053 
1054 	mutex_enter(&cpu_lock);
1055 	procset |= 1 << cp->cpu_id;
1056 	mutex_exit(&cpu_lock);
1057 
1058 	if (tsc_gethrtime_enable)
1059 		tsc_sync_slave();
1060 
1061 	mutex_enter(&cpu_lock);
1062 	/*
1063 	 * It's unfortunate that chip_cpu_init() has to be called here.
1064 	 * It really belongs in cpu_add_unit(), but unfortunately it is
1065 	 * dependent on the cpuid probing, which must be done in the
1066 	 * context of the current CPU. Care must be taken on x86 to ensure
1067 	 * that mp_startup can safely block even though chip_cpu_init() and
1068 	 * cpu_add_active() have not yet been called.
1069 	 */
1070 	chip_cpu_init(cp);
1071 	chip_cpu_startup(cp);
1072 
1073 	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
1074 	cpu_add_active(cp);
1075 	mutex_exit(&cpu_lock);
1076 
1077 	(void) spl0();				/* enable interrupts */
1078 
1079 	if (boothowto & RB_DEBUG)
1080 		kdi_dvec_cpu_init(cp);
1081 
1082 	/*
1083 	 * Setting the bit in cpu_ready_set must be the last operation in
1084 	 * processor initialization; the boot CPU will continue to boot once
1085 	 * it sees this bit set for all active CPUs.
1086 	 */
1087 	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
1088 
1089 	/*
1090 	 * Because mp_startup() gets fired off after init() starts, we
1091 	 * can't use the '?' trick to do 'boot -v' printing - so we
1092 	 * always direct the 'cpu .. online' messages to the log.
1093 	 */
1094 	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
1095 	    cp->cpu_id);
1096 
1097 	/*
1098 	 * Now we are done with the startup thread, so free it up.
1099 	 */
1100 	thread_exit();
1101 	panic("mp_startup: cannot return");
1102 	/*NOTREACHED*/
1103 }
1104 
1105 
1106 /*
1107  * Start CPU on user request.
1108  */
1109 /* ARGSUSED */
1110 int
1111 mp_cpu_start(struct cpu *cp)
1112 {
1113 	ASSERT(MUTEX_HELD(&cpu_lock));
1114 	if (cp->cpu_id == getbootcpuid())
1115 		return (EBUSY); 	/* Cannot start boot CPU */
1116 	return (0);
1117 }
1118 
1119 /*
1120  * Stop CPU on user request.
1121  */
1122 /* ARGSUSED */
1123 int
1124 mp_cpu_stop(struct cpu *cp)
1125 {
1126 	ASSERT(MUTEX_HELD(&cpu_lock));
1127 	if (cp->cpu_id == getbootcpuid())
1128 		return (EBUSY); 	/* Cannot stop boot CPU */
1129 
1130 	return (0);
1131 }
1132 
1133 /*
1134  * Power on CPU.
1135  */
1136 /* ARGSUSED */
1137 int
1138 mp_cpu_poweron(struct cpu *cp)
1139 {
1140 	ASSERT(MUTEX_HELD(&cpu_lock));
1141 	return (ENOTSUP);		/* not supported */
1142 }
1143 
1144 /*
1145  * Power off CPU.
1146  */
1147 /* ARGSUSED */
1148 int
1149 mp_cpu_poweroff(struct cpu *cp)
1150 {
1151 	ASSERT(MUTEX_HELD(&cpu_lock));
1152 	return (ENOTSUP);		/* not supported */
1153 }
1154 
1155 
1156 /*
1157  * Take the specified CPU out of participation in interrupts.
1158  */
1159 int
1160 cpu_disable_intr(struct cpu *cp)
1161 {
1162 	/*
1163 	 * cannot disable interrupts on boot cpu
1164 	 */
1165 	if (cp == cpu[getbootcpuid()])
1166 		return (EBUSY);
1167 
1168 	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
1169 		return (EBUSY);
1170 
1171 	cp->cpu_flags &= ~CPU_ENABLE;
1172 	return (0);
1173 }
1174 
1175 /*
1176  * Allow the specified CPU to participate in interrupts.
1177  */
1178 void
1179 cpu_enable_intr(struct cpu *cp)
1180 {
1181 	ASSERT(MUTEX_HELD(&cpu_lock));
1182 	if (cp == cpu[getbootcpuid()])
1183 		return;
1184 
1185 	cp->cpu_flags |= CPU_ENABLE;
1186 	psm_enable_intr(cp->cpu_id);
1187 }
1188 
1189 
1190 /*
1191  * return the cpu id of the initial startup cpu
1192  */
1193 processorid_t
1194 getbootcpuid(void)
1195 {
1196 	return (0);
1197 }
1198 
1199 static ushort_t *
1200 mp_map_warm_reset_vector()
1201 {
1202 	ushort_t *warm_reset_vector;
1203 
1204 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
1205 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
1206 		return (NULL);
1207 
1208 	/*
1209 	 * setup secondary cpu bios boot up vector
1210 	 */
1211 	*warm_reset_vector = (ushort_t)((caddr_t)
1212 		((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
1213 		+ ((ulong_t)rm_platter_va & 0xf));
1214 	warm_reset_vector++;
1215 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
1216 
1217 	--warm_reset_vector;
1218 	return (warm_reset_vector);
1219 }
1220 
1221 static void
1222 mp_unmap_warm_reset_vector(ushort_t *warm_reset_vector)
1223 {
1224 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
1225 }
1226 
/*
 * Intentionally empty: no action is taken here.  (Judging by the name,
 * this is presumably called when a CPU enters the faulted state.)
 */
/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
	/* nothing to do */
}
1231 
/*
 * Intentionally empty: no action is taken here.  (Judging by the name,
 * this is presumably called when a CPU leaves the faulted state.)
 */
/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
	/* nothing to do */
}
1236 
1237 /*
1238  * The following two routines are used as context operators on threads belonging
1239  * to processes with a private LDT (see sysi86).  Due to the rarity of such
1240  * processes, these routines are currently written for best code readability and
1241  * organization rather than speed.  We could avoid checking x86_feature at every
1242  * context switch by installing different context ops, depending on the
1243  * x86_feature flags, at LDT creation time -- one for each combination of fast
1244  * syscall feature flags.
1245  */
1246 
1247 /*ARGSUSED*/
1248 void
1249 cpu_fast_syscall_disable(void *arg)
1250 {
1251 	if (x86_feature & X86_SEP)
1252 		cpu_sep_disable();
1253 	if (x86_feature & X86_ASYSC)
1254 		cpu_asysc_disable();
1255 }
1256 
1257 /*ARGSUSED*/
1258 void
1259 cpu_fast_syscall_enable(void *arg)
1260 {
1261 	if (x86_feature & X86_SEP)
1262 		cpu_sep_enable();
1263 	if (x86_feature & X86_ASYSC)
1264 		cpu_asysc_enable();
1265 }
1266 
1267 static void
1268 cpu_sep_enable(void)
1269 {
1270 	uint64_t value;
1271 
1272 	ASSERT(x86_feature & X86_SEP);
1273 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1274 
1275 	value = KCS_SEL;
1276 	wrmsr(MSR_INTC_SEP_CS, &value);
1277 }
1278 
1279 static void
1280 cpu_sep_disable(void)
1281 {
1282 	uint64_t value;
1283 
1284 	ASSERT(x86_feature & X86_SEP);
1285 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1286 
1287 	/*
1288 	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
1289 	 * the sysenter or sysexit instruction to trigger a #gp fault.
1290 	 */
1291 	value = 0;
1292 	wrmsr(MSR_INTC_SEP_CS, &value);
1293 }
1294 
1295 static void
1296 cpu_asysc_enable(void)
1297 {
1298 	uint64_t value;
1299 
1300 	ASSERT(x86_feature & X86_ASYSC);
1301 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1302 
1303 	(void) rdmsr(MSR_AMD_EFER, &value);
1304 	value |= AMD_EFER_SCE;
1305 	wrmsr(MSR_AMD_EFER, &value);
1306 }
1307 
1308 static void
1309 cpu_asysc_disable(void)
1310 {
1311 	uint64_t value;
1312 
1313 	ASSERT(x86_feature & X86_ASYSC);
1314 	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
1315 
1316 	/*
1317 	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
1318 	 * executing syscall or sysret with this bit off will incur a #ud trap.
1319 	 */
1320 	(void) rdmsr(MSR_AMD_EFER, &value);
1321 	value &= ~AMD_EFER_SCE;
1322 	wrmsr(MSR_AMD_EFER, &value);
1323 }
1324