/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vtrace.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <sys/segments.h>
#include <sys/kmem.h>
#include <sys/stack.h>
#include <sys/smp_impldefs.h>
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cpc_impl.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
#include <sys/sysmacros.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include <sys/cpu_module.h>

struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * Useful for disabling MP bring-up on an MP capable system.
 */
int use_mp = 1;

/*
 * Set by a PSM to indicate which CPUs are present on the system.
 */
cpuset_t mp_cpus;

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  It is set
 * once enough MP initialization has been done to allow cross calls.
 */
int flushes_require_xcalls;
cpuset_t cpu_ready_set = 1;	/* CPUs that have completed startup; boot CPU initially */

static void mp_startup(void);

static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/*
 * Init CPU info - get CPU type info for processor_info system call.
 */
void
init_cpu_info(struct cpu *cp)
{
	processor_info_t *pi = &cp->cpu_type_info;
	char buf[CPU_IDSTRLEN];

	/*
	 * Get clock-frequency property for the CPU.
	 */
	pi->pi_clock = cpu_freq;

	/*
	 * Current frequency in Hz.
	 */
	cp->cpu_curr_clock = cpu_freq_hz;

	/*
	 * Supported frequencies.
	 */
	cpu_set_supp_freqs(cp, NULL);

	(void) strcpy(pi->pi_processor_type, "i386");
	if (fpu_exists)
		(void) strcpy(pi->pi_fputypes, "i387 compatible");

	(void) cpuid_getidstr(cp, buf, sizeof (buf));

	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
	(void) strcpy(cp->cpu_idstr, buf);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);

	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
	(void) strcpy(cp->cpu_brandstr, buf);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
}

/*
 * Configure syscall support on this CPU.
 */
/*ARGSUSED*/
void
init_cpu_syscall(struct cpu *cp)
{
	kpreempt_disable();

#if defined(__amd64)
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC)) {

#if !defined(__lint)
		/*
		 * The syscall instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);
		ASSERT(UDS_SEL == U32CS_SEL + 8);
		ASSERT(UCS_SEL == U32CS_SEL + 16);
#endif
		/*
		 * Turn syscall/sysret extensions on.
		 */
		cpu_asysc_enable();

		/*
		 * Program the magic registers ..
		 */
		wrmsr(MSR_AMD_STAR,
		    ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);

		/*
		 * This list of flags is masked off the incoming
		 * %rfl when we enter the kernel.
		 */
		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
	}
#endif

	/*
	 * On 32-bit kernels, we use sysenter/sysexit because it's too
	 * hard to use syscall/sysret, and it is more portable anyway.
	 *
	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
	 * variant isn't available to 32-bit applications, but sysenter is.
	 */
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP)) {

#if !defined(__lint)
		/*
		 * The sysenter instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here. See "sysenter" in Intel document 245471-012, "IA-32
		 * Intel Architecture Software Developer's Manual Volume 2:
		 * Instruction Set Reference"
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);

		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT32(UDS_SEL == UCS_SEL + 8);

		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT64(UDS_SEL == U32CS_SEL + 8);
#endif

		cpu_sep_enable();

		/*
		 * resume() sets this value to the base of the thread's stack
		 * via a context handler.
		 */
		wrmsr(MSR_INTC_SEP_ESP, 0);
		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
	}

	kpreempt_enable();
}

/*
 * Multiprocessor initialization.
 *
 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
 * startup and idle threads for the specified CPU.
 */
struct cpu *
mp_startup_init(int cpun)
{
	struct cpu *cp;
	kthread_id_t tp;
	caddr_t	sp;
	proc_t *procp;
#if !defined(__xpv)
	extern int idle_cpu_prefer_mwait;
#endif
	extern void idle();

#ifdef TRAPTRACE
	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
#endif

	ASSERT(cpun < NCPU && cpu[cpun] == NULL);

	cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
#if !defined(__xpv)
	if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
		cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
#endif

	procp = curthread->t_procp;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the dispatcher first.
	 */
	disp_cpu_init(cp);
	mutex_exit(&cpu_lock);

	cpu_vm_data_init(cp);

	/*
	 * Allocate and initialize the startup thread for this CPU.
	 * Interrupt and process switch stacks get allocated later
	 * when the CPU starts running.
	 */
	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
	    TS_STOPPED, maxclsyspri);

	/*
	 * Set state to TS_ONPROC since this thread will start running
	 * as soon as the CPU comes online.
	 *
	 * All the other fields of the thread structure are set up by
	 * thread_create().
	 */
	THREAD_ONPROC(tp, cp);
	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Set up the thread to start in mp_startup().
	 */
	sp = tp->t_stk;
	tp->t_pc = (uintptr_t)mp_startup;
	tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
	tp->t_sp -= STACK_ENTRY_ALIGN;		/* fake a call */
#endif

	cp->cpu_id = cpun;
	cp->cpu_self = cp;
	cp->cpu_thread = tp;
	cp->cpu_lwp = NULL;
	cp->cpu_dispthread = tp;
	cp->cpu_dispatch_pri = DISP_PRIO(tp);

	/*
	 * cpu_base_spl must be set explicitly here to prevent any blocking
	 * operations in mp_startup from causing the spl of the cpu to drop
	 * to 0 (allowing device interrupts before we're ready) in resume().
	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
	 * As an extra bit of security on DEBUG kernels, this is enforced with
	 * an assertion in mp_startup() -- before cpu_base_spl is set to its
	 * proper value.
	 */
	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);

	/*
	 * Now, initialize per-CPU idle thread for this CPU.
	 */
	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);

	cp->cpu_idle_thread = tp;

	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Bootstrap the CPU's PG data
	 */
	pg_cpu_bootstrap(cp);

	/*
	 * Perform CPC initialization on the new CPU.
	 */
	kcpc_hw_init(cp);

	/*
	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
	 * for each CPU.
	 */
	setup_vaddr_for_ppcopy(cp);

	/*
	 * Allocate page for new GDT and initialize from current GDT.
	 */
#if !defined(__lint)
	ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
#endif
	cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
	bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT));

#if defined(__i386)
	/*
	 * Set up kernel %gs.
	 */
	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
	    SEL_KPL, 0, 1);
#endif

	/*
	 * If we have more than one node, each cpu gets a copy of IDT
	 * local to its node.
	 * If this is a Pentium box, we use cpu 0's IDT.  cpu 0's IDT has
	 * been made read-only to work around the cmpxchgl register bug.
	 */
	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
#if !defined(__lint)
		ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
#endif
		cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
		bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
	} else {
		cp->cpu_idt = CPU->cpu_idt;
	}

	/*
	 * Get interrupt priority data from cpu 0.
	 */
	cp->cpu_pri_data = CPU->cpu_pri_data;

	/*
	 * alloc space for cpuid info
	 */
	cpuid_alloc_space(cp);

#if !defined(__xpv)
	/*
	 * alloc space for ucode_info
	 */
	ucode_alloc_space(cp);
#endif

	hat_cpu_online(cp);

#ifdef TRAPTRACE
	/*
	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers
	 */
	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
	ttc->ttc_next = ttc->ttc_first;
	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif
	/*
	 * Record that we have another CPU.
	 */
	mutex_enter(&cpu_lock);
	/*
	 * Initialize the interrupt threads for this CPU
	 */
	cpu_intr_alloc(cp, NINTR_THREADS);
	/*
	 * Add CPU to list of available CPUs.  It'll be on the active list
	 * after mp_startup().
	 */
	cpu_add_unit(cp);
	mutex_exit(&cpu_lock);

	return (cp);
}

/*
 * Undo what was done in mp_startup_init
 */
static void
mp_startup_fini(struct cpu *cp, int error)
{
	mutex_enter(&cpu_lock);

	/*
	 * Remove the CPU from the list of available CPUs.
	 */
	cpu_del_unit(cp->cpu_id);

	if (error == ETIMEDOUT) {
		/*
		 * The cpu was started, but never *seemed* to run any
		 * code in the kernel; it's probably off spinning in its
		 * own private world, though with potential references to
		 * our kmem-allocated IDTs and GDTs (for example).
		 *
		 * Worse still, it may actually wake up some time later,
		 * so rather than guess what it might or might not do, we
		 * leave the fundamental data structures intact.
		 */
		cp->cpu_flags = 0;
		mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * At this point, the only threads bound to this CPU should be the
	 * special per-CPU threads: its idle thread, its pause threads,
	 * and its interrupt threads.  Clean these up.
	 */
	cpu_destroy_bound_threads(cp);
	cp->cpu_idle_thread = NULL;

	/*
	 * Free the interrupt stack.
	 */
	segkp_release(segkp,
	    cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));

	mutex_exit(&cpu_lock);

#ifdef TRAPTRACE
	/*
	 * Discard the trap trace buffer
	 */
	{
		trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];

		kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
		ttc->ttc_first = NULL;
	}
#endif

	hat_cpu_offline(cp);

	cpuid_free_space(cp);

#if !defined(__xpv)
	ucode_free_space(cp);
#endif

	if (cp->cpu_idt != CPU->cpu_idt)
		kmem_free(cp->cpu_idt, PAGESIZE);
	cp->cpu_idt = NULL;

	kmem_free(cp->cpu_gdt, PAGESIZE);
	cp->cpu_gdt = NULL;

	teardown_vaddr_for_ppcopy(cp);

	kcpc_hw_fini(cp);

	cp->cpu_dispthread = NULL;
	cp->cpu_thread = NULL;	/* discarded by cpu_destroy_bound_threads() */

	cpu_vm_data_destroy(cp);

	mutex_enter(&cpu_lock);
	disp_cpu_fini(cp);
	mutex_exit(&cpu_lock);

#if !defined(__xpv)
	if (cp->cpu_m.mcpu_mwait != NULL)
		cpuid_mwait_free(cp);
#endif
	kmem_free(cp, sizeof (*cp));
}

/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
 * mp_startup() for all slave CPUs.  Slaves process workaround_errata prior
 * to acknowledging their readiness to the master, so this routine will
 * never be executed by multiple CPUs in parallel, thus making updates to
 * global data safe.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */

#if defined(OPTERON_ERRATUM_88)
int opteron_erratum_88;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_95)
int opteron_erratum_95;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_108)
int opteron_erratum_108;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_WORKAROUND_6336786)
int opteron_workaround_6336786;		/* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
#endif

#if defined(OPTERON_WORKAROUND_6323525)
int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
#endif

static void
workaround_warning(cpu_t *cp, uint_t erratum)
{
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
	    cp->cpu_id, erratum);
}

static void
workaround_applied(uint_t erratum)
{
	if (erratum > 1000000)
		cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
		    erratum);
	else
		cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
		    erratum);
}

static void
msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
{
	cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
	    cp->cpu_id, rw, msr, error);
}

/*
 * Determine the number of nodes in an Opteron / Greyhound family system.
 */
static uint_t
opteron_get_nnodes(void)
{
	static uint_t nnodes = 0;

#ifdef	DEBUG
	uint_t family;

	family = cpuid_getfamily(CPU);
	ASSERT(family == 0xf || family == 0x10);
#endif	/* DEBUG */

	if (nnodes == 0) {
		/*
		 * Obtain the number of nodes in the system from
		 * bits [6:4] of the Node ID register on node 0.
		 *
		 * The actual node count is NodeID[6:4] + 1
		 *
		 * The Node ID register is accessed via function 0,
		 * offset 0x60. Node 0 is device 24.
		 */
		nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1;
	}
	return (nnodes);
}

#if defined(__xpv)

/*
 * On dom0, we can determine the number of physical cpus on the machine.
 * This number is important when figuring out what workarounds are
 * appropriate, so compute it now.
 */
static uint_t
xen_get_nphyscpus(void)
{
	static uint_t nphyscpus = 0;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if (nphyscpus == 0) {
		xen_sysctl_t op;
		xen_sysctl_physinfo_t *pi = &op.u.physinfo;

		op.cmd = XEN_SYSCTL_physinfo;
		op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
		if (HYPERVISOR_sysctl(&op) == 0)
			nphyscpus = pi->threads_per_core *
			    pi->cores_per_socket * pi->sockets_per_node *
			    pi->nr_nodes;
	}
	return (nphyscpus);
}
#endif

uint_t
workaround_errata(struct cpu *cpu)
{
	uint_t missing = 0;

	ASSERT(cpu == CPU);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 88) > 0) {
		/*
		 * SWAPGS May Fail To Read Correct GS Base
		 */
#if defined(OPTERON_ERRATUM_88)
		/*
		 * The workaround is an mfence in the relevant assembler code
		 */
		opteron_erratum_88++;
#else
		workaround_warning(cpu, 88);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 91) > 0) {
		/*
		 * Software Prefetches May Report A Page Fault
		 */
#if defined(OPTERON_ERRATUM_91)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_91++;
#else
		workaround_warning(cpu, 91);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 93) > 0) {
		/*
		 * RSM Auto-Halt Restart Returns to Incorrect RIP
		 */
#if defined(OPTERON_ERRATUM_93)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_93++;
#else
		workaround_warning(cpu, 93);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 95) > 0) {
		/*
		 * RET Instruction May Return to Incorrect EIP
		 */
#if defined(OPTERON_ERRATUM_95)
#if defined(_LP64)
		/*
		 * Work around this by ensuring that 32-bit user code and
		 * 64-bit kernel code never occupy the same address
		 * range mod 4G.
		 */
		if (_userlimit32 > 0xc0000000ul)
			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;

		/*LINTED*/
		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
		opteron_erratum_95++;
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 95);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 100) > 0) {
		/*
		 * Compatibility Mode Branches Transfer to Illegal Address
		 */
#if defined(OPTERON_ERRATUM_100)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_100++;
#else
		workaround_warning(cpu, 100);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 108) > 0) {
		/*
		 * CPUID Instruction May Return Incorrect Model Number In
		 * Some Processors
		 */
#if defined(OPTERON_ERRATUM_108)
		/*
		 * (Our cpuid-handling code corrects the model number on
		 * those processors)
		 */
#else
		workaround_warning(cpu, 108);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 109) > 0) do {
		/*
		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
		 */
#if defined(OPTERON_ERRATUM_109)
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 109);
			missing++;
		}
		if (value == 0)
			opteron_erratum_109++;
#else
		workaround_warning(cpu, 109);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 121) > 0) {
		/*
		 * Sequential Execution Across Non-Canonical Boundary Causes
		 * Processor Hang
		 */
#if defined(OPTERON_ERRATUM_121)
#if defined(_LP64)
		/*
		 * Erratum 121 is only present in long (64 bit) mode.
		 * Workaround is to include the page immediately before the
		 * va hole to eliminate the possibility of system hangs due to
		 * sequential execution across the va hole boundary.
		 */
		if (opteron_erratum_121)
			opteron_erratum_121++;
		else {
			if (hole_start) {
				hole_start -= PAGESIZE;
			} else {
				/*
				 * hole_start not yet initialized by
				 * mmu_init. Initialize hole_start
				 * with value to be subtracted.
				 */
				hole_start = PAGESIZE;
			}
			opteron_erratum_121++;
		}
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 121);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 122) > 0) do {
		/*
		 * TLB Flush Filter May Cause Coherency Problem in
		 * Multiprocessor Systems
		 */
#if defined(OPTERON_ERRATUM_122)
		uint64_t value;
		const uint_t msr = MSR_AMD_HWCR;
		int error;

		/*
		 * Erratum 122 is only present in MP configurations (multi-core
		 * or multi-processor).
		 */
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (!opteron_erratum_122 && xen_get_nphyscpus() == 1)
			break;
#else
		if (!opteron_erratum_122 && opteron_get_nnodes() == 1 &&
		    cpuid_get_ncpu_per_chip(cpu) == 1)
			break;
#endif
		/* disable TLB Flush Filter */

		if ((error = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 122);
			missing++;
		} else {
			value |= (uint64_t)AMD_HWCR_FFDIS;
			if ((error = checked_wrmsr(msr, value)) != 0) {
				msr_warning(cpu, "wr", msr, error);
				workaround_warning(cpu, 122);
				missing++;
			}
		}
		opteron_erratum_122++;
#else
		workaround_warning(cpu, 122);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 123) > 0) do {
		/*
		 * Bypassed Reads May Cause Data Corruption or System Hang in
		 * Dual Core Processors
		 */
#if defined(OPTERON_ERRATUM_123)
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		/*
		 * Erratum 123 applies only to multi-core cpus.
		 */
		if (cpuid_get_ncpu_per_chip(cpu) < 2)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
#endif
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 123);
			missing++;
		}
		if (value == 0)
			opteron_erratum_123++;
#else
		workaround_warning(cpu, 123);
		missing++;

#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 131) > 0) do {
		/*
		 * Multiprocessor Systems with Four or More Cores May Deadlock
		 * Waiting for a Probe Response
		 */
#if defined(OPTERON_ERRATUM_131)
		uint64_t nbcfg;
		const uint_t msr = MSR_AMD_NB_CFG;
		const uint64_t wabits =
		    AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
		int error;

		/*
		 * Erratum 131 applies to any system with four or more cores.
		 */
		if (opteron_erratum_131)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (xen_get_nphyscpus() < 4)
			break;
#else
		if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4)
			break;
#endif
		/*
		 * Print a warning if neither of the workarounds for
		 * erratum 131 is present.
		 */
		if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 131);
			missing++;
		} else if ((nbcfg & wabits) == 0) {
			opteron_erratum_131++;
		} else {
			/* cannot have both workarounds set */
			ASSERT((nbcfg & wabits) != wabits);
		}
#else
		workaround_warning(cpu, 131);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*
	 * This isn't really an erratum, but for convenience the
	 * detection/workaround code lives here and in cpuid_opteron_erratum.
	 */
	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
#if defined(OPTERON_WORKAROUND_6336786)
		/*
		 * Disable C1-Clock ramping on multi-core/multi-processor
		 * K8 platforms to guard against TSC drift.
		 */
		if (opteron_workaround_6336786) {
			opteron_workaround_6336786++;
#if defined(__xpv)
		} else if ((DOMAIN_IS_INITDOMAIN(xen_info) &&
		    xen_get_nphyscpus() > 1) ||
		    opteron_workaround_6336786_UP) {
			/*
			 * XXPV	Hmm.
			 * We can't walk the Northbridges on
			 * the hypervisor; so just complain and drive
			 * on.  This probably needs to be fixed in
			 * the hypervisor itself.
			 */
			opteron_workaround_6336786++;
			workaround_warning(cpu, 6336786);
#else	/* __xpv */
		} else if ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu) > 1) ||
		    opteron_workaround_6336786_UP) {

			uint_t	node, nnodes;
			uint8_t data;

			nnodes = opteron_get_nnodes();
			for (node = 0; node < nnodes; node++) {
				/*
				 * Clear PMM7[1:0] (function 3, offset 0x87)
				 * Northbridge device is the node id + 24.
				 */
				data = pci_getb_func(0, node + 24, 3, 0x87);
				data &= 0xFC;
				pci_putb_func(0, node + 24, 3, 0x87, data);
			}
			opteron_workaround_6336786++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6336786);
		missing++;
#endif
	}

	/*LINTED*/
	/*
	 * Mutex primitives don't work as expected.
	 */
	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
#if defined(OPTERON_WORKAROUND_6323525)
		/*
		 * This problem only occurs with 2 or more cores.  If the bit
		 * in MSR_BU_CFG is set, it is not applicable.  The workaround
		 * is to patch the semaphore routines with the lfence
		 * instruction to provide the necessary load memory barrier
		 * before possible subsequent read-modify-write ops.
		 *
		 * It is too early in boot to call the patch routine so
		 * set erratum variable to be done in startup_end().
		 */
		if (opteron_workaround_6323525) {
			opteron_workaround_6323525++;
#if defined(__xpv)
		} else if (x86_feature & X86_SSE2) {
			if (DOMAIN_IS_INITDOMAIN(xen_info)) {
				/*
				 * XXPV	Use dom0_msr here when extended
				 *	operations are supported?
				 */
				if (xen_get_nphyscpus() > 1)
					opteron_workaround_6323525++;
			} else {
				/*
				 * We have no way to tell how many physical
				 * cpus there are, or even if this processor
				 * has the problem, so enable the workaround
				 * unconditionally (at some performance cost).
				 */
				opteron_workaround_6323525++;
			}
#else	/* __xpv */
		} else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
			if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0)
				opteron_workaround_6323525++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6323525);
		missing++;
#endif
	}

#ifdef __xpv
	return (0);
#else
	return (missing);
#endif
}

void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_88)
	if (opteron_erratum_88)
		workaround_applied(88);
#endif
#if defined(OPTERON_ERRATUM_91)
	if (opteron_erratum_91)
		workaround_applied(91);
#endif
#if defined(OPTERON_ERRATUM_93)
	if (opteron_erratum_93)
		workaround_applied(93);
#endif
#if defined(OPTERON_ERRATUM_95)
	if (opteron_erratum_95)
		workaround_applied(95);
#endif
#if defined(OPTERON_ERRATUM_100)
	if (opteron_erratum_100)
		workaround_applied(100);
#endif
#if defined(OPTERON_ERRATUM_108)
	if (opteron_erratum_108)
		workaround_applied(108);
#endif
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 109 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_121)
	if (opteron_erratum_121)
		workaround_applied(121);
#endif
#if defined(OPTERON_ERRATUM_122)
	if (opteron_erratum_122)
		workaround_applied(122);
#endif
#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 123 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 131 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_WORKAROUND_6336786)
	if (opteron_workaround_6336786)
		workaround_applied(6336786);
#endif
#if defined(OPTERON_WORKAROUND_6323525)
	if (opteron_workaround_6323525)
		workaround_applied(6323525);
#endif
}

static cpuset_t procset;	/* CPUs that have reached mp_startup() */

/*
 * Start a single cpu, assuming that the kernel context is available
 * to successfully start another cpu.
 *
 * (For example, real mode code is mapped into the right place
 * in memory and is ready to be run.)
 */
int
start_cpu(processorid_t who)
{
	void *ctx;
	cpu_t *cp;
	int delays;
	int error = 0;

	ASSERT(who != 0);

	/*
	 * Check if there's at least a megabyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	cp = mp_startup_init(who);
	if ((ctx = mach_cpucontext_alloc(cp)) == NULL ||
	    (error = mach_cpu_start(cp, ctx)) != 0) {

		/*
		 * Something went wrong before we even started it
		 */
		if (ctx)
			cmn_err(CE_WARN,
			    "cpu%d: failed to start error %d",
			    cp->cpu_id, error);
		else
			cmn_err(CE_WARN,
			    "cpu%d: failed to allocate context", cp->cpu_id);

		if (ctx)
			mach_cpucontext_free(cp, ctx, error);
		else
			error = EAGAIN;		/* hmm. */
		mp_startup_fini(cp, error);
		return (error);
	}

	for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably looking
			 * a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			error = ETIMEDOUT;
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			mach_cpucontext_free(cp, ctx, error);
			mp_startup_fini(cp, error);
			return (error);
		}

		/*
		 * wait at least 10ms, then check again..
		 */
		delay(USEC_TO_TICK_ROUNDUP(10000));
	}

	mach_cpucontext_free(cp, ctx, 0);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_master(who);
#endif

	if (dtrace_cpu_init != NULL) {
		/*
		 * DTrace CPU initialization expects cpu_lock to be held.
		 */
		mutex_enter(&cpu_lock);
		(*dtrace_cpu_init)(who);
		mutex_exit(&cpu_lock);
	}

	while (!CPU_IN_SET(cpu_ready_set, who))
		delay(1);

	return (0);
}


/*ARGSUSED*/
void
start_other_cpus(int cprboot)
{
	uint_t who;
	uint_t skipped = 0;
	uint_t bootcpuid = 0;

	/*
	 * Initialize our own cpu_info.
	 */
	init_cpu_info(CPU);

	/*
	 * Initialize our syscall handlers
	 */
	init_cpu_syscall(CPU);

	/*
	 * Take the boot cpu out of the mp_cpus set because we know
	 * it's already running.  Add it to the cpu_ready_set for
	 * precisely the same reason.
	 */
	CPUSET_DEL(mp_cpus, bootcpuid);
	CPUSET_ADD(cpu_ready_set, bootcpuid);

	/*
	 * if only 1 cpu or not using MP, skip the rest of this
	 */
	if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
		if (use_mp == 0)
			cmn_err(CE_CONT, "?***** Not in MP mode\n");
		goto done;
	}

	/*
	 * perform such initialization as is needed
	 * to be able to take CPUs on- and off-line.
	 */
	cpu_pause_init();

	xc_init();		/* initialize processor crosscalls */

	if (mach_cpucontext_init() != 0)
		goto done;

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave CPUs
	 * do their TSC syncs with the same CPU.
	 */
	affinity_set(CPU_CURRENT);

	for (who = 0; who < NCPU; who++) {

		if (!CPU_IN_SET(mp_cpus, who))
			continue;
		ASSERT(who != bootcpuid);
		if (ncpus >= max_ncpus) {
			skipped = who;
			continue;
		}
		if (start_cpu(who) != 0)
			CPUSET_DEL(mp_cpus, who);
	}

#if !defined(__xpv)
	/* Free the space allocated to hold the microcode file */
	ucode_free();
#endif

	affinity_clear();

	if (skipped) {
		cmn_err(CE_NOTE,
		    "System detected %d cpus, but "
		    "only %d cpu(s) were enabled during boot.",
		    skipped + 1, ncpus);
		cmn_err(CE_NOTE,
		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
		    "See eeprom(1M).");
	}

done:
	workaround_errata_end();
	mach_cpucontext_fini();

	cmi_post_mpstartup();
}

/*
 * Dummy functions - no i86pc platforms support dynamic cpu allocation.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	return (ENOTSUP);		/* not supported */
}

/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	return (ENOTSUP);		/* not supported */
}

/*
 * Startup function for 'other' CPUs (besides boot cpu).
 * Called from real_mode_start.
 *
 * WARNING: until CPU_READY is set, mp_startup and routines called by
 * mp_startup should not call routines (e.g. kmem_free) that could call
 * hat_unload which requires CPU_READY to be set.
 */
void
mp_startup(void)
{
	struct cpu *cp = CPU;
	uint_t new_x86_feature;

	/*
	 * We need to get TSC on this proc synced (i.e., any delta
	 * from cpu0 accounted for) as soon as we can, because many
	 * things use gethrtime/pc_gethrestime, including
	 * interrupts, cmn_err, etc.
	 */

	/* Let cpu0 continue into tsc_sync_master() */
	CPUSET_ATOMIC_ADD(procset, cp->cpu_id);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_slave();
#endif

	/*
	 * This used to be done in assembly, but it's safer here; if
	 * it blocks, we need to be able to swtch() to and from, and
	 * since we get here by calling t_pc, we need to do that call
	 * before swtch() overwrites it.
	 */

	(void) (*ap_mlsetup)();

	new_x86_feature = cpuid_pass1(cp);

#ifndef __xpv
	/*
	 * Program this cpu's PAT
	 */
	if (x86_feature & X86_PAT)
		pat_sync();
#endif

	/*
	 * Set up TSC_AUX to contain the cpuid for this processor
	 * for the rdtscp instruction.
	 */
	if (x86_feature & X86_TSCP)
		(void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	/*
	 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the
	 * highest level at which a routine is permitted to block on
	 * an adaptive mutex (allows for cpu poke interrupt in case
	 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks
	 * device interrupts that may end up in the hat layer issuing cross
	 * calls before CPU_READY is set.
	 */
	splx(ipltospl(LOCK_LEVEL));
	sti();

	/*
	 * Do a sanity check to make sure this new CPU is a sane thing
	 * to add to the collection of processors running this system.
	 *
	 * XXX	Clearly this needs to get more sophisticated, if x86
	 * systems start to get built out of heterogeneous CPUs, as is
	 * likely to happen once the number of processors in a configuration
	 * gets large enough.
	 */
	if ((x86_feature & new_x86_feature) != x86_feature) {
		cmn_err(CE_CONT, "?cpu%d: %b\n",
		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
	}

	/*
	 * We do not support cpus with mixed monitor/mwait support if the
	 * boot cpu supports monitor/mwait.
	 */
	if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
		panic("unsupported mixed cpu monitor/mwait support detected");

	/*
	 * We could be more sophisticated here, and just mark the CPU
	 * as "faulted" but at this point we'll opt for the easier
	 * answer of dying horribly.  Provided the boot cpu is ok,
	 * the system can be recovered by booting with use_mp set to zero.
	 */
	if (workaround_errata(cp) != 0)
		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);

	cpuid_pass2(cp);
	cpuid_pass3(cp);
	(void) cpuid_pass4(cp);

	init_cpu_info(cp);

	mutex_enter(&cpu_lock);
	/*
	 * Processor group initialization for this CPU is dependent on the
	 * cpuid probing, which must be done in the context of the current
	 * CPU.
	 */
	pghw_physid_create(cp);
	pg_cpu_init(cp);
	pg_cmt_cpu_startup(cp);

	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
	cpu_add_active(cp);

	if (dtrace_cpu_init != NULL) {
		(*dtrace_cpu_init)(cp->cpu_id);
	}

#if !defined(__xpv)
	/*
	 * Fill out cpu_ucode_info.  Update microcode if necessary.
	 */
	ucode_check(cp);
#endif

	mutex_exit(&cpu_lock);

	/*
	 * Enable preemption here so that this thread may be preempted if it
	 * contends for any locks acquired later in mp_startup whose owners
	 * are continuously executing on other CPUs (for example, this
	 * CPU must be preemptible to allow other CPUs to pause it during their
	 * startup phases).  It's safe to enable preemption here because the
	 * CPU state is pretty much fully constructed.
	 */
	curthread->t_preempt = 0;

	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);

	/* The base spl should still be at LOCK LEVEL here */
	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
	set_base_spl();		/* Restore the spl to its proper value */

	(void) spl0();				/* enable interrupts */

#ifndef __xpv
	{
		/*
		 * Set up the CPU module for this CPU.  This can't be done
		 * before this CPU is made CPU_READY, because we may (in
		 * heterogeneous systems) need to go load another CPU module.
		 * The act of attempting to load a module may trigger a
		 * cross-call, which will ASSERT unless this cpu is CPU_READY.
		 */
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
		    cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
			if (x86_feature & X86_MCA)
				cmi_mca_init(hdl);
		}
	}
#endif /* __xpv */

	if (boothowto & RB_DEBUG)
		kdi_cpu_init();

	/*
	 * Setting the bit in cpu_ready_set must be the last operation in
	 * processor initialization; the boot CPU will continue to boot once
	 * it sees this bit set for all active CPUs.
	 */
	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);

	/*
	 * Because mp_startup() gets fired off after init() starts, we
	 * can't use the '?' trick to do 'boot -v' printing - so we
	 * always direct the 'cpu .. online' messages to the log.
	 */
	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
	    cp->cpu_id);

	/*
	 * Now we are done with the startup thread, so free it up.
	 */
	thread_exit();
	panic("mp_startup: cannot return");
	/*NOTREACHED*/
}


/*
 * Start CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_start(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (0);
}

/*
 * Stop CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_stop(struct cpu *cp)
{
	extern int cbe_psm_timer_mode;
	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef __xpv
	/*
	 * We can't offline vcpu0.
	 */
	if (cp->cpu_id == 0)
		return (EBUSY);
#endif

	/*
	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
	 * can't stop it.  (This is true only for machines with no TSC.)
	 */

	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
		return (EBUSY);

	return (0);
}

/*
 * Take the specified CPU out of participation in interrupts.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
		return (EBUSY);

	cp->cpu_flags &= ~CPU_ENABLE;
	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cp->cpu_flags |= CPU_ENABLE;
	psm_enable_intr(cp->cpu_id);
}


/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
#ifndef __xpv
	cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	if (hdl != NULL) {
		cmi_faulted_enter(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
#ifndef __xpv
	cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	if (hdl != NULL) {
		cmi_faulted_exit(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

/*
 * The following two routines are used as context operators on threads belonging
 * to processes with a private LDT (see sysi86).  Due to the rarity of such
 * processes, these routines are currently written for best code readability and
 * organization rather than speed.  We could avoid checking x86_feature at every
 * context switch by installing different context ops, depending on the
 * x86_feature flags, at LDT creation time -- one for each combination of fast
 * syscall feature flags.
 */

/*ARGSUSED*/
void
cpu_fast_syscall_disable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_disable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_disable();
}

/*ARGSUSED*/
void
cpu_fast_syscall_enable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_enable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_enable();
}

static void
cpu_sep_enable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
}

static void
cpu_sep_disable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
	 * the sysenter or sysexit instruction to trigger a #gp fault.
	 */
	wrmsr(MSR_INTC_SEP_CS, 0);
}

static void
cpu_asysc_enable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
}

static void
cpu_asysc_disable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Turn off the SCE (syscall enable) bit in the EFER register. Software
	 * executing syscall or sysret with this bit off will incur a #ud trap.
	 */
	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
}