/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vtrace.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <sys/segments.h>
#include <sys/kmem.h>
#include <sys/stack.h>
#include <sys/smp_impldefs.h>
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cpc_impl.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
#include <sys/sysmacros.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include <sys/cpu_module.h>

struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

/*
 * Useful for disabling MP bring-up on an MP-capable system.
 */
int use_mp = 1;

/*
 * To be set by a PSM to indicate which CPUs are present on the system.
 */
cpuset_t mp_cpus;

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  It is set
 * once enough MP initialization has been done to allow cross calls.
 */
int flushes_require_xcalls;

cpuset_t cpu_ready_set;		/* initialized in startup() */

static void mp_startup(void);

static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/*
 * Init CPU info - get CPU type info for processor_info system call.
 */
void
init_cpu_info(struct cpu *cp)
{
	processor_info_t *pi = &cp->cpu_type_info;
	char buf[CPU_IDSTRLEN];

	/*
	 * Get clock-frequency property for the CPU.
	 */
	pi->pi_clock = cpu_freq;

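	/*
	 * A note on units, summarizing processor_info(2) and the clock
	 * globals set up during startup: pi_clock (and cpu_freq) is the
	 * clock rate expressed in MHz, while cpu_curr_clock below is kept
	 * in Hz via cpu_freq_hz.
	 */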
	/*
	 * Current frequency in Hz.
	 */
	cp->cpu_curr_clock = cpu_freq_hz;

	/*
	 * Supported frequencies.
	 */
	cpu_set_supp_freqs(cp, NULL);

	(void) strcpy(pi->pi_processor_type, "i386");
	if (fpu_exists)
		(void) strcpy(pi->pi_fputypes, "i387 compatible");

	(void) cpuid_getidstr(cp, buf, sizeof (buf));

	cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
	(void) strcpy(cp->cpu_idstr, buf);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);

	(void) cpuid_getbrandstr(cp, buf, sizeof (buf));
	cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
	(void) strcpy(cp->cpu_brandstr, buf);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
}

/*
 * Configure syscall support on this CPU.
 */
/*ARGSUSED*/
void
init_cpu_syscall(struct cpu *cp)
{
	kpreempt_disable();

#if defined(__amd64)
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC)) {

#if !defined(__lint)
		/*
		 * The syscall instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);
		ASSERT(UDS_SEL == U32CS_SEL + 8);
		ASSERT(UCS_SEL == U32CS_SEL + 16);
#endif
		/*
		 * Turn syscall/sysret extensions on.
		 */
		cpu_asysc_enable();

		/*
		 * Program the magic registers ..
		 */
		wrmsr(MSR_AMD_STAR,
		    ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);

		/*
		 * This list of flags is masked off the incoming
		 * %rfl when we enter the kernel.
		 */
		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
	}
#endif

	/*
	 * On 32-bit kernels, we use sysenter/sysexit because it's too
	 * hard to use syscall/sysret, and it is more portable anyway.
	 *
	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
	 * variant isn't available to 32-bit applications, but sysenter is.
	 */
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP)) {

#if !defined(__lint)
		/*
		 * The sysenter instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.  See "sysenter" in Intel document 245471-012, "IA-32
		 * Intel Architecture Software Developer's Manual Volume 2:
		 * Instruction Set Reference"
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);

		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT32(UDS_SEL == UCS_SEL + 8);

		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT64(UDS_SEL == U32CS_SEL + 8);
#endif

		cpu_sep_enable();

		/*
		 * resume() sets this value to the base of the thread's stack
		 * via a context handler.
		 */
		wrmsr(MSR_INTC_SEP_ESP, 0);
		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
	}

	kpreempt_enable();
}

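/*
 * A brief sketch of how the MSRs programmed above fit together, summarized
 * from the AMD64 and IA-32 architecture manuals rather than from this file:
 *
 *	MSR_AMD_STAR	bits 47:32 hold KCS_SEL, the kernel %cs/%ss base
 *			used on syscall entry; bits 63:48 hold U32CS_SEL,
 *			the user selector base used by sysret.
 *	MSR_AMD_LSTAR	64-bit syscall entry point (sys_syscall).
 *	MSR_AMD_CSTAR	compatibility-mode syscall entry (sys_syscall32).
 *	MSR_AMD_SFMASK	%rfl bits cleared on entry (here PS_IE | PS_T).
 *	MSR_INTC_SEP_*	sysenter %cs base, %esp and %eip, used when the
 *			X86_SEP feature is present.
 */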
/*
 * Multiprocessor initialization.
 *
 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
 * startup and idle threads for the specified CPU.
 */
struct cpu *
mp_startup_init(int cpun)
{
	struct cpu *cp;
	kthread_id_t tp;
	caddr_t	sp;
	proc_t *procp;
#if !defined(__xpv)
	extern int idle_cpu_prefer_mwait;
#endif
	extern void idle();

#ifdef TRAPTRACE
	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
#endif

	ASSERT(cpun < NCPU && cpu[cpun] == NULL);

	cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
#if !defined(__xpv)
	if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
		cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
#endif

	procp = curthread->t_procp;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the dispatcher first.
	 */
	disp_cpu_init(cp);
	mutex_exit(&cpu_lock);

	cpu_vm_data_init(cp);

	/*
	 * Allocate and initialize the startup thread for this CPU.
	 * Interrupt and process switch stacks get allocated later
	 * when the CPU starts running.
	 */
	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
	    TS_STOPPED, maxclsyspri);

	/*
	 * Set state to TS_ONPROC since this thread will start running
	 * as soon as the CPU comes online.
	 *
	 * All the other fields of the thread structure are set up by
	 * thread_create().
	 */
	THREAD_ONPROC(tp, cp);
	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Set up the thread to start in mp_startup().
	 */
	sp = tp->t_stk;
	tp->t_pc = (uintptr_t)mp_startup;
	tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
	tp->t_sp -= STACK_ENTRY_ALIGN;		/* fake a call */
#endif

	cp->cpu_id = cpun;
	cp->cpu_self = cp;
	cp->cpu_thread = tp;
	cp->cpu_lwp = NULL;
	cp->cpu_dispthread = tp;
	cp->cpu_dispatch_pri = DISP_PRIO(tp);

	/*
	 * cpu_base_spl must be set explicitly here to prevent any blocking
	 * operations in mp_startup from causing the spl of the cpu to drop
	 * to 0 (allowing device interrupts before we're ready) in resume().
	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
	 * As an extra bit of security on DEBUG kernels, this is enforced with
	 * an assertion in mp_startup() -- before cpu_base_spl is set to its
	 * proper value.
	 */
	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);

	/*
	 * Now, initialize the per-CPU idle thread for this CPU.
	 */
	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);

	cp->cpu_idle_thread = tp;

	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Bootstrap the CPU's PG data.
	 */
	pg_cpu_bootstrap(cp);

	/*
	 * Perform CPC initialization on the new CPU.
	 */
	kcpc_hw_init(cp);

	/*
	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
	 * for each CPU.
	 */
	setup_vaddr_for_ppcopy(cp);

	/*
	 * Allocate page for new GDT and initialize from current GDT.
	 */
#if !defined(__lint)
	ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
#endif
	cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
	bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT));

#if defined(__i386)
	/*
	 * Set up kernel %gs.
	 */
	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
	    SEL_KPL, 0, 1);
#endif

	/*
	 * If we have more than one node, each cpu gets a copy of IDT
	 * local to its node.
	 * If this is a Pentium box, we use cpu 0's IDT.  cpu 0's IDT has
	 * been made read-only to work around the cmpxchgl register bug.
	 */
	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
#if !defined(__lint)
		ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
#endif
		cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
		bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
	} else {
		cp->cpu_idt = CPU->cpu_idt;
	}

	/*
	 * Get interrupt priority data from cpu 0.
	 */
	cp->cpu_pri_data = CPU->cpu_pri_data;

	/*
	 * Allocate space for cpuid info.
	 */
	cpuid_alloc_space(cp);

#if !defined(__xpv)
	/*
	 * Allocate space for ucode_info.
	 */
	ucode_alloc_space(cp);
#endif

	hat_cpu_online(cp);

#ifdef TRAPTRACE
	/*
	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers.
	 */
	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
	ttc->ttc_next = ttc->ttc_first;
	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif
	/*
	 * Record that we have another CPU.
	 */
	mutex_enter(&cpu_lock);
	/*
	 * Initialize the interrupt threads for this CPU.
	 */
	cpu_intr_alloc(cp, NINTR_THREADS);
	/*
	 * Add CPU to list of available CPUs.  It'll be on the active list
	 * after mp_startup().
	 */
	cpu_add_unit(cp);
	mutex_exit(&cpu_lock);

	return (cp);
}

/*
 * Undo what was done in mp_startup_init().
 */
static void
mp_startup_fini(struct cpu *cp, int error)
{
	mutex_enter(&cpu_lock);

	/*
	 * Remove the CPU from the list of available CPUs.
	 */
	cpu_del_unit(cp->cpu_id);

	if (error == ETIMEDOUT) {
		/*
		 * The cpu was started, but never *seemed* to run any
		 * code in the kernel; it's probably off spinning in its
		 * own private world, though with potential references to
		 * our kmem-allocated IDTs and GDTs (for example).
		 *
		 * Worse still, it may actually wake up some time later,
		 * so rather than guess what it might or might not do, we
		 * leave the fundamental data structures intact.
		 */
		cp->cpu_flags = 0;
		mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * At this point, the only threads bound to this CPU should be the
	 * special per-CPU threads: its idle thread, its pause threads, and
	 * its interrupt threads.  Clean these up.
	 */
	cpu_destroy_bound_threads(cp);
	cp->cpu_idle_thread = NULL;

	/*
	 * Free the interrupt stack.
	 */
	segkp_release(segkp,
	    cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));

	mutex_exit(&cpu_lock);

#ifdef TRAPTRACE
	/*
	 * Discard the trap trace buffer.
	 */
	{
		trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];

		kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
		ttc->ttc_first = NULL;
	}
#endif

	hat_cpu_offline(cp);

	cpuid_free_space(cp);

#if !defined(__xpv)
	ucode_free_space(cp);
#endif

	if (cp->cpu_idt != CPU->cpu_idt)
		kmem_free(cp->cpu_idt, PAGESIZE);
	cp->cpu_idt = NULL;

	kmem_free(cp->cpu_gdt, PAGESIZE);
	cp->cpu_gdt = NULL;

	teardown_vaddr_for_ppcopy(cp);

	kcpc_hw_fini(cp);

	cp->cpu_dispthread = NULL;
	cp->cpu_thread = NULL;	/* discarded by cpu_destroy_bound_threads() */

	cpu_vm_data_destroy(cp);

	mutex_enter(&cpu_lock);
	disp_cpu_fini(cp);
	mutex_exit(&cpu_lock);

#if !defined(__xpv)
	if (cp->cpu_m.mcpu_mwait != NULL)
		cpuid_mwait_free(cp);
#endif
	kmem_free(cp, sizeof (*cp));
}

/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
 * mp_startup() for all slave CPUs.  Slaves process workaround_errata prior
 * to acknowledging their readiness to the master, so this routine will
 * never be executed by multiple CPUs in parallel, thus making updates to
 * global data safe.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */

#if defined(OPTERON_ERRATUM_88)
int opteron_erratum_88;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_95)
int opteron_erratum_95;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_108)
int opteron_erratum_108;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_WORKAROUND_6336786)
int opteron_workaround_6336786;		/* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
#endif

#if defined(OPTERON_WORKAROUND_6323525)
int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
#endif

static void
workaround_warning(cpu_t *cp, uint_t erratum)
{
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
	    cp->cpu_id, erratum);
}

static void
workaround_applied(uint_t erratum)
{
	if (erratum > 1000000)
		cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
		    erratum);
	else
		cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
		    erratum);
}

static void
msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
{
	cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
	    cp->cpu_id, rw, msr, error);
}

/*
 * Determine the number of nodes in an Opteron / Greyhound family system.
 */
static uint_t
opteron_get_nnodes(void)
{
	static uint_t nnodes = 0;

#ifdef	DEBUG
	uint_t family;

	family = cpuid_getfamily(CPU);
	ASSERT(family == 0xf || family == 0x10);
#endif	/* DEBUG */

	if (nnodes == 0) {
		/*
		 * Obtain the number of nodes in the system from
		 * bits [6:4] of the Node ID register on node 0.
		 *
		 * The actual node count is NodeID[6:4] + 1.
		 *
		 * The Node ID register is accessed via function 0,
		 * offset 0x60.  Node 0 is device 24.
		 */
		nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1;
	}
	return (nnodes);
}

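/*
 * Worked example with illustrative values only: if the Node ID register at
 * bus 0, device 24, function 0, offset 0x60 were to read 0x00000021, then
 * bits [6:4] would be 0x2 and opteron_get_nnodes() would return 2 + 1 = 3.
 */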
#if defined(__xpv)

/*
 * On dom0, we can determine the number of physical cpus on the machine.
 * This number is important when figuring out what workarounds are
 * appropriate, so compute it now.
 */
uint_t
xen_get_nphyscpus(void)
{
	static uint_t nphyscpus = 0;

	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	if (nphyscpus == 0) {
		xen_sysctl_t op;
		xen_sysctl_physinfo_t *pi = &op.u.physinfo;

		op.cmd = XEN_SYSCTL_physinfo;
		op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
		if (HYPERVISOR_sysctl(&op) == 0)
			nphyscpus = pi->threads_per_core *
			    pi->cores_per_socket * pi->sockets_per_node *
			    pi->nr_nodes;
	}
	return (nphyscpus);
}
#endif

uint_t
workaround_errata(struct cpu *cpu)
{
	uint_t missing = 0;

	ASSERT(cpu == CPU);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 88) > 0) {
		/*
		 * SWAPGS May Fail To Read Correct GS Base
		 */
#if defined(OPTERON_ERRATUM_88)
		/*
		 * The workaround is an mfence in the relevant assembler code.
		 */
		opteron_erratum_88++;
#else
		workaround_warning(cpu, 88);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 91) > 0) {
		/*
		 * Software Prefetches May Report A Page Fault
		 */
#if defined(OPTERON_ERRATUM_91)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_91++;
#else
		workaround_warning(cpu, 91);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 93) > 0) {
		/*
		 * RSM Auto-Halt Restart Returns to Incorrect RIP
		 */
#if defined(OPTERON_ERRATUM_93)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_93++;
#else
		workaround_warning(cpu, 93);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 95) > 0) {
		/*
		 * RET Instruction May Return to Incorrect EIP
		 */
#if defined(OPTERON_ERRATUM_95)
#if defined(_LP64)
		/*
		 * Workaround this by ensuring that 32-bit user code and
		 * 64-bit kernel code never occupy the same address
		 * range mod 4G.
		 */
		if (_userlimit32 > 0xc0000000ul)
			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;

		/*LINTED*/
		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
		opteron_erratum_95++;
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 95);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 100) > 0) {
		/*
		 * Compatibility Mode Branches Transfer to Illegal Address
		 */
#if defined(OPTERON_ERRATUM_100)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_100++;
#else
		workaround_warning(cpu, 100);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 108) > 0) {
		/*
		 * CPUID Instruction May Return Incorrect Model Number In
		 * Some Processors
		 */
#if defined(OPTERON_ERRATUM_108)
		/*
		 * (Our cpuid-handling code corrects the model number on
		 * those processors)
		 */
#else
		workaround_warning(cpu, 108);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 109) > 0) do {
		/*
		 * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
		 */
#if defined(OPTERON_ERRATUM_109)
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 109);
			missing++;
		}
		if (value == 0)
			opteron_erratum_109++;
#else
		workaround_warning(cpu, 109);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 121) > 0) {
		/*
		 * Sequential Execution Across Non-Canonical Boundary Causes
		 * Processor Hang
		 */
#if defined(OPTERON_ERRATUM_121)
#if defined(_LP64)
		/*
		 * Erratum 121 is only present in long (64 bit) mode.
		 * Workaround is to include the page immediately before the
		 * va hole to eliminate the possibility of system hangs due to
		 * sequential execution across the va hole boundary.
		 */
		if (opteron_erratum_121)
			opteron_erratum_121++;
		else {
			if (hole_start) {
				hole_start -= PAGESIZE;
			} else {
				/*
				 * hole_start not yet initialized by
				 * mmu_init.  Initialize hole_start
				 * with value to be subtracted.
				 */
				hole_start = PAGESIZE;
			}
			opteron_erratum_121++;
		}
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 121);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 122) > 0) do {
		/*
		 * TLB Flush Filter May Cause Coherency Problem in
		 * Multiprocessor Systems
		 */
#if defined(OPTERON_ERRATUM_122)
		uint64_t value;
		const uint_t msr = MSR_AMD_HWCR;
		int error;

		/*
		 * Erratum 122 is only present in MP configurations (multi-core
		 * or multi-processor).
		 */
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (!opteron_erratum_122 && xen_get_nphyscpus() == 1)
			break;
#else
		if (!opteron_erratum_122 && opteron_get_nnodes() == 1 &&
		    cpuid_get_ncpu_per_chip(cpu) == 1)
			break;
#endif
		/* disable TLB Flush Filter */

		if ((error = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 122);
			missing++;
		} else {
			value |= (uint64_t)AMD_HWCR_FFDIS;
			if ((error = checked_wrmsr(msr, value)) != 0) {
				msr_warning(cpu, "wr", msr, error);
				workaround_warning(cpu, 122);
				missing++;
			}
		}
		opteron_erratum_122++;
#else
		workaround_warning(cpu, 122);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 123) > 0) do {
		/*
		 * Bypassed Reads May Cause Data Corruption or System Hang in
		 * Dual Core Processors
		 */
#if defined(OPTERON_ERRATUM_123)
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		/*
		 * Erratum 123 applies only to multi-core cpus.
		 */
		if (cpuid_get_ncpu_per_chip(cpu) < 2)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
#endif
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 123);
			missing++;
		}
		if (value == 0)
			opteron_erratum_123++;
#else
		workaround_warning(cpu, 123);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 131) > 0) do {
		/*
		 * Multiprocessor Systems with Four or More Cores May Deadlock
		 * Waiting for a Probe Response
		 */
#if defined(OPTERON_ERRATUM_131)
		uint64_t nbcfg;
		const uint_t msr = MSR_AMD_NB_CFG;
		const uint64_t wabits =
		    AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
		int error;

		/*
		 * Erratum 131 applies to any system with four or more cores.
		 */
		if (opteron_erratum_131)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (xen_get_nphyscpus() < 4)
			break;
#else
		if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4)
			break;
#endif
		/*
		 * Print a warning if neither of the workarounds for
		 * erratum 131 is present.
		 */
		if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 131);
			missing++;
		} else if ((nbcfg & wabits) == 0) {
			opteron_erratum_131++;
		} else {
			/* cannot have both workarounds set */
			ASSERT((nbcfg & wabits) != wabits);
		}
#else
		workaround_warning(cpu, 131);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*
	 * This isn't really an erratum, but for convenience the
	 * detection/workaround code lives here and in cpuid_opteron_erratum.
	 */
	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
#if defined(OPTERON_WORKAROUND_6336786)
		/*
		 * Disable C1-Clock ramping on multi-core/multi-processor
		 * K8 platforms to guard against TSC drift.
		 */
		if (opteron_workaround_6336786) {
			opteron_workaround_6336786++;
#if defined(__xpv)
		} else if ((DOMAIN_IS_INITDOMAIN(xen_info) &&
		    xen_get_nphyscpus() > 1) ||
		    opteron_workaround_6336786_UP) {
			/*
			 * XXPV	Hmm.
			 * We can't walk the Northbridges on the hypervisor,
			 * so just complain and drive on.  This probably
			 * needs to be fixed in the hypervisor itself.
			 */
			opteron_workaround_6336786++;
			workaround_warning(cpu, 6336786);
#else	/* __xpv */
		} else if ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu) > 1) ||
		    opteron_workaround_6336786_UP) {

			uint_t	node, nnodes;
			uint8_t data;

			nnodes = opteron_get_nnodes();
			for (node = 0; node < nnodes; node++) {
				/*
				 * Clear PMM7[1:0] (function 3, offset 0x87).
				 * Northbridge device is the node id + 24.
				 */
				data = pci_getb_func(0, node + 24, 3, 0x87);
				data &= 0xFC;
				pci_putb_func(0, node + 24, 3, 0x87, data);
			}
			opteron_workaround_6336786++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6336786);
		missing++;
#endif
	}

	/*LINTED*/
	/*
	 * Mutex primitives don't work as expected.
	 */
	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
#if defined(OPTERON_WORKAROUND_6323525)
		/*
		 * This problem only occurs with 2 or more cores.  If the bit
		 * in MSR_BU_CFG is set, then it is not applicable.  The
		 * workaround is to patch the semaphore routines with the
		 * lfence instruction to provide the necessary load memory
		 * barrier before possible subsequent read-modify-write ops.
		 *
		 * It is too early in boot to call the patch routine, so
		 * set the erratum variable to be handled in startup_end().
		 */
		if (opteron_workaround_6323525) {
			opteron_workaround_6323525++;
#if defined(__xpv)
		} else if (x86_feature & X86_SSE2) {
			if (DOMAIN_IS_INITDOMAIN(xen_info)) {
				/*
				 * XXPV	Use dom0_msr here when extended
				 *	operations are supported?
				 */
				if (xen_get_nphyscpus() > 1)
					opteron_workaround_6323525++;
			} else {
				/*
				 * We have no way to tell how many physical
				 * cpus there are, or even if this processor
				 * has the problem, so enable the workaround
				 * unconditionally (at some performance cost).
				 */
				opteron_workaround_6323525++;
			}
#else	/* __xpv */
		} else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
			if ((xrdmsr(MSR_BU_CFG) & 0x02) == 0)
				opteron_workaround_6323525++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6323525);
		missing++;
#endif
	}

#ifdef	__xpv
	return (0);
#else
	return (missing);
#endif
}

void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_88)
	if (opteron_erratum_88)
		workaround_applied(88);
#endif
#if defined(OPTERON_ERRATUM_91)
	if (opteron_erratum_91)
		workaround_applied(91);
#endif
#if defined(OPTERON_ERRATUM_93)
	if (opteron_erratum_93)
		workaround_applied(93);
#endif
#if defined(OPTERON_ERRATUM_95)
	if (opteron_erratum_95)
		workaround_applied(95);
#endif
#if defined(OPTERON_ERRATUM_100)
	if (opteron_erratum_100)
		workaround_applied(100);
#endif
#if defined(OPTERON_ERRATUM_108)
	if (opteron_erratum_108)
		workaround_applied(108);
#endif
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 109 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_121)
	if (opteron_erratum_121)
		workaround_applied(121);
#endif
#if defined(OPTERON_ERRATUM_122)
	if (opteron_erratum_122)
		workaround_applied(122);
#endif
#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 123 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 131 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_WORKAROUND_6336786)
	if (opteron_workaround_6336786)
		workaround_applied(6336786);
#endif
#if defined(OPTERON_WORKAROUND_6323525)
	if (opteron_workaround_6323525)
		workaround_applied(6323525);
#endif
}

static cpuset_t procset;

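/*
 * Roughly, procset and cpu_ready_set form a two-stage handshake between the
 * boot CPU and a slave: mp_startup() adds the new CPU to procset almost as
 * soon as it enters the kernel (letting the master proceed to the TSC sync),
 * and adds it to cpu_ready_set only once initialization is complete.
 * start_cpu() below waits on both sets in turn.
 */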
/*
 * Start a single cpu, assuming that the kernel context is available
 * to successfully start another cpu.
 *
 * (For example, real mode code is mapped into the right place
 * in memory and is ready to be run.)
 */
int
start_cpu(processorid_t who)
{
	void *ctx;
	cpu_t *cp;
	int delays;
	int error = 0;

	ASSERT(who != 0);

	/*
	 * Check if there's at least a megabyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	cp = mp_startup_init(who);
	if ((ctx = mach_cpucontext_alloc(cp)) == NULL ||
	    (error = mach_cpu_start(cp, ctx)) != 0) {

		/*
		 * Something went wrong before we even started it.
		 */
		if (ctx)
			cmn_err(CE_WARN,
			    "cpu%d: failed to start error %d",
			    cp->cpu_id, error);
		else
			cmn_err(CE_WARN,
			    "cpu%d: failed to allocate context", cp->cpu_id);

		if (ctx)
			mach_cpucontext_free(cp, ctx, error);
		else
			error = EAGAIN;		/* hmm. */
		mp_startup_fini(cp, error);
		return (error);
	}

	for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably looking
			 * a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			error = ETIMEDOUT;
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			mach_cpucontext_free(cp, ctx, error);
			mp_startup_fini(cp, error);
			return (error);
		}

		/*
		 * wait at least 10ms, then check again..
		 */
		delay(USEC_TO_TICK_ROUNDUP(10000));
	}

	mach_cpucontext_free(cp, ctx, 0);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_master(who);
#endif

	if (dtrace_cpu_init != NULL) {
		/*
		 * DTrace CPU initialization expects cpu_lock to be held.
		 */
		mutex_enter(&cpu_lock);
		(*dtrace_cpu_init)(who);
		mutex_exit(&cpu_lock);
	}

	while (!CPU_IN_SET(cpu_ready_set, who))
		delay(1);

	return (0);
}


/*ARGSUSED*/
void
start_other_cpus(int cprboot)
{
	uint_t who;
	uint_t skipped = 0;
	uint_t bootcpuid = 0;

	/*
	 * Initialize our own cpu_info.
	 */
	init_cpu_info(CPU);

	/*
	 * Initialize our syscall handlers.
	 */
	init_cpu_syscall(CPU);

	/*
	 * Take the boot cpu out of the mp_cpus set because we know
	 * it's already running.  Add it to the cpu_ready_set for
	 * precisely the same reason.
	 */
	CPUSET_DEL(mp_cpus, bootcpuid);
	CPUSET_ADD(cpu_ready_set, bootcpuid);

	/*
	 * If there is only one cpu or we are not using MP, skip the rest.
	 */
	if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
		if (use_mp == 0)
			cmn_err(CE_CONT, "?***** Not in MP mode\n");
		goto done;
	}

	/*
	 * Perform such initialization as is needed
	 * to be able to take CPUs on- and off-line.
	 */
	cpu_pause_init();

	xc_init();		/* initialize processor crosscalls */

	if (mach_cpucontext_init() != 0)
		goto done;

	flushes_require_xcalls = 1;

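	/*
	 * Setting flushes_require_xcalls above tells the hat layer that,
	 * from this point on, other CPUs may be caching translations, so
	 * TLB flushes can no longer be treated as purely local operations.
	 */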
	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */
	affinity_set(CPU_CURRENT);

	for (who = 0; who < NCPU; who++) {

		if (!CPU_IN_SET(mp_cpus, who))
			continue;
		ASSERT(who != bootcpuid);
		if (ncpus >= max_ncpus) {
			skipped = who;
			continue;
		}
		if (start_cpu(who) != 0)
			CPUSET_DEL(mp_cpus, who);
	}

#if !defined(__xpv)
	/* Free the space allocated to hold the microcode file */
	ucode_free();
#endif

	affinity_clear();

	if (skipped) {
		cmn_err(CE_NOTE,
		    "System detected %d cpus, but "
		    "only %d cpu(s) were enabled during boot.",
		    skipped + 1, ncpus);
		cmn_err(CE_NOTE,
		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
		    "See eeprom(1M).");
	}

done:
	workaround_errata_end();
	mach_cpucontext_fini();

	cmi_post_mpstartup();
}

/*
 * Dummy functions - no i86pc platforms support dynamic cpu allocation.
 */
/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
	return (ENOTSUP);		/* not supported */
}

/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
	return (ENOTSUP);		/* not supported */
}

/*
 * Startup function for 'other' CPUs (besides boot cpu).
 * Called from real_mode_start.
 *
 * WARNING: until CPU_READY is set, mp_startup and routines called by
 * mp_startup should not call routines (e.g. kmem_free) that could call
 * hat_unload which requires CPU_READY to be set.
 */
void
mp_startup(void)
{
	struct cpu *cp = CPU;
	uint_t new_x86_feature;

	/*
	 * We need to get TSC on this proc synced (i.e., any delta
	 * from cpu0 accounted for) as soon as we can, because many
	 * things use gethrtime/pc_gethrestime, including
	 * interrupts, cmn_err, etc.
	 */

	/* Let cpu0 continue into tsc_sync_master() */
	CPUSET_ATOMIC_ADD(procset, cp->cpu_id);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_slave();
#endif

	/*
	 * Once this was done from assembly, but it's safer here; if
	 * it blocks, we need to be able to swtch() to and from, and
	 * since we get here by calling t_pc, we need to do that call
	 * before swtch() overwrites it.
	 */
	(void) (*ap_mlsetup)();

	new_x86_feature = cpuid_pass1(cp);

#ifndef __xpv
	/*
	 * Program this cpu's PAT.
	 */
	if (x86_feature & X86_PAT)
		pat_sync();
#endif

	/*
	 * Set up TSC_AUX to contain the cpuid for this processor
	 * for the rdtscp instruction.
	 */
	if (x86_feature & X86_TSCP)
		(void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);

	/*
	 * Initialize this CPU's syscall handlers.
	 */
	init_cpu_syscall(cp);

	/*
	 * Enable interrupts with spl set to LOCK_LEVEL.  LOCK_LEVEL is the
	 * highest level at which a routine is permitted to block on
	 * an adaptive mutex (allows for cpu poke interrupt in case
	 * the cpu is blocked on a mutex and halts).  Setting LOCK_LEVEL blocks
	 * device interrupts that may end up in the hat layer issuing cross
	 * calls before CPU_READY is set.
	 */
	splx(ipltospl(LOCK_LEVEL));
	sti();

	/*
	 * Do a sanity check to make sure this new CPU is a sane thing
	 * to add to the collection of processors running this system.
	 *
	 * XXX	Clearly this needs to get more sophisticated, if x86
	 *	systems start to get built out of heterogeneous CPUs, as is
	 *	likely to happen once the number of processors in a
	 *	configuration gets large enough.
	 */
	if ((x86_feature & new_x86_feature) != x86_feature) {
		cmn_err(CE_CONT, "?cpu%d: %b\n",
		    cp->cpu_id, new_x86_feature, FMT_X86_FEATURE);
		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
	}

	/*
	 * We do not support cpus with mixed monitor/mwait support if the
	 * boot cpu supports monitor/mwait.
	 */
	if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
		panic("unsupported mixed cpu monitor/mwait support detected");

	/*
	 * We could be more sophisticated here, and just mark the CPU
	 * as "faulted" but at this point we'll opt for the easier
	 * answer of dying horribly.  Provided the boot cpu is ok,
	 * the system can be recovered by booting with use_mp set to zero.
	 */
	if (workaround_errata(cp) != 0)
		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);

	cpuid_pass2(cp);
	cpuid_pass3(cp);
	(void) cpuid_pass4(cp);

	init_cpu_info(cp);

	mutex_enter(&cpu_lock);
	/*
	 * Processor group initialization for this CPU is dependent on the
	 * cpuid probing, which must be done in the context of the current
	 * CPU.
	 */
	pghw_physid_create(cp);
	pg_cpu_init(cp);
	pg_cmt_cpu_startup(cp);

	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_ENABLE | CPU_EXISTS;
	cpu_add_active(cp);

	if (dtrace_cpu_init != NULL) {
		(*dtrace_cpu_init)(cp->cpu_id);
	}

#if !defined(__xpv)
	/*
	 * Fill out cpu_ucode_info.  Update microcode if necessary.
	 */
	ucode_check(cp);
#endif

	mutex_exit(&cpu_lock);

	/*
	 * Enable preemption here so that contention for any locks acquired
	 * later in mp_startup may be preempted if the thread owning those
	 * locks is continuously executing on other CPUs (for example, this
	 * CPU must be preemptible to allow other CPUs to pause it during their
	 * startup phases).  It's safe to enable preemption here because the
	 * CPU state is pretty-much fully constructed.
	 */
	curthread->t_preempt = 0;

	add_cpunode2devtree(cp->cpu_id, cp->cpu_m.mcpu_cpi);

	/* The base spl should still be at LOCK_LEVEL here */
	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
	set_base_spl();		/* Restore the spl to its proper value */

	(void) spl0();				/* enable interrupts */

#ifndef __xpv
	{
		/*
		 * Set up the CPU module for this CPU.  This can't be done
		 * before this CPU is made CPU_READY, because we may (in
		 * heterogeneous systems) need to go load another CPU module.
		 * The act of attempting to load a module may trigger a
		 * cross-call, which will ASSERT unless this cpu is CPU_READY.
		 */
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
		    cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
			if (x86_feature & X86_MCA)
				cmi_mca_init(hdl);
		}
	}
#endif /* __xpv */

	if (boothowto & RB_DEBUG)
		kdi_cpu_init();

	/*
	 * Setting the bit in cpu_ready_set must be the last operation in
	 * processor initialization; the boot CPU will continue to boot once
	 * it sees this bit set for all active CPUs.
	 */
	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);

	/*
	 * Because mp_startup() gets fired off after init() starts, we
	 * can't use the '?' trick to do 'boot -v' printing - so we
	 * always direct the 'cpu .. online' messages to the log.
	 */
	cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
	    cp->cpu_id);

	/*
	 * Now we are done with the startup thread, so free it up.
	 */
	thread_exit();
	panic("mp_startup: cannot return");
	/*NOTREACHED*/
}


/*
 * Start CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_start(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (0);
}

/*
 * Stop CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_stop(struct cpu *cp)
{
	extern int cbe_psm_timer_mode;
	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef __xpv
	/*
	 * We can't offline vcpu0.
	 */
	if (cp->cpu_id == 0)
		return (EBUSY);
#endif

	/*
	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
	 * we can't stop it.  (This is true only for machines with no TSC.)
	 */
	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
		return (EBUSY);

	return (0);
}

/*
 * Take the specified CPU out of participation in interrupts.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
		return (EBUSY);

	cp->cpu_flags &= ~CPU_ENABLE;
	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cp->cpu_flags |= CPU_ENABLE;
	psm_enable_intr(cp->cpu_id);
}


/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
#ifndef __xpv
	cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	if (hdl != NULL) {
		cmi_faulted_enter(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
#ifndef __xpv
	cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	if (hdl != NULL) {
		cmi_faulted_exit(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

/*
 * The following two routines are used as context operators on threads
 * belonging to processes with a private LDT (see sysi86).  Due to the
 * rarity of such processes, these routines are currently written for best
 * code readability and organization rather than speed.  We could avoid
 * checking x86_feature at every context switch by installing different
 * context ops, depending on the x86_feature flags, at LDT creation time --
 * one for each combination of fast syscall feature flags.
 */

/*ARGSUSED*/
void
cpu_fast_syscall_disable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_disable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_disable();
}

/*ARGSUSED*/
void
cpu_fast_syscall_enable(void *arg)
{
	if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
		cpu_sep_enable();
	if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
		cpu_asysc_enable();
}

static void
cpu_sep_enable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
}

static void
cpu_sep_disable(void)
{
	ASSERT(x86_feature & X86_SEP);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
	 * the sysenter or sysexit instruction to trigger a #gp fault.
	 */
	wrmsr(MSR_INTC_SEP_CS, 0);
}

static void
cpu_asysc_enable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
}

static void
cpu_asysc_disable(void)
{
	ASSERT(x86_feature & X86_ASYSC);
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Turn off the SCE (syscall enable) bit in the EFER register.  Software
	 * executing syscall or sysret with this bit off will incur a #ud trap.
	 */
	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
}