1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* 26 * Copyright (c) 2010, Intel Corporation. 27 * All rights reserved. 28 */ 29 /* 30 * Copyright 2020 Joyent, Inc. 31 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 32 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/thread.h> 37 #include <sys/cpuvar.h> 38 #include <sys/cpu.h> 39 #include <sys/t_lock.h> 40 #include <sys/param.h> 41 #include <sys/proc.h> 42 #include <sys/disp.h> 43 #include <sys/class.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/note.h> 47 #include <sys/asm_linkage.h> 48 #include <sys/x_call.h> 49 #include <sys/systm.h> 50 #include <sys/var.h> 51 #include <sys/vtrace.h> 52 #include <vm/hat.h> 53 #include <vm/as.h> 54 #include <vm/seg_kmem.h> 55 #include <vm/seg_kp.h> 56 #include <sys/segments.h> 57 #include <sys/kmem.h> 58 #include <sys/stack.h> 59 #include <sys/smp_impldefs.h> 60 #include <sys/x86_archext.h> 61 #include <sys/machsystm.h> 62 #include <sys/traptrace.h> 63 #include <sys/clock.h> 64 #include <sys/cpc_impl.h> 65 #include <sys/pg.h> 66 #include <sys/cmt.h> 67 #include <sys/dtrace.h> 68 #include <sys/archsystm.h> 69 #include <sys/fp.h> 70 #include <sys/reboot.h> 71 #include <sys/kdi_machimpl.h> 72 #include <vm/hat_i86.h> 73 #include <vm/vm_dep.h> 74 #include <sys/memnode.h> 75 #include <sys/pci_cfgspace.h> 76 #include <sys/mach_mmu.h> 77 #include <sys/sysmacros.h> 78 #if defined(__xpv) 79 #include <sys/hypervisor.h> 80 #else 81 #include <sys/hma.h> 82 #endif 83 #include <sys/cpu_module.h> 84 #include <sys/ontrap.h> 85 86 struct cpu cpus[1] __aligned(MMU_PAGESIZE); 87 struct cpu *cpu[NCPU] = {&cpus[0]}; 88 struct cpu *cpu_free_list; 89 cpu_core_t cpu_core[NCPU]; 90 91 #define cpu_next_free cpu_prev 92 93 /* 94 * Useful for disabling MP bring-up on a MP capable system. 95 */ 96 int use_mp = 1; 97 98 /* 99 * to be set by a PSM to indicate what cpus 100 * are sitting around on the system. 101 */ 102 cpuset_t mp_cpus; 103 104 /* 105 * This variable is used by the hat layer to decide whether or not 106 * critical sections are needed to prevent race conditions. For sun4m, 107 * this variable is set once enough MP initialization has been done in 108 * order to allow cross calls. 109 */ 110 int flushes_require_xcalls; 111 112 cpuset_t cpu_ready_set; /* initialized in startup() */ 113 114 static void mp_startup_boot(void); 115 static void mp_startup_hotplug(void); 116 117 static void cpu_sep_enable(void); 118 static void cpu_sep_disable(void); 119 static void cpu_asysc_enable(void); 120 static void cpu_asysc_disable(void); 121 122 /* 123 * Init CPU info - get CPU type info for processor_info system call. 124 */ 125 void 126 init_cpu_info(struct cpu *cp) 127 { 128 processor_info_t *pi = &cp->cpu_type_info; 129 130 /* 131 * Get clock-frequency property for the CPU. 132 */ 133 pi->pi_clock = cpu_freq; 134 135 /* 136 * Current frequency in Hz. 137 */ 138 cp->cpu_curr_clock = cpu_freq_hz; 139 140 /* 141 * Supported frequencies. 142 */ 143 if (cp->cpu_supp_freqs == NULL) { 144 cpu_set_supp_freqs(cp, NULL); 145 } 146 147 (void) strcpy(pi->pi_processor_type, "i386"); 148 if (fpu_exists) 149 (void) strcpy(pi->pi_fputypes, "i387 compatible"); 150 151 cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP); 152 cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP); 153 154 /* 155 * If called for the BSP, cp is equal to current CPU. 156 * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info 157 * of current CPU as default values for cpu_idstr and cpu_brandstr. 158 * They will be corrected in mp_startup_common() after cpuid_pass1() 159 * has been invoked on target CPU. 160 */ 161 (void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN); 162 (void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN); 163 } 164 165 /* 166 * Configure syscall support on this CPU. 167 */ 168 /*ARGSUSED*/ 169 void 170 init_cpu_syscall(struct cpu *cp) 171 { 172 kpreempt_disable(); 173 174 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 175 is_x86_feature(x86_featureset, X86FSET_ASYSC)) { 176 uint64_t flags; 177 178 #if !defined(__xpv) 179 /* 180 * The syscall instruction imposes a certain ordering on 181 * segment selectors, so we double-check that ordering 182 * here. 183 */ 184 CTASSERT(KDS_SEL == KCS_SEL + 8); 185 CTASSERT(UDS_SEL == U32CS_SEL + 8); 186 CTASSERT(UCS_SEL == U32CS_SEL + 16); 187 #endif 188 189 /* 190 * Turn syscall/sysret extensions on. 191 */ 192 cpu_asysc_enable(); 193 194 /* 195 * Program the magic registers .. 196 */ 197 wrmsr(MSR_AMD_STAR, 198 ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32); 199 if (kpti_enable == 1) { 200 wrmsr(MSR_AMD_LSTAR, 201 (uint64_t)(uintptr_t)tr_sys_syscall); 202 wrmsr(MSR_AMD_CSTAR, 203 (uint64_t)(uintptr_t)tr_sys_syscall32); 204 } else { 205 wrmsr(MSR_AMD_LSTAR, 206 (uint64_t)(uintptr_t)sys_syscall); 207 wrmsr(MSR_AMD_CSTAR, 208 (uint64_t)(uintptr_t)sys_syscall32); 209 } 210 211 /* 212 * This list of flags is masked off the incoming 213 * %rfl when we enter the kernel. 214 */ 215 flags = PS_IE | PS_T; 216 if (is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE) 217 flags |= PS_ACHK; 218 wrmsr(MSR_AMD_SFMASK, flags); 219 } 220 221 /* 222 * On 64-bit kernels on Nocona machines, the 32-bit syscall 223 * variant isn't available to 32-bit applications, but sysenter is. 224 */ 225 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 226 is_x86_feature(x86_featureset, X86FSET_SEP)) { 227 228 #if !defined(__xpv) 229 /* 230 * The sysenter instruction imposes a certain ordering on 231 * segment selectors, so we double-check that ordering 232 * here. See "sysenter" in Intel document 245471-012, "IA-32 233 * Intel Architecture Software Developer's Manual Volume 2: 234 * Instruction Set Reference" 235 */ 236 CTASSERT(KDS_SEL == KCS_SEL + 8); 237 238 CTASSERT(U32CS_SEL == ((KCS_SEL + 16) | 3)); 239 CTASSERT(UDS_SEL == U32CS_SEL + 8); 240 #endif 241 242 cpu_sep_enable(); 243 244 /* 245 * resume() sets this value to the base of the threads stack 246 * via a context handler. 247 */ 248 wrmsr(MSR_INTC_SEP_ESP, 0); 249 250 if (kpti_enable == 1) { 251 wrmsr(MSR_INTC_SEP_EIP, 252 (uint64_t)(uintptr_t)tr_sys_sysenter); 253 } else { 254 wrmsr(MSR_INTC_SEP_EIP, 255 (uint64_t)(uintptr_t)sys_sysenter); 256 } 257 } 258 259 kpreempt_enable(); 260 } 261 262 #if !defined(__xpv) 263 /* 264 * Configure per-cpu ID GDT 265 */ 266 static void 267 init_cpu_id_gdt(struct cpu *cp) 268 { 269 /* Write cpu_id into limit field of GDT for usermode retrieval */ 270 #if defined(__amd64) 271 set_usegd(&cp->cpu_gdt[GDT_CPUID], SDP_SHORT, NULL, cp->cpu_id, 272 SDT_MEMRODA, SEL_UPL, SDP_BYTES, SDP_OP32); 273 #elif defined(__i386) 274 set_usegd(&cp->cpu_gdt[GDT_CPUID], NULL, cp->cpu_id, SDT_MEMRODA, 275 SEL_UPL, SDP_BYTES, SDP_OP32); 276 #endif 277 } 278 #endif /* !defined(__xpv) */ 279 280 /* 281 * Multiprocessor initialization. 282 * 283 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the 284 * startup and idle threads for the specified CPU. 285 * Parameter boot is true for boot time operations and is false for CPU 286 * DR operations. 287 */ 288 static struct cpu * 289 mp_cpu_configure_common(int cpun, boolean_t boot) 290 { 291 struct cpu *cp; 292 kthread_id_t tp; 293 caddr_t sp; 294 proc_t *procp; 295 #if !defined(__xpv) 296 extern int idle_cpu_prefer_mwait; 297 extern void cpu_idle_mwait(); 298 #endif 299 extern void idle(); 300 extern void cpu_idle(); 301 302 #ifdef TRAPTRACE 303 trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun]; 304 #endif 305 306 ASSERT(MUTEX_HELD(&cpu_lock)); 307 ASSERT(cpun < NCPU && cpu[cpun] == NULL); 308 309 if (cpu_free_list == NULL) { 310 cp = kmem_zalloc(sizeof (*cp), KM_SLEEP); 311 } else { 312 cp = cpu_free_list; 313 cpu_free_list = cp->cpu_next_free; 314 } 315 316 cp->cpu_m.mcpu_istamp = cpun << 16; 317 318 /* Create per CPU specific threads in the process p0. */ 319 procp = &p0; 320 321 /* 322 * Initialize the dispatcher first. 323 */ 324 disp_cpu_init(cp); 325 326 cpu_vm_data_init(cp); 327 328 /* 329 * Allocate and initialize the startup thread for this CPU. 330 * Interrupt and process switch stacks get allocated later 331 * when the CPU starts running. 332 */ 333 tp = thread_create(NULL, 0, NULL, NULL, 0, procp, 334 TS_STOPPED, maxclsyspri); 335 336 /* 337 * Set state to TS_ONPROC since this thread will start running 338 * as soon as the CPU comes online. 339 * 340 * All the other fields of the thread structure are setup by 341 * thread_create(). 342 */ 343 THREAD_ONPROC(tp, cp); 344 tp->t_preempt = 1; 345 tp->t_bound_cpu = cp; 346 tp->t_affinitycnt = 1; 347 tp->t_cpu = cp; 348 tp->t_disp_queue = cp->cpu_disp; 349 350 /* 351 * Setup thread to start in mp_startup_common. 352 */ 353 sp = tp->t_stk; 354 tp->t_sp = (uintptr_t)(sp - MINFRAME); 355 #if defined(__amd64) 356 tp->t_sp -= STACK_ENTRY_ALIGN; /* fake a call */ 357 #endif 358 /* 359 * Setup thread start entry point for boot or hotplug. 360 */ 361 if (boot) { 362 tp->t_pc = (uintptr_t)mp_startup_boot; 363 } else { 364 tp->t_pc = (uintptr_t)mp_startup_hotplug; 365 } 366 367 cp->cpu_id = cpun; 368 cp->cpu_self = cp; 369 cp->cpu_thread = tp; 370 cp->cpu_lwp = NULL; 371 cp->cpu_dispthread = tp; 372 cp->cpu_dispatch_pri = DISP_PRIO(tp); 373 374 /* 375 * cpu_base_spl must be set explicitly here to prevent any blocking 376 * operations in mp_startup_common from causing the spl of the cpu 377 * to drop to 0 (allowing device interrupts before we're ready) in 378 * resume(). 379 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY. 380 * As an extra bit of security on DEBUG kernels, this is enforced with 381 * an assertion in mp_startup_common() -- before cpu_base_spl is set 382 * to its proper value. 383 */ 384 cp->cpu_base_spl = ipltospl(LOCK_LEVEL); 385 386 /* 387 * Now, initialize per-CPU idle thread for this CPU. 388 */ 389 tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1); 390 391 cp->cpu_idle_thread = tp; 392 393 tp->t_preempt = 1; 394 tp->t_bound_cpu = cp; 395 tp->t_affinitycnt = 1; 396 tp->t_cpu = cp; 397 tp->t_disp_queue = cp->cpu_disp; 398 399 /* 400 * Bootstrap the CPU's PG data 401 */ 402 pg_cpu_bootstrap(cp); 403 404 /* 405 * Perform CPC initialization on the new CPU. 406 */ 407 kcpc_hw_init(cp); 408 409 /* 410 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2 411 * for each CPU. 412 */ 413 setup_vaddr_for_ppcopy(cp); 414 415 /* 416 * Allocate page for new GDT and initialize from current GDT. 417 */ 418 #if !defined(__lint) 419 ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE); 420 #endif 421 cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP); 422 bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT)); 423 424 #if defined(__i386) 425 /* 426 * setup kernel %gs. 427 */ 428 set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA, 429 SEL_KPL, 0, 1); 430 #endif 431 432 /* 433 * Allocate pages for the CPU LDT. 434 */ 435 cp->cpu_m.mcpu_ldt = kmem_zalloc(LDT_CPU_SIZE, KM_SLEEP); 436 cp->cpu_m.mcpu_ldt_len = 0; 437 438 /* 439 * Allocate a per-CPU IDT and initialize the new IDT to the currently 440 * runing CPU. 441 */ 442 #if !defined(__lint) 443 ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE); 444 #endif 445 cp->cpu_idt = kmem_alloc(PAGESIZE, KM_SLEEP); 446 bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE); 447 448 /* 449 * alloc space for cpuid info 450 */ 451 cpuid_alloc_space(cp); 452 #if !defined(__xpv) 453 if (is_x86_feature(x86_featureset, X86FSET_MWAIT) && 454 idle_cpu_prefer_mwait) { 455 cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp); 456 cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait; 457 } else 458 #endif 459 cp->cpu_m.mcpu_idle_cpu = cpu_idle; 460 461 init_cpu_info(cp); 462 463 #if !defined(__xpv) 464 init_cpu_id_gdt(cp); 465 #endif 466 467 /* 468 * alloc space for ucode_info 469 */ 470 ucode_alloc_space(cp); 471 xc_init_cpu(cp); 472 hat_cpu_online(cp); 473 474 #ifdef TRAPTRACE 475 /* 476 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers 477 */ 478 ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP); 479 ttc->ttc_next = ttc->ttc_first; 480 ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize; 481 #endif 482 483 /* 484 * Record that we have another CPU. 485 */ 486 /* 487 * Initialize the interrupt threads for this CPU 488 */ 489 cpu_intr_alloc(cp, NINTR_THREADS); 490 491 cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF; 492 cpu_set_state(cp); 493 494 /* 495 * Add CPU to list of available CPUs. It'll be on the active list 496 * after mp_startup_common(). 497 */ 498 cpu_add_unit(cp); 499 500 return (cp); 501 } 502 503 /* 504 * Undo what was done in mp_cpu_configure_common 505 */ 506 static void 507 mp_cpu_unconfigure_common(struct cpu *cp, int error) 508 { 509 ASSERT(MUTEX_HELD(&cpu_lock)); 510 511 /* 512 * Remove the CPU from the list of available CPUs. 513 */ 514 cpu_del_unit(cp->cpu_id); 515 516 if (error == ETIMEDOUT) { 517 /* 518 * The cpu was started, but never *seemed* to run any 519 * code in the kernel; it's probably off spinning in its 520 * own private world, though with potential references to 521 * our kmem-allocated IDTs and GDTs (for example). 522 * 523 * Worse still, it may actually wake up some time later, 524 * so rather than guess what it might or might not do, we 525 * leave the fundamental data structures intact. 526 */ 527 cp->cpu_flags = 0; 528 return; 529 } 530 531 /* 532 * At this point, the only threads bound to this CPU should 533 * special per-cpu threads: it's idle thread, it's pause threads, 534 * and it's interrupt threads. Clean these up. 535 */ 536 cpu_destroy_bound_threads(cp); 537 cp->cpu_idle_thread = NULL; 538 539 /* 540 * Free the interrupt stack. 541 */ 542 segkp_release(segkp, 543 cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME))); 544 cp->cpu_intr_stack = NULL; 545 546 #ifdef TRAPTRACE 547 /* 548 * Discard the trap trace buffer 549 */ 550 { 551 trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id]; 552 553 kmem_free((void *)ttc->ttc_first, trap_trace_bufsize); 554 ttc->ttc_first = (uintptr_t)NULL; 555 } 556 #endif 557 558 hat_cpu_offline(cp); 559 560 ucode_free_space(cp); 561 562 /* Free CPU ID string and brand string. */ 563 if (cp->cpu_idstr) { 564 kmem_free(cp->cpu_idstr, CPU_IDSTRLEN); 565 cp->cpu_idstr = NULL; 566 } 567 if (cp->cpu_brandstr) { 568 kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN); 569 cp->cpu_brandstr = NULL; 570 } 571 572 #if !defined(__xpv) 573 if (cp->cpu_m.mcpu_mwait != NULL) { 574 cpuid_mwait_free(cp); 575 cp->cpu_m.mcpu_mwait = NULL; 576 } 577 #endif 578 cpuid_free_space(cp); 579 580 if (cp->cpu_idt != CPU->cpu_idt) 581 kmem_free(cp->cpu_idt, PAGESIZE); 582 cp->cpu_idt = NULL; 583 584 kmem_free(cp->cpu_m.mcpu_ldt, LDT_CPU_SIZE); 585 cp->cpu_m.mcpu_ldt = NULL; 586 cp->cpu_m.mcpu_ldt_len = 0; 587 588 kmem_free(cp->cpu_gdt, PAGESIZE); 589 cp->cpu_gdt = NULL; 590 591 if (cp->cpu_supp_freqs != NULL) { 592 size_t len = strlen(cp->cpu_supp_freqs) + 1; 593 kmem_free(cp->cpu_supp_freqs, len); 594 cp->cpu_supp_freqs = NULL; 595 } 596 597 teardown_vaddr_for_ppcopy(cp); 598 599 kcpc_hw_fini(cp); 600 601 cp->cpu_dispthread = NULL; 602 cp->cpu_thread = NULL; /* discarded by cpu_destroy_bound_threads() */ 603 604 cpu_vm_data_destroy(cp); 605 606 xc_fini_cpu(cp); 607 disp_cpu_fini(cp); 608 609 ASSERT(cp != CPU0); 610 bzero(cp, sizeof (*cp)); 611 cp->cpu_next_free = cpu_free_list; 612 cpu_free_list = cp; 613 } 614 615 /* 616 * Apply workarounds for known errata, and warn about those that are absent. 617 * 618 * System vendors occasionally create configurations which contain different 619 * revisions of the CPUs that are almost but not exactly the same. At the 620 * time of writing, this meant that their clock rates were the same, their 621 * feature sets were the same, but the required workaround were -not- 622 * necessarily the same. So, this routine is invoked on -every- CPU soon 623 * after starting to make sure that the resulting system contains the most 624 * pessimal set of workarounds needed to cope with *any* of the CPUs in the 625 * system. 626 * 627 * workaround_errata is invoked early in mlsetup() for CPU 0, and in 628 * mp_startup_common() for all slave CPUs. Slaves process workaround_errata 629 * prior to acknowledging their readiness to the master, so this routine will 630 * never be executed by multiple CPUs in parallel, thus making updates to 631 * global data safe. 632 * 633 * These workarounds are based on Rev 3.57 of the Revision Guide for 634 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005. 635 */ 636 637 #if defined(OPTERON_ERRATUM_88) 638 int opteron_erratum_88; /* if non-zero -> at least one cpu has it */ 639 #endif 640 641 #if defined(OPTERON_ERRATUM_91) 642 int opteron_erratum_91; /* if non-zero -> at least one cpu has it */ 643 #endif 644 645 #if defined(OPTERON_ERRATUM_93) 646 int opteron_erratum_93; /* if non-zero -> at least one cpu has it */ 647 #endif 648 649 #if defined(OPTERON_ERRATUM_95) 650 int opteron_erratum_95; /* if non-zero -> at least one cpu has it */ 651 #endif 652 653 #if defined(OPTERON_ERRATUM_100) 654 int opteron_erratum_100; /* if non-zero -> at least one cpu has it */ 655 #endif 656 657 #if defined(OPTERON_ERRATUM_108) 658 int opteron_erratum_108; /* if non-zero -> at least one cpu has it */ 659 #endif 660 661 #if defined(OPTERON_ERRATUM_109) 662 int opteron_erratum_109; /* if non-zero -> at least one cpu has it */ 663 #endif 664 665 #if defined(OPTERON_ERRATUM_121) 666 int opteron_erratum_121; /* if non-zero -> at least one cpu has it */ 667 #endif 668 669 #if defined(OPTERON_ERRATUM_122) 670 int opteron_erratum_122; /* if non-zero -> at least one cpu has it */ 671 #endif 672 673 #if defined(OPTERON_ERRATUM_123) 674 int opteron_erratum_123; /* if non-zero -> at least one cpu has it */ 675 #endif 676 677 #if defined(OPTERON_ERRATUM_131) 678 int opteron_erratum_131; /* if non-zero -> at least one cpu has it */ 679 #endif 680 681 #if defined(OPTERON_WORKAROUND_6336786) 682 int opteron_workaround_6336786; /* non-zero -> WA relevant and applied */ 683 int opteron_workaround_6336786_UP = 0; /* Not needed for UP */ 684 #endif 685 686 #if defined(OPTERON_WORKAROUND_6323525) 687 int opteron_workaround_6323525; /* if non-zero -> at least one cpu has it */ 688 #endif 689 690 #if defined(OPTERON_ERRATUM_298) 691 int opteron_erratum_298; 692 #endif 693 694 #if defined(OPTERON_ERRATUM_721) 695 int opteron_erratum_721; 696 #endif 697 698 static void 699 workaround_warning(cpu_t *cp, uint_t erratum) 700 { 701 cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u", 702 cp->cpu_id, erratum); 703 } 704 705 static void 706 workaround_applied(uint_t erratum) 707 { 708 if (erratum > 1000000) 709 cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n", 710 erratum); 711 else 712 cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n", 713 erratum); 714 } 715 716 static void 717 msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error) 718 { 719 cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d", 720 cp->cpu_id, rw, msr, error); 721 } 722 723 /* 724 * Determine the number of nodes in a Hammer / Greyhound / Griffin family 725 * system. 726 */ 727 static uint_t 728 opteron_get_nnodes(void) 729 { 730 static uint_t nnodes = 0; 731 732 if (nnodes == 0) { 733 #ifdef DEBUG 734 uint_t family; 735 736 /* 737 * This routine uses a PCI config space based mechanism 738 * for retrieving the number of nodes in the system. 739 * Device 24, function 0, offset 0x60 as used here is not 740 * AMD processor architectural, and may not work on processor 741 * families other than those listed below. 742 * 743 * Callers of this routine must ensure that we're running on 744 * a processor which supports this mechanism. 745 * The assertion below is meant to catch calls on unsupported 746 * processors. 747 */ 748 family = cpuid_getfamily(CPU); 749 ASSERT(family == 0xf || family == 0x10 || family == 0x11); 750 #endif /* DEBUG */ 751 752 /* 753 * Obtain the number of nodes in the system from 754 * bits [6:4] of the Node ID register on node 0. 755 * 756 * The actual node count is NodeID[6:4] + 1 757 * 758 * The Node ID register is accessed via function 0, 759 * offset 0x60. Node 0 is device 24. 760 */ 761 nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1; 762 } 763 return (nnodes); 764 } 765 766 uint_t 767 do_erratum_298(struct cpu *cpu) 768 { 769 static int osvwrc = -3; 770 extern int osvw_opteron_erratum(cpu_t *, uint_t); 771 772 /* 773 * L2 Eviction May Occur During Processor Operation To Set 774 * Accessed or Dirty Bit. 775 */ 776 if (osvwrc == -3) { 777 osvwrc = osvw_opteron_erratum(cpu, 298); 778 } else { 779 /* osvw return codes should be consistent for all cpus */ 780 ASSERT(osvwrc == osvw_opteron_erratum(cpu, 298)); 781 } 782 783 switch (osvwrc) { 784 case 0: /* erratum is not present: do nothing */ 785 break; 786 case 1: /* erratum is present: BIOS workaround applied */ 787 /* 788 * check if workaround is actually in place and issue warning 789 * if not. 790 */ 791 if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) || 792 ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0)) { 793 #if defined(OPTERON_ERRATUM_298) 794 opteron_erratum_298++; 795 #else 796 workaround_warning(cpu, 298); 797 return (1); 798 #endif 799 } 800 break; 801 case -1: /* cannot determine via osvw: check cpuid */ 802 if ((cpuid_opteron_erratum(cpu, 298) > 0) && 803 (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) || 804 ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0))) { 805 #if defined(OPTERON_ERRATUM_298) 806 opteron_erratum_298++; 807 #else 808 workaround_warning(cpu, 298); 809 return (1); 810 #endif 811 } 812 break; 813 } 814 return (0); 815 } 816 817 uint_t 818 workaround_errata(struct cpu *cpu) 819 { 820 uint_t missing = 0; 821 822 ASSERT(cpu == CPU); 823 824 /*LINTED*/ 825 if (cpuid_opteron_erratum(cpu, 88) > 0) { 826 /* 827 * SWAPGS May Fail To Read Correct GS Base 828 */ 829 #if defined(OPTERON_ERRATUM_88) 830 /* 831 * The workaround is an mfence in the relevant assembler code 832 */ 833 opteron_erratum_88++; 834 #else 835 workaround_warning(cpu, 88); 836 missing++; 837 #endif 838 } 839 840 if (cpuid_opteron_erratum(cpu, 91) > 0) { 841 /* 842 * Software Prefetches May Report A Page Fault 843 */ 844 #if defined(OPTERON_ERRATUM_91) 845 /* 846 * fix is in trap.c 847 */ 848 opteron_erratum_91++; 849 #else 850 workaround_warning(cpu, 91); 851 missing++; 852 #endif 853 } 854 855 if (cpuid_opteron_erratum(cpu, 93) > 0) { 856 /* 857 * RSM Auto-Halt Restart Returns to Incorrect RIP 858 */ 859 #if defined(OPTERON_ERRATUM_93) 860 /* 861 * fix is in trap.c 862 */ 863 opteron_erratum_93++; 864 #else 865 workaround_warning(cpu, 93); 866 missing++; 867 #endif 868 } 869 870 /*LINTED*/ 871 if (cpuid_opteron_erratum(cpu, 95) > 0) { 872 /* 873 * RET Instruction May Return to Incorrect EIP 874 */ 875 #if defined(OPTERON_ERRATUM_95) 876 #if defined(_LP64) 877 /* 878 * Workaround this by ensuring that 32-bit user code and 879 * 64-bit kernel code never occupy the same address 880 * range mod 4G. 881 */ 882 if (_userlimit32 > 0xc0000000ul) 883 *(uintptr_t *)&_userlimit32 = 0xc0000000ul; 884 885 /*LINTED*/ 886 ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u); 887 opteron_erratum_95++; 888 #endif /* _LP64 */ 889 #else 890 workaround_warning(cpu, 95); 891 missing++; 892 #endif 893 } 894 895 if (cpuid_opteron_erratum(cpu, 100) > 0) { 896 /* 897 * Compatibility Mode Branches Transfer to Illegal Address 898 */ 899 #if defined(OPTERON_ERRATUM_100) 900 /* 901 * fix is in trap.c 902 */ 903 opteron_erratum_100++; 904 #else 905 workaround_warning(cpu, 100); 906 missing++; 907 #endif 908 } 909 910 /*LINTED*/ 911 if (cpuid_opteron_erratum(cpu, 108) > 0) { 912 /* 913 * CPUID Instruction May Return Incorrect Model Number In 914 * Some Processors 915 */ 916 #if defined(OPTERON_ERRATUM_108) 917 /* 918 * (Our cpuid-handling code corrects the model number on 919 * those processors) 920 */ 921 #else 922 workaround_warning(cpu, 108); 923 missing++; 924 #endif 925 } 926 927 /*LINTED*/ 928 if (cpuid_opteron_erratum(cpu, 109) > 0) do { 929 /* 930 * Certain Reverse REP MOVS May Produce Unpredictable Behavior 931 */ 932 #if defined(OPTERON_ERRATUM_109) 933 /* 934 * The "workaround" is to print a warning to upgrade the BIOS 935 */ 936 uint64_t value; 937 const uint_t msr = MSR_AMD_PATCHLEVEL; 938 int err; 939 940 if ((err = checked_rdmsr(msr, &value)) != 0) { 941 msr_warning(cpu, "rd", msr, err); 942 workaround_warning(cpu, 109); 943 missing++; 944 } 945 if (value == 0) 946 opteron_erratum_109++; 947 #else 948 workaround_warning(cpu, 109); 949 missing++; 950 #endif 951 /*CONSTANTCONDITION*/ 952 } while (0); 953 954 /*LINTED*/ 955 if (cpuid_opteron_erratum(cpu, 121) > 0) { 956 /* 957 * Sequential Execution Across Non_Canonical Boundary Caused 958 * Processor Hang 959 */ 960 #if defined(OPTERON_ERRATUM_121) 961 #if defined(_LP64) 962 /* 963 * Erratum 121 is only present in long (64 bit) mode. 964 * Workaround is to include the page immediately before the 965 * va hole to eliminate the possibility of system hangs due to 966 * sequential execution across the va hole boundary. 967 */ 968 if (opteron_erratum_121) 969 opteron_erratum_121++; 970 else { 971 if (hole_start) { 972 hole_start -= PAGESIZE; 973 } else { 974 /* 975 * hole_start not yet initialized by 976 * mmu_init. Initialize hole_start 977 * with value to be subtracted. 978 */ 979 hole_start = PAGESIZE; 980 } 981 opteron_erratum_121++; 982 } 983 #endif /* _LP64 */ 984 #else 985 workaround_warning(cpu, 121); 986 missing++; 987 #endif 988 } 989 990 /*LINTED*/ 991 if (cpuid_opteron_erratum(cpu, 122) > 0) do { 992 /* 993 * TLB Flush Filter May Cause Coherency Problem in 994 * Multiprocessor Systems 995 */ 996 #if defined(OPTERON_ERRATUM_122) 997 uint64_t value; 998 const uint_t msr = MSR_AMD_HWCR; 999 int error; 1000 1001 /* 1002 * Erratum 122 is only present in MP configurations (multi-core 1003 * or multi-processor). 1004 */ 1005 #if defined(__xpv) 1006 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 1007 break; 1008 if (!opteron_erratum_122 && xpv_nr_phys_cpus() == 1) 1009 break; 1010 #else 1011 if (!opteron_erratum_122 && opteron_get_nnodes() == 1 && 1012 cpuid_get_ncpu_per_chip(cpu) == 1) 1013 break; 1014 #endif 1015 /* disable TLB Flush Filter */ 1016 1017 if ((error = checked_rdmsr(msr, &value)) != 0) { 1018 msr_warning(cpu, "rd", msr, error); 1019 workaround_warning(cpu, 122); 1020 missing++; 1021 } else { 1022 value |= (uint64_t)AMD_HWCR_FFDIS; 1023 if ((error = checked_wrmsr(msr, value)) != 0) { 1024 msr_warning(cpu, "wr", msr, error); 1025 workaround_warning(cpu, 122); 1026 missing++; 1027 } 1028 } 1029 opteron_erratum_122++; 1030 #else 1031 workaround_warning(cpu, 122); 1032 missing++; 1033 #endif 1034 /*CONSTANTCONDITION*/ 1035 } while (0); 1036 1037 /*LINTED*/ 1038 if (cpuid_opteron_erratum(cpu, 123) > 0) do { 1039 /* 1040 * Bypassed Reads May Cause Data Corruption of System Hang in 1041 * Dual Core Processors 1042 */ 1043 #if defined(OPTERON_ERRATUM_123) 1044 uint64_t value; 1045 const uint_t msr = MSR_AMD_PATCHLEVEL; 1046 int err; 1047 1048 /* 1049 * Erratum 123 applies only to multi-core cpus. 1050 */ 1051 if (cpuid_get_ncpu_per_chip(cpu) < 2) 1052 break; 1053 #if defined(__xpv) 1054 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 1055 break; 1056 #endif 1057 /* 1058 * The "workaround" is to print a warning to upgrade the BIOS 1059 */ 1060 if ((err = checked_rdmsr(msr, &value)) != 0) { 1061 msr_warning(cpu, "rd", msr, err); 1062 workaround_warning(cpu, 123); 1063 missing++; 1064 } 1065 if (value == 0) 1066 opteron_erratum_123++; 1067 #else 1068 workaround_warning(cpu, 123); 1069 missing++; 1070 1071 #endif 1072 /*CONSTANTCONDITION*/ 1073 } while (0); 1074 1075 /*LINTED*/ 1076 if (cpuid_opteron_erratum(cpu, 131) > 0) do { 1077 /* 1078 * Multiprocessor Systems with Four or More Cores May Deadlock 1079 * Waiting for a Probe Response 1080 */ 1081 #if defined(OPTERON_ERRATUM_131) 1082 uint64_t nbcfg; 1083 const uint_t msr = MSR_AMD_NB_CFG; 1084 const uint64_t wabits = 1085 AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR; 1086 int error; 1087 1088 /* 1089 * Erratum 131 applies to any system with four or more cores. 1090 */ 1091 if (opteron_erratum_131) 1092 break; 1093 #if defined(__xpv) 1094 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 1095 break; 1096 if (xpv_nr_phys_cpus() < 4) 1097 break; 1098 #else 1099 if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4) 1100 break; 1101 #endif 1102 /* 1103 * Print a warning if neither of the workarounds for 1104 * erratum 131 is present. 1105 */ 1106 if ((error = checked_rdmsr(msr, &nbcfg)) != 0) { 1107 msr_warning(cpu, "rd", msr, error); 1108 workaround_warning(cpu, 131); 1109 missing++; 1110 } else if ((nbcfg & wabits) == 0) { 1111 opteron_erratum_131++; 1112 } else { 1113 /* cannot have both workarounds set */ 1114 ASSERT((nbcfg & wabits) != wabits); 1115 } 1116 #else 1117 workaround_warning(cpu, 131); 1118 missing++; 1119 #endif 1120 /*CONSTANTCONDITION*/ 1121 } while (0); 1122 1123 /* 1124 * This isn't really an erratum, but for convenience the 1125 * detection/workaround code lives here and in cpuid_opteron_erratum. 1126 * Note, the technique only is valid on families before 12h and 1127 * certainly doesn't work when we're virtualized. This is checked for in 1128 * the erratum workaround. 1129 */ 1130 if (cpuid_opteron_erratum(cpu, 6336786) > 0) { 1131 #if defined(OPTERON_WORKAROUND_6336786) 1132 /* 1133 * Disable C1-Clock ramping on multi-core/multi-processor 1134 * K8 platforms to guard against TSC drift. 1135 */ 1136 if (opteron_workaround_6336786) { 1137 opteron_workaround_6336786++; 1138 #if defined(__xpv) 1139 } else if ((DOMAIN_IS_INITDOMAIN(xen_info) && 1140 xpv_nr_phys_cpus() > 1) || 1141 opteron_workaround_6336786_UP) { 1142 /* 1143 * XXPV Hmm. We can't walk the Northbridges on 1144 * the hypervisor; so just complain and drive 1145 * on. This probably needs to be fixed in 1146 * the hypervisor itself. 1147 */ 1148 opteron_workaround_6336786++; 1149 workaround_warning(cpu, 6336786); 1150 #else /* __xpv */ 1151 } else if ((opteron_get_nnodes() * 1152 cpuid_get_ncpu_per_chip(cpu) > 1) || 1153 opteron_workaround_6336786_UP) { 1154 1155 uint_t node, nnodes; 1156 uint8_t data; 1157 1158 nnodes = opteron_get_nnodes(); 1159 for (node = 0; node < nnodes; node++) { 1160 /* 1161 * Clear PMM7[1:0] (function 3, offset 0x87) 1162 * Northbridge device is the node id + 24. 1163 */ 1164 data = pci_getb_func(0, node + 24, 3, 0x87); 1165 data &= 0xFC; 1166 pci_putb_func(0, node + 24, 3, 0x87, data); 1167 } 1168 opteron_workaround_6336786++; 1169 #endif /* __xpv */ 1170 } 1171 #else 1172 workaround_warning(cpu, 6336786); 1173 missing++; 1174 #endif 1175 } 1176 1177 /*LINTED*/ 1178 /* 1179 * Mutex primitives don't work as expected. This is erratum #147 from 1180 * 'Revision Guide for AMD Athlon 64 and AMD Opteron Processors' 1181 * document 25759. 1182 */ 1183 if (cpuid_opteron_erratum(cpu, 6323525) > 0) { 1184 #if defined(OPTERON_WORKAROUND_6323525) 1185 /* 1186 * This problem only occurs with 2 or more cores. If bit in 1187 * MSR_AMD_BU_CFG set, then not applicable. The workaround 1188 * is to patch the semaphone routines with the lfence 1189 * instruction to provide necessary load memory barrier with 1190 * possible subsequent read-modify-write ops. 1191 * 1192 * It is too early in boot to call the patch routine so 1193 * set erratum variable to be done in startup_end(). 1194 */ 1195 if (opteron_workaround_6323525) { 1196 opteron_workaround_6323525++; 1197 #if defined(__xpv) 1198 } else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { 1199 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 1200 /* 1201 * XXPV Use dom0_msr here when extended 1202 * operations are supported? 1203 */ 1204 if (xpv_nr_phys_cpus() > 1) 1205 opteron_workaround_6323525++; 1206 } else { 1207 /* 1208 * We have no way to tell how many physical 1209 * cpus there are, or even if this processor 1210 * has the problem, so enable the workaround 1211 * unconditionally (at some performance cost). 1212 */ 1213 opteron_workaround_6323525++; 1214 } 1215 #else /* __xpv */ 1216 } else if (is_x86_feature(x86_featureset, X86FSET_SSE2) && 1217 ((opteron_get_nnodes() * 1218 cpuid_get_ncpu_per_chip(cpu)) > 1)) { 1219 if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0) 1220 opteron_workaround_6323525++; 1221 #endif /* __xpv */ 1222 } 1223 #else 1224 workaround_warning(cpu, 6323525); 1225 missing++; 1226 #endif 1227 } 1228 1229 missing += do_erratum_298(cpu); 1230 1231 if (cpuid_opteron_erratum(cpu, 721) > 0) { 1232 #if defined(OPTERON_ERRATUM_721) 1233 on_trap_data_t otd; 1234 1235 if (!on_trap(&otd, OT_DATA_ACCESS)) 1236 wrmsr(MSR_AMD_DE_CFG, 1237 rdmsr(MSR_AMD_DE_CFG) | AMD_DE_CFG_E721); 1238 no_trap(); 1239 1240 opteron_erratum_721++; 1241 #else 1242 workaround_warning(cpu, 721); 1243 missing++; 1244 #endif 1245 } 1246 1247 #ifdef __xpv 1248 return (0); 1249 #else 1250 return (missing); 1251 #endif 1252 } 1253 1254 void 1255 workaround_errata_end() 1256 { 1257 #if defined(OPTERON_ERRATUM_88) 1258 if (opteron_erratum_88) 1259 workaround_applied(88); 1260 #endif 1261 #if defined(OPTERON_ERRATUM_91) 1262 if (opteron_erratum_91) 1263 workaround_applied(91); 1264 #endif 1265 #if defined(OPTERON_ERRATUM_93) 1266 if (opteron_erratum_93) 1267 workaround_applied(93); 1268 #endif 1269 #if defined(OPTERON_ERRATUM_95) 1270 if (opteron_erratum_95) 1271 workaround_applied(95); 1272 #endif 1273 #if defined(OPTERON_ERRATUM_100) 1274 if (opteron_erratum_100) 1275 workaround_applied(100); 1276 #endif 1277 #if defined(OPTERON_ERRATUM_108) 1278 if (opteron_erratum_108) 1279 workaround_applied(108); 1280 #endif 1281 #if defined(OPTERON_ERRATUM_109) 1282 if (opteron_erratum_109) { 1283 cmn_err(CE_WARN, 1284 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)" 1285 " processor\nerratum 109 was not detected; updating your" 1286 " system's BIOS to a version\ncontaining this" 1287 " microcode patch is HIGHLY recommended or erroneous" 1288 " system\noperation may occur.\n"); 1289 } 1290 #endif 1291 #if defined(OPTERON_ERRATUM_121) 1292 if (opteron_erratum_121) 1293 workaround_applied(121); 1294 #endif 1295 #if defined(OPTERON_ERRATUM_122) 1296 if (opteron_erratum_122) 1297 workaround_applied(122); 1298 #endif 1299 #if defined(OPTERON_ERRATUM_123) 1300 if (opteron_erratum_123) { 1301 cmn_err(CE_WARN, 1302 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)" 1303 " processor\nerratum 123 was not detected; updating your" 1304 " system's BIOS to a version\ncontaining this" 1305 " microcode patch is HIGHLY recommended or erroneous" 1306 " system\noperation may occur.\n"); 1307 } 1308 #endif 1309 #if defined(OPTERON_ERRATUM_131) 1310 if (opteron_erratum_131) { 1311 cmn_err(CE_WARN, 1312 "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)" 1313 " processor\nerratum 131 was not detected; updating your" 1314 " system's BIOS to a version\ncontaining this" 1315 " microcode patch is HIGHLY recommended or erroneous" 1316 " system\noperation may occur.\n"); 1317 } 1318 #endif 1319 #if defined(OPTERON_WORKAROUND_6336786) 1320 if (opteron_workaround_6336786) 1321 workaround_applied(6336786); 1322 #endif 1323 #if defined(OPTERON_WORKAROUND_6323525) 1324 if (opteron_workaround_6323525) 1325 workaround_applied(6323525); 1326 #endif 1327 #if defined(OPTERON_ERRATUM_298) 1328 if (opteron_erratum_298) { 1329 cmn_err(CE_WARN, 1330 "BIOS microcode patch for AMD 64/Opteron(tm)" 1331 " processor\nerratum 298 was not detected; updating your" 1332 " system's BIOS to a version\ncontaining this" 1333 " microcode patch is HIGHLY recommended or erroneous" 1334 " system\noperation may occur.\n"); 1335 } 1336 #endif 1337 #if defined(OPTERON_ERRATUM_721) 1338 if (opteron_erratum_721) 1339 workaround_applied(721); 1340 #endif 1341 } 1342 1343 /* 1344 * The procset_slave and procset_master are used to synchronize 1345 * between the control CPU and the target CPU when starting CPUs. 1346 */ 1347 static cpuset_t procset_slave, procset_master; 1348 1349 static void 1350 mp_startup_wait(cpuset_t *sp, processorid_t cpuid) 1351 { 1352 cpuset_t tempset; 1353 1354 for (tempset = *sp; !CPU_IN_SET(tempset, cpuid); 1355 tempset = *(volatile cpuset_t *)sp) { 1356 SMT_PAUSE(); 1357 } 1358 CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid); 1359 } 1360 1361 static void 1362 mp_startup_signal(cpuset_t *sp, processorid_t cpuid) 1363 { 1364 cpuset_t tempset; 1365 1366 CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid); 1367 for (tempset = *sp; CPU_IN_SET(tempset, cpuid); 1368 tempset = *(volatile cpuset_t *)sp) { 1369 SMT_PAUSE(); 1370 } 1371 } 1372 1373 int 1374 mp_start_cpu_common(cpu_t *cp, boolean_t boot) 1375 { 1376 _NOTE(ARGUNUSED(boot)); 1377 1378 void *ctx; 1379 int delays; 1380 int error = 0; 1381 cpuset_t tempset; 1382 processorid_t cpuid; 1383 #ifndef __xpv 1384 extern void cpupm_init(cpu_t *); 1385 #endif 1386 1387 ASSERT(cp != NULL); 1388 cpuid = cp->cpu_id; 1389 ctx = mach_cpucontext_alloc(cp); 1390 if (ctx == NULL) { 1391 cmn_err(CE_WARN, 1392 "cpu%d: failed to allocate context", cp->cpu_id); 1393 return (EAGAIN); 1394 } 1395 error = mach_cpu_start(cp, ctx); 1396 if (error != 0) { 1397 cmn_err(CE_WARN, 1398 "cpu%d: failed to start, error %d", cp->cpu_id, error); 1399 mach_cpucontext_free(cp, ctx, error); 1400 return (error); 1401 } 1402 1403 for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid); 1404 delays++) { 1405 if (delays == 500) { 1406 /* 1407 * After five seconds, things are probably looking 1408 * a bit bleak - explain the hang. 1409 */ 1410 cmn_err(CE_NOTE, "cpu%d: started, " 1411 "but not running in the kernel yet", cpuid); 1412 } else if (delays > 2000) { 1413 /* 1414 * We waited at least 20 seconds, bail .. 1415 */ 1416 error = ETIMEDOUT; 1417 cmn_err(CE_WARN, "cpu%d: timed out", cpuid); 1418 mach_cpucontext_free(cp, ctx, error); 1419 return (error); 1420 } 1421 1422 /* 1423 * wait at least 10ms, then check again.. 1424 */ 1425 delay(USEC_TO_TICK_ROUNDUP(10000)); 1426 tempset = *((volatile cpuset_t *)&procset_slave); 1427 } 1428 CPUSET_ATOMIC_DEL(procset_slave, cpuid); 1429 1430 mach_cpucontext_free(cp, ctx, 0); 1431 1432 #ifndef __xpv 1433 if (tsc_gethrtime_enable) 1434 tsc_sync_master(cpuid); 1435 #endif 1436 1437 if (dtrace_cpu_init != NULL) { 1438 (*dtrace_cpu_init)(cpuid); 1439 } 1440 1441 /* 1442 * During CPU DR operations, the cpu_lock is held by current 1443 * (the control) thread. We can't release the cpu_lock here 1444 * because that will break the CPU DR logic. 1445 * On the other hand, CPUPM and processor group initialization 1446 * routines need to access the cpu_lock. So we invoke those 1447 * routines here on behalf of mp_startup_common(). 1448 * 1449 * CPUPM and processor group initialization routines depend 1450 * on the cpuid probing results. Wait for mp_startup_common() 1451 * to signal that cpuid probing is done. 1452 */ 1453 mp_startup_wait(&procset_slave, cpuid); 1454 #ifndef __xpv 1455 cpupm_init(cp); 1456 #endif 1457 (void) pg_cpu_init(cp, B_FALSE); 1458 cpu_set_state(cp); 1459 mp_startup_signal(&procset_master, cpuid); 1460 1461 return (0); 1462 } 1463 1464 /* 1465 * Start a single cpu, assuming that the kernel context is available 1466 * to successfully start another cpu. 1467 * 1468 * (For example, real mode code is mapped into the right place 1469 * in memory and is ready to be run.) 1470 */ 1471 int 1472 start_cpu(processorid_t who) 1473 { 1474 cpu_t *cp; 1475 int error = 0; 1476 cpuset_t tempset; 1477 1478 ASSERT(who != 0); 1479 1480 /* 1481 * Check if there's at least a Mbyte of kmem available 1482 * before attempting to start the cpu. 1483 */ 1484 if (kmem_avail() < 1024 * 1024) { 1485 /* 1486 * Kick off a reap in case that helps us with 1487 * later attempts .. 1488 */ 1489 kmem_reap(); 1490 return (ENOMEM); 1491 } 1492 1493 /* 1494 * First configure cpu. 1495 */ 1496 cp = mp_cpu_configure_common(who, B_TRUE); 1497 ASSERT(cp != NULL); 1498 1499 /* 1500 * Then start cpu. 1501 */ 1502 error = mp_start_cpu_common(cp, B_TRUE); 1503 if (error != 0) { 1504 mp_cpu_unconfigure_common(cp, error); 1505 return (error); 1506 } 1507 1508 mutex_exit(&cpu_lock); 1509 tempset = cpu_ready_set; 1510 while (!CPU_IN_SET(tempset, who)) { 1511 drv_usecwait(1); 1512 tempset = *((volatile cpuset_t *)&cpu_ready_set); 1513 } 1514 mutex_enter(&cpu_lock); 1515 1516 return (0); 1517 } 1518 1519 void 1520 start_other_cpus(int cprboot) 1521 { 1522 _NOTE(ARGUNUSED(cprboot)); 1523 1524 uint_t who; 1525 uint_t bootcpuid = 0; 1526 1527 /* 1528 * Initialize our own cpu_info. 1529 */ 1530 init_cpu_info(CPU); 1531 1532 #if !defined(__xpv) 1533 init_cpu_id_gdt(CPU); 1534 #endif 1535 1536 cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_idstr); 1537 cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_brandstr); 1538 1539 /* 1540 * KPTI initialisation happens very early in boot, before logging is 1541 * set up. Output a status message now as the boot CPU comes online. 1542 */ 1543 cmn_err(CE_CONT, "?KPTI %s (PCID %s, INVPCID %s)\n", 1544 kpti_enable ? "enabled" : "disabled", 1545 x86_use_pcid == 1 ? "in use" : 1546 (is_x86_feature(x86_featureset, X86FSET_PCID) ? "disabled" : 1547 "not supported"), 1548 x86_use_pcid == 1 && x86_use_invpcid == 1 ? "in use" : 1549 (is_x86_feature(x86_featureset, X86FSET_INVPCID) ? "disabled" : 1550 "not supported")); 1551 1552 /* 1553 * Initialize our syscall handlers 1554 */ 1555 init_cpu_syscall(CPU); 1556 1557 /* 1558 * Take the boot cpu out of the mp_cpus set because we know 1559 * it's already running. Add it to the cpu_ready_set for 1560 * precisely the same reason. 1561 */ 1562 CPUSET_DEL(mp_cpus, bootcpuid); 1563 CPUSET_ADD(cpu_ready_set, bootcpuid); 1564 1565 /* 1566 * skip the rest of this if 1567 * . only 1 cpu dectected and system isn't hotplug-capable 1568 * . not using MP 1569 */ 1570 if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) || 1571 use_mp == 0) { 1572 if (use_mp == 0) 1573 cmn_err(CE_CONT, "?***** Not in MP mode\n"); 1574 goto done; 1575 } 1576 1577 /* 1578 * perform such initialization as is needed 1579 * to be able to take CPUs on- and off-line. 1580 */ 1581 cpu_pause_init(); 1582 1583 xc_init_cpu(CPU); /* initialize processor crosscalls */ 1584 1585 if (mach_cpucontext_init() != 0) 1586 goto done; 1587 1588 flushes_require_xcalls = 1; 1589 1590 /* 1591 * We lock our affinity to the master CPU to ensure that all slave CPUs 1592 * do their TSC syncs with the same CPU. 1593 */ 1594 affinity_set(CPU_CURRENT); 1595 1596 for (who = 0; who < NCPU; who++) { 1597 if (!CPU_IN_SET(mp_cpus, who)) 1598 continue; 1599 ASSERT(who != bootcpuid); 1600 1601 mutex_enter(&cpu_lock); 1602 if (start_cpu(who) != 0) 1603 CPUSET_DEL(mp_cpus, who); 1604 cpu_state_change_notify(who, CPU_SETUP); 1605 mutex_exit(&cpu_lock); 1606 } 1607 1608 /* Free the space allocated to hold the microcode file */ 1609 ucode_cleanup(); 1610 1611 affinity_clear(); 1612 1613 mach_cpucontext_fini(); 1614 1615 done: 1616 if (get_hwenv() == HW_NATIVE) 1617 workaround_errata_end(); 1618 cmi_post_mpstartup(); 1619 1620 #if !defined(__xpv) 1621 /* 1622 * Once other CPUs have completed startup procedures, perform 1623 * initialization of hypervisor resources for HMA. 1624 */ 1625 hma_init(); 1626 #endif 1627 1628 if (use_mp && ncpus != boot_max_ncpus) { 1629 cmn_err(CE_NOTE, 1630 "System detected %d cpus, but " 1631 "only %d cpu(s) were enabled during boot.", 1632 boot_max_ncpus, ncpus); 1633 cmn_err(CE_NOTE, 1634 "Use \"boot-ncpus\" parameter to enable more CPU(s). " 1635 "See eeprom(1M)."); 1636 } 1637 } 1638 1639 int 1640 mp_cpu_configure(int cpuid) 1641 { 1642 cpu_t *cp; 1643 1644 if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1645 return (ENOTSUP); 1646 } 1647 1648 cp = cpu_get(cpuid); 1649 if (cp != NULL) { 1650 return (EALREADY); 1651 } 1652 1653 /* 1654 * Check if there's at least a Mbyte of kmem available 1655 * before attempting to start the cpu. 1656 */ 1657 if (kmem_avail() < 1024 * 1024) { 1658 /* 1659 * Kick off a reap in case that helps us with 1660 * later attempts .. 1661 */ 1662 kmem_reap(); 1663 return (ENOMEM); 1664 } 1665 1666 cp = mp_cpu_configure_common(cpuid, B_FALSE); 1667 ASSERT(cp != NULL && cpu_get(cpuid) == cp); 1668 1669 return (cp != NULL ? 0 : EAGAIN); 1670 } 1671 1672 int 1673 mp_cpu_unconfigure(int cpuid) 1674 { 1675 cpu_t *cp; 1676 1677 if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1678 return (ENOTSUP); 1679 } else if (cpuid < 0 || cpuid >= max_ncpus) { 1680 return (EINVAL); 1681 } 1682 1683 cp = cpu_get(cpuid); 1684 if (cp == NULL) { 1685 return (ENODEV); 1686 } 1687 mp_cpu_unconfigure_common(cp, 0); 1688 1689 return (0); 1690 } 1691 1692 /* 1693 * Startup function for 'other' CPUs (besides boot cpu). 1694 * Called from real_mode_start. 1695 * 1696 * WARNING: until CPU_READY is set, mp_startup_common and routines called by 1697 * mp_startup_common should not call routines (e.g. kmem_free) that could call 1698 * hat_unload which requires CPU_READY to be set. 1699 */ 1700 static void 1701 mp_startup_common(boolean_t boot) 1702 { 1703 cpu_t *cp = CPU; 1704 uchar_t new_x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 1705 extern void cpu_event_init_cpu(cpu_t *); 1706 1707 /* 1708 * We need to get TSC on this proc synced (i.e., any delta 1709 * from cpu0 accounted for) as soon as we can, because many 1710 * many things use gethrtime/pc_gethrestime, including 1711 * interrupts, cmn_err, etc. Before we can do that, we want to 1712 * clear TSC if we're on a buggy Sandy/Ivy Bridge CPU, so do that 1713 * right away. 1714 */ 1715 bzero(new_x86_featureset, BT_SIZEOFMAP(NUM_X86_FEATURES)); 1716 cpuid_pass1(cp, new_x86_featureset); 1717 1718 if (boot && get_hwenv() == HW_NATIVE && 1719 cpuid_getvendor(CPU) == X86_VENDOR_Intel && 1720 cpuid_getfamily(CPU) == 6 && 1721 (cpuid_getmodel(CPU) == 0x2d || cpuid_getmodel(CPU) == 0x3e) && 1722 is_x86_feature(new_x86_featureset, X86FSET_TSC)) { 1723 (void) wrmsr(REG_TSC, 0UL); 1724 } 1725 1726 /* Let the control CPU continue into tsc_sync_master() */ 1727 mp_startup_signal(&procset_slave, cp->cpu_id); 1728 1729 #ifndef __xpv 1730 if (tsc_gethrtime_enable) 1731 tsc_sync_slave(); 1732 #endif 1733 1734 /* 1735 * Once this was done from assembly, but it's safer here; if 1736 * it blocks, we need to be able to swtch() to and from, and 1737 * since we get here by calling t_pc, we need to do that call 1738 * before swtch() overwrites it. 1739 */ 1740 (void) (*ap_mlsetup)(); 1741 1742 #ifndef __xpv 1743 /* 1744 * Program this cpu's PAT 1745 */ 1746 pat_sync(); 1747 #endif 1748 1749 /* 1750 * Set up TSC_AUX to contain the cpuid for this processor 1751 * for the rdtscp instruction. 1752 */ 1753 if (is_x86_feature(x86_featureset, X86FSET_TSCP)) 1754 (void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id); 1755 1756 /* 1757 * Initialize this CPU's syscall handlers 1758 */ 1759 init_cpu_syscall(cp); 1760 1761 /* 1762 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the 1763 * highest level at which a routine is permitted to block on 1764 * an adaptive mutex (allows for cpu poke interrupt in case 1765 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks 1766 * device interrupts that may end up in the hat layer issuing cross 1767 * calls before CPU_READY is set. 1768 */ 1769 splx(ipltospl(LOCK_LEVEL)); 1770 sti(); 1771 1772 /* 1773 * There exists a small subset of systems which expose differing 1774 * MWAIT/MONITOR support between CPUs. If MWAIT support is absent from 1775 * the boot CPU, but is found on a later CPU, the system continues to 1776 * operate as if no MWAIT support is available. 1777 * 1778 * The reverse case, where MWAIT is available on the boot CPU but not 1779 * on a subsequently initialized CPU, is not presently allowed and will 1780 * result in a panic. 1781 */ 1782 if (is_x86_feature(x86_featureset, X86FSET_MWAIT) != 1783 is_x86_feature(new_x86_featureset, X86FSET_MWAIT)) { 1784 if (!is_x86_feature(x86_featureset, X86FSET_MWAIT)) { 1785 remove_x86_feature(new_x86_featureset, X86FSET_MWAIT); 1786 } else { 1787 panic("unsupported mixed cpu mwait support detected"); 1788 } 1789 } 1790 1791 /* 1792 * We could be more sophisticated here, and just mark the CPU 1793 * as "faulted" but at this point we'll opt for the easier 1794 * answer of dying horribly. Provided the boot cpu is ok, 1795 * the system can be recovered by booting with use_mp set to zero. 1796 */ 1797 if (workaround_errata(cp) != 0) 1798 panic("critical workaround(s) missing for cpu%d", cp->cpu_id); 1799 1800 /* 1801 * We can touch cpu_flags here without acquiring the cpu_lock here 1802 * because the cpu_lock is held by the control CPU which is running 1803 * mp_start_cpu_common(). 1804 * Need to clear CPU_QUIESCED flag before calling any function which 1805 * may cause thread context switching, such as kmem_alloc() etc. 1806 * The idle thread checks for CPU_QUIESCED flag and loops for ever if 1807 * it's set. So the startup thread may have no chance to switch back 1808 * again if it's switched away with CPU_QUIESCED set. 1809 */ 1810 cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED); 1811 1812 enable_pcid(); 1813 1814 /* 1815 * Setup this processor for XSAVE. 1816 */ 1817 if (fp_save_mech == FP_XSAVE) { 1818 xsave_setup_msr(cp); 1819 } 1820 1821 cpuid_pass2(cp); 1822 cpuid_pass3(cp); 1823 cpuid_pass4(cp, NULL); 1824 1825 /* 1826 * Correct cpu_idstr and cpu_brandstr on target CPU after 1827 * cpuid_pass1() is done. 1828 */ 1829 (void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN); 1830 (void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN); 1831 1832 cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS; 1833 1834 post_startup_cpu_fixups(); 1835 1836 cpu_event_init_cpu(cp); 1837 1838 /* 1839 * Enable preemption here so that contention for any locks acquired 1840 * later in mp_startup_common may be preempted if the thread owning 1841 * those locks is continuously executing on other CPUs (for example, 1842 * this CPU must be preemptible to allow other CPUs to pause it during 1843 * their startup phases). It's safe to enable preemption here because 1844 * the CPU state is pretty-much fully constructed. 1845 */ 1846 curthread->t_preempt = 0; 1847 1848 /* The base spl should still be at LOCK LEVEL here */ 1849 ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL)); 1850 set_base_spl(); /* Restore the spl to its proper value */ 1851 1852 pghw_physid_create(cp); 1853 /* 1854 * Delegate initialization tasks, which need to access the cpu_lock, 1855 * to mp_start_cpu_common() because we can't acquire the cpu_lock here 1856 * during CPU DR operations. 1857 */ 1858 mp_startup_signal(&procset_slave, cp->cpu_id); 1859 mp_startup_wait(&procset_master, cp->cpu_id); 1860 pg_cmt_cpu_startup(cp); 1861 1862 if (boot) { 1863 mutex_enter(&cpu_lock); 1864 cp->cpu_flags &= ~CPU_OFFLINE; 1865 cpu_enable_intr(cp); 1866 cpu_add_active(cp); 1867 mutex_exit(&cpu_lock); 1868 } 1869 1870 /* Enable interrupts */ 1871 (void) spl0(); 1872 1873 /* 1874 * Fill out cpu_ucode_info. Update microcode if necessary. Note that 1875 * this is done after pass1 on the boot CPU, but it needs to be later on 1876 * for the other CPUs. 1877 */ 1878 ucode_check(cp); 1879 cpuid_pass_ucode(cp, new_x86_featureset); 1880 1881 /* 1882 * Do a sanity check to make sure this new CPU is a sane thing 1883 * to add to the collection of processors running this system. 1884 * 1885 * XXX Clearly this needs to get more sophisticated, if x86 1886 * systems start to get built out of heterogenous CPUs; as is 1887 * likely to happen once the number of processors in a configuration 1888 * gets large enough. 1889 */ 1890 if (compare_x86_featureset(x86_featureset, new_x86_featureset) == 1891 B_FALSE) { 1892 cmn_err(CE_CONT, "cpu%d: featureset\n", cp->cpu_id); 1893 print_x86_featureset(new_x86_featureset); 1894 cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id); 1895 } 1896 1897 #ifndef __xpv 1898 { 1899 /* 1900 * Set up the CPU module for this CPU. This can't be done 1901 * before this CPU is made CPU_READY, because we may (in 1902 * heterogeneous systems) need to go load another CPU module. 1903 * The act of attempting to load a module may trigger a 1904 * cross-call, which will ASSERT unless this cpu is CPU_READY. 1905 */ 1906 cmi_hdl_t hdl; 1907 1908 if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU), 1909 cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) { 1910 if (is_x86_feature(x86_featureset, X86FSET_MCA)) 1911 cmi_mca_init(hdl); 1912 cp->cpu_m.mcpu_cmi_hdl = hdl; 1913 } 1914 } 1915 #endif /* __xpv */ 1916 1917 if (boothowto & RB_DEBUG) 1918 kdi_cpu_init(); 1919 1920 (void) mach_cpu_create_device_node(cp, NULL); 1921 1922 /* 1923 * Setting the bit in cpu_ready_set must be the last operation in 1924 * processor initialization; the boot CPU will continue to boot once 1925 * it sees this bit set for all active CPUs. 1926 */ 1927 CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id); 1928 1929 cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr); 1930 cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr); 1931 cmn_err(CE_CONT, "?cpu%d initialization complete - online\n", 1932 cp->cpu_id); 1933 1934 /* 1935 * Now we are done with the startup thread, so free it up. 1936 */ 1937 thread_exit(); 1938 /*NOTREACHED*/ 1939 } 1940 1941 /* 1942 * Startup function for 'other' CPUs at boot time (besides boot cpu). 1943 */ 1944 static void 1945 mp_startup_boot(void) 1946 { 1947 mp_startup_common(B_TRUE); 1948 } 1949 1950 /* 1951 * Startup function for hotplug CPUs at runtime. 1952 */ 1953 void 1954 mp_startup_hotplug(void) 1955 { 1956 mp_startup_common(B_FALSE); 1957 } 1958 1959 /* 1960 * Start CPU on user request. 1961 */ 1962 /* ARGSUSED */ 1963 int 1964 mp_cpu_start(struct cpu *cp) 1965 { 1966 ASSERT(MUTEX_HELD(&cpu_lock)); 1967 return (0); 1968 } 1969 1970 /* 1971 * Stop CPU on user request. 1972 */ 1973 int 1974 mp_cpu_stop(struct cpu *cp) 1975 { 1976 extern int cbe_psm_timer_mode; 1977 ASSERT(MUTEX_HELD(&cpu_lock)); 1978 1979 #ifdef __xpv 1980 /* 1981 * We can't offline vcpu0. 1982 */ 1983 if (cp->cpu_id == 0) 1984 return (EBUSY); 1985 #endif 1986 1987 /* 1988 * If TIMER_PERIODIC mode is used, CPU0 is the one running it; 1989 * can't stop it. (This is true only for machines with no TSC.) 1990 */ 1991 1992 if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0)) 1993 return (EBUSY); 1994 1995 return (0); 1996 } 1997 1998 /* 1999 * Take the specified CPU out of participation in interrupts. 2000 * 2001 * Usually, we hold cpu_lock. But we cannot assert as such due to the 2002 * exception - i_cpr_save_context() - where we have mutual exclusion via a 2003 * separate mechanism. 2004 */ 2005 int 2006 cpu_disable_intr(struct cpu *cp) 2007 { 2008 if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS) 2009 return (EBUSY); 2010 2011 cp->cpu_flags &= ~CPU_ENABLE; 2012 ncpus_intr_enabled--; 2013 return (0); 2014 } 2015 2016 /* 2017 * Allow the specified CPU to participate in interrupts. 2018 */ 2019 void 2020 cpu_enable_intr(struct cpu *cp) 2021 { 2022 ASSERT(MUTEX_HELD(&cpu_lock)); 2023 cp->cpu_flags |= CPU_ENABLE; 2024 ncpus_intr_enabled++; 2025 psm_enable_intr(cp->cpu_id); 2026 } 2027 2028 void 2029 mp_cpu_faulted_enter(struct cpu *cp) 2030 { 2031 #ifdef __xpv 2032 _NOTE(ARGUNUSED(cp)); 2033 #else 2034 cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl; 2035 2036 if (hdl != NULL) { 2037 cmi_hdl_hold(hdl); 2038 } else { 2039 hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), 2040 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); 2041 } 2042 if (hdl != NULL) { 2043 cmi_faulted_enter(hdl); 2044 cmi_hdl_rele(hdl); 2045 } 2046 #endif 2047 } 2048 2049 void 2050 mp_cpu_faulted_exit(struct cpu *cp) 2051 { 2052 #ifdef __xpv 2053 _NOTE(ARGUNUSED(cp)); 2054 #else 2055 cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl; 2056 2057 if (hdl != NULL) { 2058 cmi_hdl_hold(hdl); 2059 } else { 2060 hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), 2061 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); 2062 } 2063 if (hdl != NULL) { 2064 cmi_faulted_exit(hdl); 2065 cmi_hdl_rele(hdl); 2066 } 2067 #endif 2068 } 2069 2070 /* 2071 * The following two routines are used as context operators on threads belonging 2072 * to processes with a private LDT (see sysi86). Due to the rarity of such 2073 * processes, these routines are currently written for best code readability and 2074 * organization rather than speed. We could avoid checking x86_featureset at 2075 * every context switch by installing different context ops, depending on 2076 * x86_featureset, at LDT creation time -- one for each combination of fast 2077 * syscall features. 2078 */ 2079 2080 void 2081 cpu_fast_syscall_disable(void) 2082 { 2083 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 2084 is_x86_feature(x86_featureset, X86FSET_SEP)) 2085 cpu_sep_disable(); 2086 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 2087 is_x86_feature(x86_featureset, X86FSET_ASYSC)) 2088 cpu_asysc_disable(); 2089 } 2090 2091 void 2092 cpu_fast_syscall_enable(void) 2093 { 2094 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 2095 is_x86_feature(x86_featureset, X86FSET_SEP)) 2096 cpu_sep_enable(); 2097 if (is_x86_feature(x86_featureset, X86FSET_MSR) && 2098 is_x86_feature(x86_featureset, X86FSET_ASYSC)) 2099 cpu_asysc_enable(); 2100 } 2101 2102 static void 2103 cpu_sep_enable(void) 2104 { 2105 ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP)); 2106 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL); 2107 2108 wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL); 2109 2110 CPU->cpu_m.mcpu_fast_syscall_state |= FSS_SEP_ENABLED; 2111 } 2112 2113 static void 2114 cpu_sep_disable(void) 2115 { 2116 ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP)); 2117 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL); 2118 2119 /* 2120 * Setting the SYSENTER_CS_MSR register to 0 causes software executing 2121 * the sysenter or sysexit instruction to trigger a #gp fault. 2122 */ 2123 wrmsr(MSR_INTC_SEP_CS, 0); 2124 2125 CPU->cpu_m.mcpu_fast_syscall_state &= ~FSS_SEP_ENABLED; 2126 } 2127 2128 static void 2129 cpu_asysc_enable(void) 2130 { 2131 ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC)); 2132 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL); 2133 2134 wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) | 2135 (uint64_t)(uintptr_t)AMD_EFER_SCE); 2136 2137 CPU->cpu_m.mcpu_fast_syscall_state |= FSS_ASYSC_ENABLED; 2138 } 2139 2140 static void 2141 cpu_asysc_disable(void) 2142 { 2143 ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC)); 2144 ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL); 2145 2146 /* 2147 * Turn off the SCE (syscall enable) bit in the EFER register. Software 2148 * executing syscall or sysret with this bit off will incur a #ud trap. 2149 */ 2150 wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) & 2151 ~((uint64_t)(uintptr_t)AMD_EFER_SCE)); 2152 2153 CPU->cpu_m.mcpu_fast_syscall_state &= ~FSS_ASYSC_ENABLED; 2154 } 2155