/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2020 Joyent, Inc.
 * Copyright 2024 MNX Cloud, Inc.
 */

/*
 * Process switching routines.
 */

/*
 * NOTE(review): the header names inside the angle brackets were lost when
 * this file was extracted (only the bare "#include" tokens survived).  The
 * names below are reconstructed from the illumos intel swtch source -- verify
 * against the build before relying on them.
 */
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>
#include <sys/psw.h>

#include "assym.h"

/*
 * resume(thread_id_t t);
 *
 * a thread can only run on one processor at a time. there
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * some overlap between outgoing and incoming threads can happen
 * when they are the same thread. in this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process. Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted).
This means that the * resume lock is already held and that a restore context is not needed. * Also, the MMU context is not changed on the resume in this case. * * resume_from_zombie() is the same as resume except the calling thread * is a zombie and must be put on the deathrow list after the CPU is * off the stack. */ #if LWP_PCB_FPU != 0 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work #endif /* LWP_PCB_FPU != 0 */ /* * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) * * The stack frame must be created before the save of %rsp so that tracebacks * of swtch()ed-out processes show the process as having last called swtch(). */ #define SAVE_REGS(thread_t, retaddr) \ movq %rbp, T_RBP(thread_t); \ movq %rbx, T_RBX(thread_t); \ movq %r12, T_R12(thread_t); \ movq %r13, T_R13(thread_t); \ movq %r14, T_R14(thread_t); \ movq %r15, T_R15(thread_t); \ pushq %rbp; \ movq %rsp, %rbp; \ movq %rsp, T_SP(thread_t); \ movq retaddr, T_PC(thread_t); \ movq %rdi, %r12; \ call __dtrace_probe___sched_off__cpu /* * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) * * We load up %rsp from the label_t as part of the context switch, so * we don't repeat that here. * * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t * already has the effect of putting the stack back the way it was when * we came in. */ #define RESTORE_REGS(scratch_reg) \ movq %gs:CPU_THREAD, scratch_reg; \ movq T_RBP(scratch_reg), %rbp; \ movq T_RBX(scratch_reg), %rbx; \ movq T_R12(scratch_reg), %r12; \ movq T_R13(scratch_reg), %r13; \ movq T_R14(scratch_reg), %r14; \ movq T_R15(scratch_reg), %r15 /* * Get pointer to a thread's hat structure */ #define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \ movq T_PROCP(thread_t), hatp; \ movq P_AS(hatp), scratch_reg; \ movq A_HAT(scratch_reg), hatp #define TSC_READ() \ call tsc_read; \ movq %rax, %r14; /* * If we are resuming an interrupt thread, store a timestamp in the thread * structure. 
If an interrupt occurs between tsc_read() and its subsequent * store, the timestamp will be stale by the time it is stored. We can detect * this by doing a compare-and-swap on the thread's timestamp, since any * interrupt occurring in this window will put a new timestamp in the thread's * t_intr_start field. */ #define STORE_INTR_START(thread_t) \ testw $T_INTR_THREAD, T_FLAGS(thread_t); \ jz 1f; \ 0: \ TSC_READ(); \ movq T_INTR_START(thread_t), %rax; \ cmpxchgq %r14, T_INTR_START(thread_t); \ jnz 0b; \ 1: .global kpti_enable ENTRY(resume) movq %gs:CPU_THREAD, %rax leaq resume_return(%rip), %r11 /* * Deal with SMAP here. A thread may be switched out at any point while * it is executing. The thread could be under on_fault() or it could be * pre-empted while performing a copy interruption. If this happens and * we're not in the context of an interrupt which happens to handle * saving and restoring rflags correctly, we may lose our SMAP related * state. * * To handle this, as part of being switched out, we first save whether * or not userland access is allowed ($PS_ACHK in rflags) and store that * in t_useracc on the kthread_t and unconditionally enable SMAP to * protect the system. * * Later, when the thread finishes resuming, we potentially disable smap * if PS_ACHK was present in rflags. See uts/intel/ml/copy.s for * more information on rflags and SMAP. */ pushfq popq %rsi andq $PS_ACHK, %rsi movq %rsi, T_USERACC(%rax) call smap_enable /* * Take a moment to potentially clear the RSB buffer. This is done to * prevent various Spectre variant 2 and SpectreRSB attacks. This may * not be sufficient. Please see uts/intel/ml/retpoline.S for more * information about this. */ call x86_rsb_stuff /* * Take another moment to potentially clear the branch history buffer * (BHB). This is done to prevent recent discoveries that branch * history can also be trained to exploit certain compiler-generated * instruction sequences (known as "gadgets") to leak data * speculatively. 
As with x86_rsb_stuff, see retpoline.S, and this * may not be sufficient. */ call x86_bhb_clear /* * Save non-volatile registers, and set return address for current * thread to resume_return. * * %r12 = t (new thread) when done */ SAVE_REGS(%rax, %r11) LOADCPU(%r15) /* %r15 = CPU */ movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */ /* * Call savectx if thread has installed context ops. * * Note that if we have floating point context, the save op * (either fpsave_begin or fpxsave_begin) will issue the * async save instruction (fnsave or fxsave respectively) * that we fwait for below. */ cmpq $0, T_CTX(%r13) /* should current thread savectx? */ je .nosavectx /* skip call when zero */ movq %r13, %rdi /* arg = thread pointer */ call savectx /* call ctx ops */ .nosavectx: /* * Check that the curthread is not using the FPU while in the kernel. */ call kernel_fpu_no_swtch /* * Call savepctx if process has installed context ops. */ movq T_PROCP(%r13), %r14 /* %r14 = proc */ cmpq $0, P_PCTX(%r14) /* should current thread savepctx? */ je .nosavepctx /* skip call when zero */ movq %r14, %rdi /* arg = proc pointer */ call savepctx /* call ctx ops */ .nosavepctx: /* * Temporarily switch to the idle thread's stack */ movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */ /* * Set the idle thread as the current thread */ movq T_SP(%rax), %rsp /* It is safe to set rsp */ movq %rax, CPU_THREAD(%r15) /* * Switch in the hat context for the new thread * */ GET_THREAD_HATP(%rdi, %r12, %r11) call hat_switch /* * Clear and unlock previous thread's t_lock * to allow it to be dispatched by another processor. */ movb $0, T_LOCK(%r13) /* * IMPORTANT: Registers at this point must be: * %r12 = new thread * * Here we are in the idle thread, have dropped the old thread. */ ALTENTRY(_resume_from_idle) /* * spin until dispatched thread's mutex has * been unlocked. this mutex is unlocked when * it becomes safe for the thread to run. 
*/ .lock_thread_mutex: lock btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */ jnc .thread_mutex_locked /* got it */ .spin_thread_mutex: pause cmpb $0, T_LOCK(%r12) /* check mutex status */ jz .lock_thread_mutex /* clear, retry lock */ jmp .spin_thread_mutex /* still locked, spin... */ .thread_mutex_locked: /* * Fix CPU structure to indicate new running thread. * Set pointer in new thread to the CPU structure. */ LOADCPU(%r13) /* load current CPU pointer */ cmpq %r13, T_CPU(%r12) je .setup_cpu /* cp->cpu_stats.sys.cpumigrate++ */ incq CPU_STATS_SYS_CPUMIGRATE(%r13) movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */ .setup_cpu: /* * Setup rsp0 (kernel stack) in TSS to curthread's saved regs * structure. If this thread doesn't have a regs structure above * the stack -- that is, if lwp_stk_init() was never called for the * thread -- this will set rsp0 to the wrong value, but it's harmless * as it's a kernel thread, and it won't actually attempt to implicitly * use the rsp0 via a privilege change. * * Note that when we have KPTI enabled on amd64, we never use this * value at all (since all the interrupts have an IST set). */ movq CPU_TSS(%r13), %r14 #if !defined(__xpv) cmpq $1, kpti_enable jne 1f leaq CPU_KPTI_TR_RSP(%r13), %rax jmp 2f 1: movq T_STACK(%r12), %rax addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ 2: movq %rax, TSS_RSP0(%r14) #else movq T_STACK(%r12), %rax addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ movl $KDS_SEL, %edi movq %rax, %rsi call HYPERVISOR_stack_switch #endif /* __xpv */ movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */ mfence /* synchronize with mutex_exit() */ xorl %ebp, %ebp /* make $