/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2020 Joyent, Inc.
 */

/*
 * Process switching routines.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/stack.h>
#include <sys/segments.h>
#include <sys/psw.h>

#include "assym.h"

/*
 * resume(thread_id_t t);
 *
 * a thread can only run on one processor at a time. there
 * exists a window on MPs where the current thread on one
 * processor is capable of being dispatched by another processor.
 * some overlap between outgoing and incoming threads can happen
 * when they are the same thread. in this case where the threads
 * are the same, resume() on one processor will spin on the incoming
 * thread until resume() on the other processor has finished with
 * the outgoing thread.
 *
 * The MMU context changes when the resuming thread resides in a different
 * process. Kernel threads are known by resume to reside in process 0.
 * The MMU context, therefore, only changes when resuming a thread in
 * a process different from curproc.
 *
 * resume_from_intr() is called when the thread being resumed was not
 * passivated by resume (e.g. was interrupted). This means that the
 * resume lock is already held and that a restore context is not needed.
 * Also, the MMU context is not changed on the resume in this case.
 *
 * resume_from_zombie() is the same as resume except the calling thread
 * is a zombie and must be put on the deathrow list after the CPU is
 * off the stack.
 */

#if LWP_PCB_FPU != 0
#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
#endif	/* LWP_PCB_FPU != 0 */

/*
 * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * The stack frame must be created before the save of %rsp so that tracebacks
 * of swtch()ed-out processes show the process as having last called swtch().
 */
#define	SAVE_REGS(thread_t, retaddr)			\
	movq	%rbp, T_RBP(thread_t);			\
	movq	%rbx, T_RBX(thread_t);			\
	movq	%r12, T_R12(thread_t);			\
	movq	%r13, T_R13(thread_t);			\
	movq	%r14, T_R14(thread_t);			\
	movq	%r15, T_R15(thread_t);			\
	pushq	%rbp;					\
	movq	%rsp, %rbp;				\
	movq	%rsp, T_SP(thread_t);			\
	movq	retaddr, T_PC(thread_t);		\
	movq	%rdi, %r12;				\
	call	__dtrace_probe___sched_off__cpu
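
/*
 * Note that SAVE_REGS stashes the incoming thread pointer (passed to
 * resume() in %rdi) in the callee-saved register %r12, so that it survives
 * the calls made while switching; the resume*() paths below rely on %r12
 * holding the new thread once SAVE_REGS completes.
 */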

/*
 * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
 *
 * We load up %rsp from the label_t as part of the context switch, so
 * we don't repeat that here.
 *
 * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
 * already has the effect of putting the stack back the way it was when
 * we came in.
 */
#define	RESTORE_REGS(scratch_reg)			\
	movq	%gs:CPU_THREAD, scratch_reg;		\
	movq	T_RBP(scratch_reg), %rbp;		\
	movq	T_RBX(scratch_reg), %rbx;		\
	movq	T_R12(scratch_reg), %r12;		\
	movq	T_R13(scratch_reg), %r13;		\
	movq	T_R14(scratch_reg), %r14;		\
	movq	T_R15(scratch_reg), %r15

/*
 * Get pointer to a thread's hat structure
 */
#define	GET_THREAD_HATP(hatp, thread_t, scratch_reg)	\
	movq	T_PROCP(thread_t), hatp;		\
	movq	P_AS(hatp), scratch_reg;		\
	movq	A_HAT(scratch_reg), hatp

#define	TSC_READ()					\
	call	tsc_read;				\
	movq	%rax, %r14;

/*
 * If we are resuming an interrupt thread, store a timestamp in the thread
 * structure. If an interrupt occurs between tsc_read() and its subsequent
 * store, the timestamp will be stale by the time it is stored. We can detect
 * this by doing a compare-and-swap on the thread's timestamp, since any
 * interrupt occurring in this window will put a new timestamp in the thread's
 * t_intr_start field.
 */
#define	STORE_INTR_START(thread_t)			\
	testw	$T_INTR_THREAD, T_FLAGS(thread_t);	\
	jz	1f;					\
0:							\
	TSC_READ();					\
	movq	T_INTR_START(thread_t), %rax;		\
	cmpxchgq %r14, T_INTR_START(thread_t);		\
	jnz	0b;					\
1:
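
/*
 * For reference, STORE_INTR_START() is roughly the following C. This is an
 * illustrative sketch only (not part of the build): member names follow the
 * T_* assym offsets used above rather than the exact kthread_t field names,
 * and atomic_cas_64() from <sys/atomic.h> stands in for the cmpxchgq.
 *
 *	if (t->t_flags & T_INTR_THREAD) {
 *		uint64_t old, new;
 *		do {
 *			new = tsc_read();
 *			old = t->t_intr_start;
 *		} while (atomic_cas_64(&t->t_intr_start, old, new) != old);
 *	}
 *
 * The store only takes effect if no interrupt updated t_intr_start between
 * the tsc_read() and the compare-and-swap; otherwise we take a fresh
 * timestamp and try again.
 */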

	.global	kpti_enable

	ENTRY(resume)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_return(%rip), %r11

	/*
	 * Deal with SMAP here. A thread may be switched out at any point
	 * while it is executing. The thread could be under on_fault() or it
	 * could be pre-empted in the middle of a copy operation. If this
	 * happens and we're not in the context of an interrupt which happens
	 * to handle saving and restoring rflags correctly, we may lose our
	 * SMAP related state.
	 *
	 * To handle this, as part of being switched out, we first save whether
	 * or not userland access is allowed ($PS_ACHK in rflags) and store
	 * that in t_useracc on the kthread_t and unconditionally enable SMAP
	 * to protect the system.
	 *
	 * Later, when the thread finishes resuming, we potentially disable
	 * SMAP if PS_ACHK was present in rflags. See uts/intel/ml/copy.s for
	 * more information on rflags and SMAP.
	 */
	pushfq
	popq	%rsi
	andq	$PS_ACHK, %rsi
	movq	%rsi, T_USERACC(%rax)
	call	smap_enable

	/*
	 * Take a moment to potentially clear the RSB buffer. This is done to
	 * prevent various Spectre variant 2 and SpectreRSB attacks. This may
	 * not be sufficient. Please see uts/intel/ml/retpoline.s for more
	 * information about this.
	 */
	call	x86_rsb_stuff

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	LOADCPU(%r15)				/* %r15 = CPU */
	movq	CPU_THREAD(%r15), %r13		/* %r13 = curthread */

	/*
	 * Call savectx if thread has installed context ops.
	 *
	 * Note that if we have floating point context, the save op
	 * (either fpsave_begin or fpxsave_begin) will issue the
	 * async save instruction (fnsave or fxsave respectively)
	 * that we fwait for below.
	 */
	cmpq	$0, T_CTX(%r13)		/* should current thread savectx? */
	je	.nosavectx		/* skip call when zero */

	movq	%r13, %rdi		/* arg = thread pointer */
	call	savectx			/* call ctx ops */
.nosavectx:

	/*
	 * Check that the curthread is not using the FPU while in the kernel.
	 */
	call	kernel_fpu_no_swtch

	/*
	 * Call savepctx if process has installed context ops.
	 */
	movq	T_PROCP(%r13), %r14	/* %r14 = proc */
	cmpq	$0, P_PCTX(%r14)	/* should current thread savepctx? */
	je	.nosavepctx		/* skip call when zero */

	movq	%r14, %rdi		/* arg = proc pointer */
	call	savepctx		/* call ctx ops */
.nosavepctx:

	/*
	 * Temporarily switch to the idle thread's stack
	 */
	movq	CPU_IDLE_THREAD(%r15), %rax	/* idle thread pointer */

	/*
	 * Set the idle thread as the current thread
	 */
	movq	T_SP(%rax), %rsp	/* It is safe to set rsp */
	movq	%rax, CPU_THREAD(%r15)

	/*
	 * Switch in the hat context for the new thread
	 */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	/*
	 * Clear and unlock previous thread's t_lock
	 * to allow it to be dispatched by another processor.
	 */
	movb	$0, T_LOCK(%r13)

	/*
	 * IMPORTANT: Registers at this point must be:
	 *	%r12 = new thread
	 *
	 * Here we are in the idle thread, have dropped the old thread.
	 */
	ALTENTRY(_resume_from_idle)
	/*
	 * spin until dispatched thread's mutex has
	 * been unlocked. this mutex is unlocked when
	 * it becomes safe for the thread to run.
	 */
.lock_thread_mutex:
	lock
	btsl	$0, T_LOCK(%r12)	/* attempt to lock new thread's mutex */
	jnc	.thread_mutex_locked	/* got it */

.spin_thread_mutex:
	pause
	cmpb	$0, T_LOCK(%r12)	/* check mutex status */
	jz	.lock_thread_mutex	/* clear, retry lock */
	jmp	.spin_thread_mutex	/* still locked, spin... */
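
	/*
	 * The acquisition loop above is a test-and-set followed by a
	 * read-only spin, roughly equivalent to this illustrative C sketch
	 * (not compiled; tas_try() is a hypothetical test-and-set helper
	 * returning non-zero on success):
	 *
	 *	while (tas_try(&t->t_lock) == 0) {
	 *		while (t->t_lock != 0)
	 *			pause;
	 *	}
	 *
	 * Spinning with pause on a plain read keeps the waiting CPU from
	 * hammering the cache line while resume() on the other processor
	 * finishes with the thread, as described at the top of this file.
	 */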

.thread_mutex_locked:
	/*
	 * Fix CPU structure to indicate new running thread.
	 * Set pointer in new thread to the CPU structure.
	 */
	LOADCPU(%r13)			/* load current CPU pointer */
	cmpq	%r13, T_CPU(%r12)
	je	.setup_cpu

	/* cp->cpu_stats.sys.cpumigrate++ */
	incq	CPU_STATS_SYS_CPUMIGRATE(%r13)
	movq	%r13, T_CPU(%r12)	/* set new thread's CPU pointer */

.setup_cpu:
	/*
	 * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
	 * structure. If this thread doesn't have a regs structure above
	 * the stack -- that is, if lwp_stk_init() was never called for the
	 * thread -- this will set rsp0 to the wrong value, but it's harmless
	 * as it's a kernel thread, and it won't actually attempt to implicitly
	 * use the rsp0 via a privilege change.
	 *
	 * Note that when we have KPTI enabled on amd64, we never use this
	 * value at all (since all the interrupts have an IST set).
	 */
	movq	CPU_TSS(%r13), %r14
#if !defined(__xpv)
	cmpq	$1, kpti_enable
	jne	1f
	leaq	CPU_KPTI_TR_RSP(%r13), %rax
	jmp	2f
1:
	movq	T_STACK(%r12), %rax
	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
2:
	movq	%rax, TSS_RSP0(%r14)
#else
	movq	T_STACK(%r12), %rax
	addq	$REGSIZE+MINFRAME, %rax	/* to the bottom of thread stack */
	movl	$KDS_SEL, %edi
	movq	%rax, %rsi
	call	HYPERVISOR_stack_switch
#endif	/* __xpv */

	movq	%r12, CPU_THREAD(%r13)	/* set CPU's thread pointer */
	mfence				/* synchronize with mutex_exit() */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */
	movq	T_LWP(%r12), %rax	/* set associated lwp to */
	movq	%rax, CPU_LWP(%r13)	/* CPU's lwp ptr */

	movq	T_SP(%r12), %rsp	/* switch to resuming thread's stack */
	movq	T_PC(%r12), %r13	/* saved return addr */

	/*
	 * Call restorectx if context ops have been installed.
	 */
	cmpq	$0, T_CTX(%r12)		/* should resumed thread restorectx? */
	jz	.norestorectx		/* skip call when zero */
	movq	%r12, %rdi		/* arg = thread pointer */
	call	restorectx		/* call ctx ops */
.norestorectx:

	/*
	 * Call restorepctx if context ops have been installed for the proc.
	 */
	movq	T_PROCP(%r12), %rcx
	cmpq	$0, P_PCTX(%rcx)
	jz	.norestorepctx
	movq	%rcx, %rdi
	call	restorepctx
.norestorepctx:

	STORE_INTR_START(%r12)

	/*
	 * If we came into swtch with the ability to access userland pages, go
	 * ahead and restore that fact by disabling SMAP. Clear the indicator
	 * flag out of paranoia.
	 */
	movq	T_USERACC(%r12), %rax	/* should we disable smap? */
	cmpq	$0, %rax		/* skip call when zero */
	jz	.nosmap
	xorq	%rax, %rax
	movq	%rax, T_USERACC(%r12)
	call	smap_disable
.nosmap:

	call	smt_mark

	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	%r13, %rax		/* save return address */
	RESTORE_REGS(%r11)
	pushq	%rax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(_resume_from_idle)
	SET_SIZE(resume)

	ENTRY(resume_from_zombie)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_zombie_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_zombie_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */

	/* clean up the fp unit. It might be left enabled */

#if defined(__xpv)		/* XXPV XXtclayton */
	/*
	 * Remove this after bringup.
	 * (Too many #gp's for an instrumented hypervisor.)
	 */
	STTS(%rax)
#else
	movq	%cr0, %rax
	testq	$CR0_TS, %rax
	jnz	.zfpu_disabled		/* if TS already set, nothing to do */
	fninit				/* init fpu & discard pending error */
	orq	$CR0_TS, %rax
	movq	%rax, %cr0
.zfpu_disabled:

#endif	/* __xpv */
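
	/*
	 * The net effect of the block above is to leave CR0.TS set: fninit
	 * discards whatever x87 state and pending exceptions the zombie left
	 * behind, and with TS set any later FPU use traps (#NM) rather than
	 * silently running with stale state.
	 */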

	/*
	 * Temporarily switch to the idle thread's stack so that the zombie
	 * thread's stack can be reclaimed by the reaper.
	 */
	movq	%gs:CPU_IDLE_THREAD, %rax	/* idle thread pointer */
	movq	T_SP(%rax), %rsp	/* get onto idle thread stack */

	/*
	 * Sigh. If the idle thread has never run thread_start()
	 * then t_sp is mis-aligned by thread_load().
	 */
	andq	$_BITNOT(STACK_ALIGN-1), %rsp

	/*
	 * Set the idle thread as the current thread.
	 */
	movq	%rax, %gs:CPU_THREAD

	/* switch in the hat context for the new thread */
	GET_THREAD_HATP(%rdi, %r12, %r11)
	call	hat_switch

	/*
	 * Put the zombie on death-row.
	 */
	movq	%r13, %rdi
	call	reapq_add

	jmp	_resume_from_idle	/* finish job of resume */

resume_from_zombie_return:
	RESTORE_REGS(%r11)		/* restore non-volatile registers */
	call	__dtrace_probe___sched_on__cpu

	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_zombie)

	ENTRY(resume_from_intr)
	movq	%gs:CPU_THREAD, %rax
	leaq	resume_from_intr_return(%rip), %r11

	/*
	 * Save non-volatile registers, and set return address for current
	 * thread to resume_from_intr_return.
	 *
	 * %r12 = t (new thread) when done
	 */
	SAVE_REGS(%rax, %r11)

	movq	%gs:CPU_THREAD, %r13	/* %r13 = curthread */
	movq	%r12, %gs:CPU_THREAD	/* set CPU's thread pointer */
	mfence				/* synchronize with mutex_exit() */
	movq	T_SP(%r12), %rsp	/* restore resuming thread's sp */
	xorl	%ebp, %ebp		/* make $<threadlist behave better */

	/*
	 * Unlock the outgoing thread's mutex to allow it to be dispatched
	 * by another processor.
	 */
	xorl	%eax, %eax
	xchgb	%al, T_LOCK(%r13)

	STORE_INTR_START(%r12)

	call	smt_mark

	/*
	 * Restore non-volatile registers, then have spl0 return to the
	 * resuming thread's PC after first setting the priority as low as
	 * possible and blocking all interrupt threads that may be active.
	 */
	movq	T_PC(%r12), %rax	/* saved return addr */
	RESTORE_REGS(%r11);
	pushq	%rax			/* push return address for spl0() */
	call	__dtrace_probe___sched_on__cpu
	jmp	spl0

resume_from_intr_return:
	/*
	 * Remove stack frame created in SAVE_REGS()
	 */
	addq	$CLONGSIZE, %rsp
	ret
	SET_SIZE(resume_from_intr)

	ENTRY(thread_start)
	popq	%rax		/* start() */
	popq	%rdi		/* arg */
	popq	%rsi		/* len */
	movq	%rsp, %rbp
	INDIRECT_CALL_REG(rax)
	call	thread_exit	/* destroy thread if it returns. */
	/*NOTREACHED*/
	SET_SIZE(thread_start)