/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
#include "opt_isa.h"
#include "opt_cpu.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/thr.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/wait.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/tss.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>

_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
    "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf.");

void
set_top_of_stack_td(struct thread *td)
{
	td->td_md.md_stack_base = td->td_kstack +
	    td->td_kstack_pages * PAGE_SIZE;
}

struct savefpu *
get_pcb_user_save_td(struct thread *td)
{
	KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
	    XSAVE_AREA_ALIGN) == 0,
	    ("Unaligned pcb_user_save area ptr %p td %p",
	    td->td_md.md_usr_fpu_save, td));
	return (td->td_md.md_usr_fpu_save);
}

struct pcb *
get_pcb_td(struct thread *td)
{

	return (&td->td_md.md_pcb);
}

struct savefpu *
get_pcb_user_save_pcb(struct pcb *pcb)
{
	struct thread *td;

	td = __containerof(pcb, struct thread, td_md.md_pcb);
	return (get_pcb_user_save_td(td));
}

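/*
 * Allocate an FPU save area sized for the largest extended state this
 * CPU supports.  When XSAVE is in use, the xstate header is zeroed and
 * xstate_bv is primed with the enabled feature mask so that the area
 * is treated as holding state for every enabled component.
 */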
void *
alloc_fpusave(int flags)
{
	void *res;
	struct savefpu_ymm *sf;

	res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
	if (use_xsave) {
		sf = (struct savefpu_ymm *)res;
		bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
		sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
	}
	return (res);
}

/*
 * Common code shared between cpu_fork() and cpu_copy_thread() for
 * initializing a thread.
 */
static void
copy_thread(struct thread *td1, struct thread *td2)
{
	struct pcb *pcb2;

	pcb2 = td2->td_pcb;

	/* Ensure that td1's pcb is up to date for user threads. */
	if ((td2->td_pflags & TDP_KTHREAD) == 0) {
		MPASS(td1 == curthread);
		fpuexit(td1);
		update_pcb_bases(td1->td_pcb);
	}

	/* Copy td1's pcb */
	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));

	/* Properly initialize pcb_save */
	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);

	/* Kernel threads start with clean FPU and segment bases. */
	if ((td2->td_pflags & TDP_KTHREAD) != 0) {
		pcb2->pcb_fsbase = pcb2->pcb_tlsbase = 0;
		pcb2->pcb_gsbase = 0;
		clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE |
		    PCB_KERNFPU | PCB_KERNFPU_THR);
	} else {
		MPASS((pcb2->pcb_flags & (PCB_KERNFPU | PCB_KERNFPU_THR)) == 0);
		bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
		    cpu_max_ext_state_size);
		clear_pcb_flags(pcb2, PCB_TLSBASE);
	}

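	/* The trap frame sits at the very top of the kernel stack. */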
	td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
	pcb2->pcb_rbp = 0;
	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
	pcb2->pcb_rbx = (register_t)td2;		/* fork_trampoline argument */
	pcb2->pcb_rip = (register_t)fork_trampoline;
	/*-
	 * pcb2->pcb_dr*:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above.
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 * pcb2->pcb_[f,g,tls]sbase:	cloned above
	 */

	pcb2->pcb_tssp = NULL;

	/* Set up to release the spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
	pmap_thread_init_invl_gen(td2);

	/*
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies most of the user mode register values.
	 * Some of these registers are rewritten by cpu_set_upcall() and
	 * linux_set_upcall().
	 */
	if ((td1->td_proc->p_flag & P_KPROC) == 0) {
		bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));

		/*
		 * If the current thread has the trap bit set (i.e. a debugger
		 * had single stepped the process to the system call), we need
		 * to clear the trap flag from the new frame.  Otherwise, the
		 * new thread will receive a (likely unexpected) SIGTRAP when
		 * it executes the first instruction after returning to
		 * userland.
		 */
		td2->td_frame->tf_rflags &= ~PSL_T;
	}
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
	struct proc *p1;
	struct pcb *pcb2;
	struct mdproc *mdp1, *mdp2;
	struct proc_ldt *pldt;

	p1 = td1->td_proc;
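	/*
	 * Without RFPROC no new process is created, so only per-process
	 * state that stops being shared needs attention here.
	 */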
	if ((flags & RFPROC) == 0) {
		if ((flags & RFMEM) == 0) {
			/* unshare user LDT */
			mdp1 = &p1->p_md;
			mtx_lock(&dt_lock);
			if ((pldt = mdp1->md_ldt) != NULL &&
			    pldt->ldt_refcnt > 1 &&
			    user_ldt_alloc(p1, 1) == NULL)
				panic("could not copy LDT");
			mtx_unlock(&dt_lock);
		}
		return;
	}

	/* Point the stack and pcb to the actual location */
	set_top_of_stack_td(td2);
	td2->td_pcb = pcb2 = get_pcb_td(td2);

	copy_thread(td1, td2);

	/* Reset debug registers in the new process */
	x86_clear_dbregs(pcb2);

	/* Point mdproc and then copy over p1's contents */
	mdp2 = &p2->p_md;
	bcopy(&p1->p_md, mdp2, sizeof(*mdp2));

	/* Set child return values. */
	p2->p_sysent->sv_set_fork_retval(td2);

	/* As on i386, do not copy io permission bitmap. */
	pcb2->pcb_tssp = NULL;

	/* New segment registers. */
	set_pcb_flags_raw(pcb2, PCB_FULL_IRET);

	/* Copy the LDT, if necessary. */
	mdp1 = &td1->td_proc->p_md;
	mdp2 = &p2->p_md;
	if (mdp1->md_ldt == NULL) {
		mdp2->md_ldt = NULL;
		return;
	}
	mtx_lock(&dt_lock);
	if (mdp1->md_ldt != NULL) {
		if (flags & RFMEM) {
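			/* Shared address space: share the LDT reference. */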
			mdp1->md_ldt->ldt_refcnt++;
			mdp2->md_ldt = mdp1->md_ldt;
			bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct
			    system_segment_descriptor));
		} else {
			mdp2->md_ldt = NULL;
			mdp2->md_ldt = user_ldt_alloc(p2, 0);
			if (mdp2->md_ldt == NULL)
				panic("could not copy LDT");
			amd64_set_ldt_data(td2, 0, max_ldt_segment,
			    (struct user_segment_descriptor *)
			    mdp1->md_ldt->ldt_base);
		}
	} else
		mdp2->md_ldt = NULL;
	mtx_unlock(&dt_lock);

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_rsp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %rbx loaded with the new proc's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

void
x86_set_fork_retval(struct thread *td)
{
	struct trapframe *frame = td->td_frame;

	frame->tf_rax = 0;		/* Child returns zero */
	frame->tf_rflags &= ~PSL_C;	/* success */
	frame->tf_rdx = 1;		/* System V emulation */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg)
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	td->td_pcb->pcb_r12 = (long) func;	/* function */
	td->td_pcb->pcb_rbx = (long) arg;	/* first arg */
}

void
cpu_exit(struct thread *td)
{

	/*
	 * If this process has a custom LDT, release it.
	 */
	if (td->td_proc->p_md.md_ldt != NULL)
		user_ldt_free(td);
}

void
cpu_thread_exit(struct thread *td)
{
	struct pcb *pcb;

	critical_enter();
	if (td == PCPU_GET(fpcurthread))
		fpudrop();
	critical_exit();

	pcb = td->td_pcb;

	/* Disable any hardware breakpoints. */
	if (pcb->pcb_flags & PCB_DBREGS) {
		reset_dbregs();
		clear_pcb_flags(pcb, PCB_DBREGS);
	}
}

void
cpu_thread_clean(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	/*
	 * Clean TSS/iomap
	 */
	if (pcb->pcb_tssp != NULL) {
		pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp,
		    (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1));
		kmem_free(pcb->pcb_tssp, ctob(IOPAGES + 1));
		pcb->pcb_tssp = NULL;
	}
}

void
cpu_thread_alloc(struct thread *td)
{
	struct pcb *pcb;
	struct xstate_hdr *xhdr;

	set_top_of_stack_td(td);
	td->td_pcb = pcb = get_pcb_td(td);
	td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
	td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
	if (use_xsave) {
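		/* Prime the xstate header as in alloc_fpusave(). */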
		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
		bzero(xhdr, sizeof(*xhdr));
		xhdr->xstate_bv = xsave_mask;
	}
}

void
cpu_thread_free(struct thread *td)
{
	cpu_thread_clean(td);

	fpu_save_area_free(td->td_md.md_usr_fpu_save);
	td->td_md.md_usr_fpu_save = NULL;
}

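/*
 * The vmspace may be reused across exec only when the pmap's KPTI
 * setup (a valid user %cr3) matches whether the process is to run
 * with KPTI enabled.
 */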
bool
cpu_exec_vmspace_reuse(struct proc *p, vm_map_t map)
{

	return (((curproc->p_md.md_flags & P_MD_KPTI) != 0) ==
	    (vm_map_pmap(map)->pm_ucr3 != PMAP_NO_CR3));
}

static void
cpu_procctl_kpti_ctl(struct proc *p, int val)
{

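	/* Enabling KPTI on exec is honored only if PTI is enabled globally. */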
	if (pti && val == PROC_KPTI_CTL_ENABLE_ON_EXEC)
		p->p_md.md_flags |= P_MD_KPTI;
	if (val == PROC_KPTI_CTL_DISABLE_ON_EXEC)
		p->p_md.md_flags &= ~P_MD_KPTI;
}

static void
cpu_procctl_kpti_status(struct proc *p, int *val)
{
	*val = (p->p_md.md_flags & P_MD_KPTI) != 0 ?
	    PROC_KPTI_CTL_ENABLE_ON_EXEC:
	    PROC_KPTI_CTL_DISABLE_ON_EXEC;
	if (vmspace_pmap(p->p_vmspace)->pm_ucr3 != PMAP_NO_CR3)
		*val |= PROC_KPTI_STATUS_ACTIVE;
}

static int
cpu_procctl_la_ctl(struct proc *p, int val)
{
	int error;

	error = 0;
	switch (val) {
	case PROC_LA_CTL_LA48_ON_EXEC:
		p->p_md.md_flags |= P_MD_LA48;
		p->p_md.md_flags &= ~P_MD_LA57;
		break;
	case PROC_LA_CTL_LA57_ON_EXEC:
		if (la57) {
			p->p_md.md_flags &= ~P_MD_LA48;
			p->p_md.md_flags |= P_MD_LA57;
		} else {
			error = ENOTSUP;
		}
		break;
	case PROC_LA_CTL_DEFAULT_ON_EXEC:
		p->p_md.md_flags &= ~(P_MD_LA48 | P_MD_LA57);
		break;
	}
	return (error);
}

static void
cpu_procctl_la_status(struct proc *p, int *val)
{
	int res;

	if ((p->p_md.md_flags & P_MD_LA48) != 0)
		res = PROC_LA_CTL_LA48_ON_EXEC;
	else if ((p->p_md.md_flags & P_MD_LA57) != 0)
		res = PROC_LA_CTL_LA57_ON_EXEC;
	else
		res = PROC_LA_CTL_DEFAULT_ON_EXEC;
	if (p->p_sysent->sv_maxuser == VM_MAXUSER_ADDRESS_LA48)
		res |= PROC_LA_STATUS_LA48;
	else
		res |= PROC_LA_STATUS_LA57;
	*val = res;
}

int
cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data)
{
	struct proc *p;
	int error, val;

	switch (com) {
	case PROC_KPTI_CTL:
	case PROC_KPTI_STATUS:
	case PROC_LA_CTL:
	case PROC_LA_STATUS:
		if (idtype != P_PID) {
			error = EINVAL;
			break;
		}
		if (com == PROC_KPTI_CTL) {
			/* sad but true and not a joke */
			error = priv_check(td, PRIV_IO);
			if (error != 0)
				break;
		}
		if (com == PROC_KPTI_CTL || com == PROC_LA_CTL) {
			error = copyin(data, &val, sizeof(val));
			if (error != 0)
				break;
		}
		if (com == PROC_KPTI_CTL &&
		    val != PROC_KPTI_CTL_ENABLE_ON_EXEC &&
		    val != PROC_KPTI_CTL_DISABLE_ON_EXEC) {
			error = EINVAL;
			break;
		}
		if (com == PROC_LA_CTL &&
		    val != PROC_LA_CTL_LA48_ON_EXEC &&
		    val != PROC_LA_CTL_LA57_ON_EXEC &&
		    val != PROC_LA_CTL_DEFAULT_ON_EXEC) {
			error = EINVAL;
			break;
		}
		error = pget(id, PGET_CANSEE | PGET_NOTWEXIT | PGET_NOTID, &p);
		if (error != 0)
			break;
		switch (com) {
		case PROC_KPTI_CTL:
			cpu_procctl_kpti_ctl(p, val);
			break;
		case PROC_KPTI_STATUS:
			cpu_procctl_kpti_status(p, &val);
			break;
		case PROC_LA_CTL:
			error = cpu_procctl_la_ctl(p, val);
			break;
		case PROC_LA_STATUS:
			cpu_procctl_la_status(p, &val);
			break;
		}
		PROC_UNLOCK(p);
		if (com == PROC_KPTI_STATUS || com == PROC_LA_STATUS)
			error = copyout(&val, data, sizeof(val));
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

void
cpu_set_syscall_retval(struct thread *td, int error)
{
	struct trapframe *frame;

	frame = td->td_frame;
	if (__predict_true(error == 0)) {
		frame->tf_rax = td->td_retval[0];
		frame->tf_rdx = td->td_retval[1];
		frame->tf_rflags &= ~PSL_C;
		return;
	}

	switch (error) {
	case ERESTART:
		/*
		 * Reconstruct the pc; we know that 'syscall' is 2 bytes,
		 * lcall $X,y is 7 bytes, and int 0x80 is 2 bytes.  The
		 * instruction length was saved in tf_err.
		 * %r10 (which held the value of %rcx) is restored for the
		 * next iteration.  The %r10 restore is only required for
		 * freebsd/amd64 processes, but is harmless for any ia32 ABI.
		 *
		 * Require a full context restore so that the arguments are
		 * reloaded into the registers on return to usermode.
		 */
		frame->tf_rip -= frame->tf_err;
		frame->tf_r10 = frame->tf_rcx;
		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
		break;

	case EJUSTRETURN:
		break;

	default:
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;
		break;
	}
}

586 
587 /*
588  * Initialize machine state, mostly pcb and trap frame for a new
589  * thread, about to return to userspace.  Put enough state in the new
590  * thread's PCB to get it to go back to the fork_return(), which
591  * finalizes the thread state and handles peculiarities of the first
592  * return to userspace for the new thread.
593  */
594 void
cpu_copy_thread(struct thread * td,struct thread * td0)595 cpu_copy_thread(struct thread *td, struct thread *td0)
596 {
597 	copy_thread(td0, td);
598 
599 	set_pcb_flags_raw(td->td_pcb, PCB_FULL_IRET);
600 }
601 
602 /*
603  * Set that machine state for performing an upcall that starts
604  * the entry function with the given argument.
605  */
606 int
cpu_set_upcall(struct thread * td,void (* entry)(void *),void * arg,stack_t * stack)607 cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
608     stack_t *stack)
609 {
610 #ifdef COMPAT_FREEBSD32
611 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
612 		/*
613 		 * Set the trap frame to point at the beginning of the entry
614 		 * function.
615 		 */
616 		td->td_frame->tf_rbp = 0;
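		/*
		 * Keep the stack 16-byte aligned at the notional call
		 * site, leaving slots for the fake return address and
		 * the argument written below.
		 */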
		td->td_frame->tf_rsp =
		   (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
		td->td_frame->tf_rip = (uintptr_t)entry;

		/* Return address sentinel value to stop stack unwinding. */
		if (suword32((void *)td->td_frame->tf_rsp, 0) != 0)
			return (EFAULT);

		/* Pass the argument to the entry point. */
		if (suword32(
		    (void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
		    (uint32_t)(uintptr_t)arg) != 0)
			return (EFAULT);
		return (0);
	}
#endif

	/*
	 * Set the trap frame to point at the beginning of the uts
	 * function.
	 */
	td->td_frame->tf_rbp = 0;
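	/*
	 * Align the stack to 16 bytes, then drop 8 for the fake return
	 * address; the amd64 ABI expects %rsp + 8 to be 16-byte aligned
	 * at function entry.
	 */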
	td->td_frame->tf_rsp =
	    ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
	td->td_frame->tf_rsp -= 8;
	td->td_frame->tf_rip = (register_t)entry;
	td->td_frame->tf_ds = _udatasel;
	td->td_frame->tf_es = _udatasel;
	td->td_frame->tf_fs = _ufssel;
	td->td_frame->tf_gs = _ugssel;
	td->td_frame->tf_flags = TF_HASSEGS;

	/* Return address sentinel value to stop stack unwinding. */
	if (suword((void *)td->td_frame->tf_rsp, 0) != 0)
		return (EFAULT);

	/* Pass the argument to the entry point. */
	td->td_frame->tf_rdi = (register_t)arg;

	return (0);
}

int
cpu_set_user_tls(struct thread *td, void *tls_base, int thr_flags)
{
	struct pcb *pcb;

	if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
		return (EINVAL);

	pcb = td->td_pcb;
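	/*
	 * PCB_TLSBASE marks pcb_tlsbase as holding a valid TLS pointer;
	 * it is set only for threads managed by the C runtime
	 * (THR_C_RUNTIME).
	 */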
	set_pcb_flags(pcb, PCB_FULL_IRET | ((thr_flags &
	    THR_C_RUNTIME) != 0 ? PCB_TLSBASE : 0));
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		pcb->pcb_gsbase = (register_t)tls_base;
		return (0);
	}
#endif
	pcb->pcb_fsbase = pcb->pcb_tlsbase = (register_t)tls_base;
	return (0);
}

void
cpu_update_pcb(struct thread *td)
{
	MPASS(td == curthread);
	update_pcb_bases(td->td_pcb);
}