1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * Copyright 2023 Oxide Computer Company
32 */
33
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/vmparam.h>
37 #include <sys/systm.h>
38 #include <sys/signal.h>
39 #include <sys/stack.h>
40 #include <sys/regset.h>
41 #include <sys/privregs.h>
42 #include <sys/frame.h>
43 #include <sys/proc.h>
44 #include <sys/brand.h>
45 #include <sys/psw.h>
46 #include <sys/ucontext.h>
47 #include <sys/asm_linkage.h>
48 #include <sys/errno.h>
49 #include <sys/archsystm.h>
50 #include <sys/schedctl.h>
51 #include <sys/debug.h>
52 #include <sys/sysmacros.h>
53
54 /*
55 * This is a wrapper around copyout_noerr that returns a guaranteed error code.
56 * Because we're using copyout_noerr(), we need to bound the time we're under an
57 * on_fault/no_fault and attempt to do so only while we're actually copying data
58 * out. The main reason for this is because we're being called back from the
59 * FPU, which is being held with a kpreempt_disable() and related, we can't use
60 * a larger on_fault()/no_fault() as that would both hide legitimate errors we
61 * make, masquerading as user issues, and it gets trickier to reason about the
62 * correct restoration of our state.
63 */
64 static int
savecontext_copyout(const void * kaddr,void * uaddr,size_t size)65 savecontext_copyout(const void *kaddr, void *uaddr, size_t size)
66 {
67 label_t ljb;
68 if (!on_fault(&ljb)) {
69 copyout_noerr(kaddr, uaddr, size);
70 no_fault();
71 return (0);
72 } else {
73 no_fault();
74 return (EFAULT);
75 }
76 }
77
78 /*
79 * Save user context.
80 *
81 * ucp is itself always a pointer to the kernel's copy of a ucontext_t. In the
82 * traditional version of this (when flags is 0), then we just write and fill
83 * out all of the ucontext_t without any care for what was there ahead of this.
84 * Our callers are responsible for coyping out that state if required. When
85 * there is extended state to deal with (flags include SAVECTXT_F_EXTD), our
86 * callers will have already copied in and pre-populated the structure with
87 * values from userland. When those pointers are non-zero then we will copy out
88 * that extended state directly to the user pointer. Currently this is only done
89 * for uc_xsave. Even when we perform this, the rest of the structure stays as
90 * is.
91 *
92 * We allow the copying to happen in two different ways mostly because this is
93 * also used in the signal handling context where we must be much more careful
94 * about how to copy out data.
95 */
int
savecontext(ucontext_t *ucp, const k_sigset_t *mask, savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	long user_xsave = 0;
	int ret;

	/* These are the only flags this function understands. */
	VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

	/*
	 * We unconditionally assign to every field through the end
	 * of the gregs, but we need to bzero() everything -after- that
	 * to avoid having any kernel stack garbage escape to userland.
	 *
	 * If we have been asked to save extended state, then we must make sure
	 * that we don't clobber that value. We must also determine if the
	 * processor has xsave state. If it does not, then we just simply honor
	 * the pointer, but do not write anything out and do not set the flag.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		/*
		 * The only other flag that we have right now is about modifying
		 * the copyout behavior when we're copying out extended
		 * information. If it's not here, we should not do anything.
		 */
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext_t) -
	    offsetof(ucontext_t, uc_mcontext.fpregs));
	/* Restore the caller's uc_xsave pointer clobbered by the bzero(). */
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;

	/*
	 * Try to copyin() the ustack if one is registered. If the stack
	 * has zero size, this indicates that stack bounds checking has
	 * been disabled for this LWP. If stack bounds checking is disabled
	 * or the copyin() fails, we fall back to the legacy behavior.
	 */
	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			ucp->uc_stack = lwp->lwp_sigaltstack;
		} else {
			/* Legacy: describe the main process stack. */
			ucp->uc_stack.ss_sp = p->p_usrstack - p->p_stksize;
			ucp->uc_stack.ss_size = p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set,
	 * arrange for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that saved user context won't have trace
		 * flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * trap() always checks DEBUG_PENDING before
			 * checking for any pending signal. This at times
			 * can potentially lead to DEBUG_PENDING not being
			 * honoured. (for eg: the lwp is stopped by
			 * stop_on_fault() called from trap(), after being
			 * awakened it might see a pending signal and call
			 * savecontext(), however on the way back to userland
			 * there is no place it can be detected). Hence in
			 * anticipation of such occasions, set AST flag for
			 * the thread which will make the thread take an
			 * excursion through trap() where it will be handled
			 * appropriately.
			 */
			aston(curthread);
		}
	}

	getgregs(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

	/* Convert the kernel signal set to its user-level representation. */
	sigktou(mask, &ucp->uc_sigmask);

	/*
	 * Determine if we need to get the rest of the xsave context out here.
	 * If the thread doesn't actually have the FPU enabled, then we don't
	 * actually need to do this. We also don't have to if it wasn't
	 * requested.
	 */
	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * While you might be asking why and contemplating despair, just know
	 * that some things need to just be done in the face of signal (half the
	 * reason this function exists). Basically when in signal context we
	 * can't trigger watch points. This means we need to tell the FPU copy
	 * logic to actually use the on_fault/no_fault and the non-error form of
	 * copyout (which still checks if it's a user address at least).
	 */
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave,
		    savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave, copyout);
	}

	return (ret);
}
228
229 /*
230 * Restore user context.
231 */
void
restorecontext(ucontext_t *ucp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);

	/* Re-establish the previous context as the one to return to. */
	lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;

	if (ucp->uc_flags & UC_STACK) {
		if (ucp->uc_stack.ss_flags == SS_ONSTACK)
			lwp->lwp_sigaltstack = ucp->uc_stack;
		else
			lwp->lwp_sigaltstack.ss_flags &= ~SS_ONSTACK;
	}

	if (ucp->uc_flags & UC_CPU) {
		/*
		 * If the trace flag is set, mark the lwp to take a
		 * single-step trap on return to user level (below).
		 * The x86 lcall interface and sysenter has already done this,
		 * and turned off the flag, but amd64 syscall interface has not.
		 */
		if (lwptoregs(lwp)->r_ps & PS_T)
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
		setgregs(lwp, ucp->uc_mcontext.gregs);
		/* Return straight to userland; don't finish the syscall. */
		lwp->lwp_eosys = JUSTRETURN;
		t->t_post_sys = 1;
		aston(curthread);
	}

	/*
	 * The logic to copy in the ucontext_t takes care of combining the
	 * UC_FPU and UC_XSAVE, so at this point only one of them should be
	 * set, if any.
	 */
	if (ucp->uc_flags & UC_XSAVE) {
		ASSERT0(ucp->uc_flags & UC_FPU);
		/* uc_xsave must point at a kernel copy by this point. */
		ASSERT3U((uintptr_t)ucp->uc_xsave, >=, _kernelbase);
		fpu_set_xsave(lwp, (const void *)ucp->uc_xsave);
	} else if (ucp->uc_flags & UC_FPU) {
		setfpregs(lwp, &ucp->uc_mcontext.fpregs);
	}

	if (ucp->uc_flags & UC_SIGMASK) {
		/*
		 * We don't need to acquire p->p_lock here;
		 * we are manipulating thread-private data.
		 */
		schedctl_finish_sigblock(t);
		sigutok(&ucp->uc_sigmask, &t->t_hold);
		if (sigcheck(ttoproc(t), t))
			t->t_sig_check = 1;
	}
}
286
287
288 int
getsetcontext(int flag,void * arg)289 getsetcontext(int flag, void *arg)
290 {
291 ucontext_t uc;
292 ucontext_t *ucp;
293 klwp_t *lwp = ttolwp(curthread);
294 void *fpu = NULL;
295 stack_t dummy_stk;
296 int ret;
297
298 /*
299 * In future releases, when the ucontext structure grows,
300 * getcontext should be modified to only return the fields
301 * specified in the uc_flags. That way, the structure can grow
302 * and still be binary compatible will all .o's which will only
303 * have old fields defined in uc_flags
304 */
305
306 switch (flag) {
307 default:
308 return (set_errno(EINVAL));
309
310 case GETCONTEXT:
311 schedctl_finish_sigblock(curthread);
312 ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
313 if (ret != 0)
314 return (set_errno(ret));
315 if (uc.uc_flags & UC_SIGMASK)
316 SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
317 if (copyout(&uc, arg, sizeof (uc)))
318 return (set_errno(EFAULT));
319 return (0);
320
321 /*
322 * In the case of GETCONTEXT_EXTD, we've theoretically been given all
323 * the required pointers of the appropriate length by libc in the
324 * ucontext_t. We must first copyin the offsets that we care about to
325 * seed the known extensions. Right now that is just the uc_xsave
326 * member. As we are setting uc_flags, we only look at the members we
327 * need to care about.
328 *
329 * The main reason that we have a different entry point is that we don't
330 * want to assume that callers have always properly zeroed their
331 * ucontext_t ahead of calling into libc. In fact, it often is just
332 * declared on the stack so we can't assume that at all. Instead,
333 * getcontext_extd does require that.
334 */
335 case GETCONTEXT_EXTD:
336 schedctl_finish_sigblock(curthread);
337 ucp = arg;
338 if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
339 sizeof (uc.uc_xsave)) != 0) {
340 return (set_errno(EFAULT));
341 }
342 ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
343 if (ret != 0)
344 return (set_errno(ret));
345 if (uc.uc_flags & UC_SIGMASK)
346 SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
347 if (copyout(&uc, arg, sizeof (uc)))
348 return (set_errno(EFAULT));
349 return (0);
350
351
352 case SETCONTEXT:
353 ucp = arg;
354 if (ucp == NULL)
355 exit(CLD_EXITED, 0);
356 /*
357 * Don't copyin filler or floating state unless we need it.
358 * The ucontext_t struct and fields are specified in the ABI.
359 */
360 if (copyin(ucp, &uc, offsetof(ucontext_t, uc_filler) -
361 sizeof (uc.uc_mcontext.fpregs))) {
362 return (set_errno(EFAULT));
363 }
364 if (uc.uc_flags & UC_SIGMASK)
365 SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
366
367 if ((uc.uc_flags & UC_FPU) &&
368 copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
369 sizeof (uc.uc_mcontext.fpregs))) {
370 return (set_errno(EFAULT));
371 }
372
373 uc.uc_xsave = 0;
374 if ((uc.uc_flags & UC_XSAVE) != 0) {
375 int ret;
376
377 if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
378 sizeof (uc.uc_xsave)) != 0) {
379 return (set_errno(EFAULT));
380 }
381
382 ret = fpu_signal_copyin(lwp, &uc);
383 if (ret != 0) {
384 return (set_errno(ret));
385 }
386 }
387
388 restorecontext(&uc);
389
390 if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
391 (void) copyout(&uc.uc_stack, (stack_t *)lwp->lwp_ustack,
392 sizeof (uc.uc_stack));
393 return (0);
394
395 case GETUSTACK:
396 if (copyout(&lwp->lwp_ustack, arg, sizeof (caddr_t)))
397 return (set_errno(EFAULT));
398 return (0);
399
400 case SETUSTACK:
401 if (copyin(arg, &dummy_stk, sizeof (dummy_stk)))
402 return (set_errno(EFAULT));
403 lwp->lwp_ustack = (uintptr_t)arg;
404 return (0);
405 }
406 }
407
408 #ifdef _SYSCALL32_IMPL
409
410 /*
411 * Save user context for 32-bit processes.
412 */
int
savecontext32(ucontext32_t *ucp, const k_sigset_t *mask,
    savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	int32_t user_xsave = 0;
	uintptr_t uaddr;
	int ret;

	/*
	 * See savecontext for an explanation of this.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext32_t) -
	    offsetof(ucontext32_t, uc_mcontext.fpregs));
	/* Restore the caller's uc_xsave value clobbered by the bzero(). */
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;

	/* As in savecontext(): prefer the registered ustack, else legacy. */
	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			/* Truncate the native sigaltstack to 32-bit form. */
			ucp->uc_stack.ss_sp =
			    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
			ucp->uc_stack.ss_size =
			    (size32_t)lwp->lwp_sigaltstack.ss_size;
			ucp->uc_stack.ss_flags = SS_ONSTACK;
		} else {
			ucp->uc_stack.ss_sp = (caddr32_t)(uintptr_t)
			    (p->p_usrstack - p->p_stksize);
			ucp->uc_stack.ss_size = (size32_t)p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set, arrange
	 * for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that saved user context won't have trace
		 * flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * See comments in savecontext().
			 */
			aston(curthread);
		}
	}

	getgregs32(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs32(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

	/* Convert the kernel signal set to its user-level representation. */
	sigktou(mask, &ucp->uc_sigmask);

	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * Due to not wanting to change or break programs, the filler in the
	 * ucontext_t was always declared as a long, which is signed. Because
	 * this is the 32-bit version, this is an int32_t. We cannot directly go
	 * to a uintptr_t otherwise we might get sign extension, so we first
	 * have to go through a uint32_t and then a uintptr_t. Otherwise, see
	 * savecontext().
	 */
	uaddr = (uintptr_t)(uint32_t)ucp->uc_xsave;
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, uaddr, savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, uaddr, copyout);
	}

	return (ret);
}
515
516 int
getsetcontext32(int flag,void * arg)517 getsetcontext32(int flag, void *arg)
518 {
519 ucontext32_t uc;
520 ucontext_t ucnat;
521 ucontext32_t *ucp;
522 klwp_t *lwp = ttolwp(curthread);
523 caddr32_t ustack32;
524 stack32_t dummy_stk32;
525 int ret;
526
527 switch (flag) {
528 default:
529 return (set_errno(EINVAL));
530
531 case GETCONTEXT:
532 schedctl_finish_sigblock(curthread);
533 ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
534 if (ret != 0)
535 return (set_errno(ret));
536 if (uc.uc_flags & UC_SIGMASK)
537 SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
538 if (copyout(&uc, arg, sizeof (uc)))
539 return (set_errno(EFAULT));
540 return (0);
541
542 /*
543 * See getsetcontext() for an explanation of what is going on here.
544 */
545 case GETCONTEXT_EXTD:
546 schedctl_finish_sigblock(curthread);
547 ucp = arg;
548 if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
549 sizeof (uc.uc_xsave)) != 0) {
550 return (set_errno(EFAULT));
551 }
552 ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
553 if (ret != 0)
554 return (set_errno(ret));
555 if (uc.uc_flags & UC_SIGMASK)
556 SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
557 if (copyout(&uc, arg, sizeof (uc)))
558 return (set_errno(EFAULT));
559 return (0);
560
561 case SETCONTEXT:
562 ucp = arg;
563 if (ucp == NULL)
564 exit(CLD_EXITED, 0);
565 if (copyin(ucp, &uc, offsetof(ucontext32_t, uc_filler) -
566 sizeof (uc.uc_mcontext.fpregs))) {
567 return (set_errno(EFAULT));
568 }
569 if (uc.uc_flags & UC_SIGMASK)
570 SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
571 if ((uc.uc_flags & UC_FPU) &&
572 copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
573 sizeof (uc.uc_mcontext.fpregs))) {
574 return (set_errno(EFAULT));
575 }
576
577 uc.uc_xsave = 0;
578 if ((uc.uc_flags & UC_XSAVE) != 0 &&
579 copyin(&ucp->uc_xsave, &uc.uc_xsave,
580 sizeof (uc.uc_xsave)) != 0) {
581 return (set_errno(EFAULT));
582 }
583
584 ucontext_32ton(&uc, &ucnat);
585
586 if ((ucnat.uc_flags & UC_XSAVE) != 0) {
587 int ret = fpu_signal_copyin(lwp, &ucnat);
588 if (ret != 0) {
589 return (set_errno(ret));
590 }
591 }
592
593 restorecontext(&ucnat);
594
595 if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
596 (void) copyout(&uc.uc_stack,
597 (stack32_t *)lwp->lwp_ustack, sizeof (uc.uc_stack));
598 return (0);
599
600 case GETUSTACK:
601 ustack32 = (caddr32_t)lwp->lwp_ustack;
602 if (copyout(&ustack32, arg, sizeof (ustack32)))
603 return (set_errno(EFAULT));
604 return (0);
605
606 case SETUSTACK:
607 if (copyin(arg, &dummy_stk32, sizeof (dummy_stk32)))
608 return (set_errno(EFAULT));
609 lwp->lwp_ustack = (uintptr_t)arg;
610 return (0);
611 }
612 }
613
614 #endif /* _SYSCALL32_IMPL */
615