/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/vmparam.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/frame.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/psw.h>
#include <sys/ucontext.h>
#include <sys/asm_linkage.h>
#include <sys/errno.h>
#include <sys/archsystm.h>
#include <sys/schedctl.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>

/*
 * This is a wrapper around copyout_noerr() that returns a guaranteed error
 * code. Because we're using copyout_noerr(), we need to bound the time we're
 * under an on_fault()/no_fault() and attempt to do so only while we're
 * actually copying data out. The main reason for this is that we're being
 * called back from the FPU code, which holds us under a kpreempt_disable()
 * and related; we can't use a larger on_fault()/no_fault() scope, as that
 * would both hide legitimate kernel errors of our own (masquerading as user
 * issues) and make it trickier to reason about the correct restoration of
 * our state.
 */
static int
savecontext_copyout(const void *kaddr, void *uaddr, size_t size)
{
	label_t ljb;

	if (!on_fault(&ljb)) {
		copyout_noerr(kaddr, uaddr, size);
		no_fault();
		return (0);
	} else {
		no_fault();
		return (EFAULT);
	}
}
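/*
 * Note that savecontext_copyout() is deliberately signature-compatible with
 * copyout(), i.e. both have the shape
 *
 *	int (*)(const void *kaddr, void *uaddr, size_t size);
 *
 * which is what allows savecontext() below to hand either one to
 * fpu_signal_copyout() as its copy callback.
 */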
/*
 * Save user context.
 *
 * ucp is itself always a pointer to the kernel's copy of a ucontext_t. In the
 * traditional version of this (when flags is 0), we just write and fill out
 * all of the ucontext_t without any care for what was there ahead of this.
 * Our callers are responsible for copying out that state if required. When
 * there is extended state to deal with (flags include SAVECTXT_F_EXTD), our
 * callers will have already copied in and pre-populated the structure with
 * values from userland. When those pointers are non-zero, we will copy out
 * that extended state directly to the user pointer. Currently this is only
 * done for uc_xsave. Even when we perform this, the rest of the structure
 * stays as is.
 *
 * We allow the copying to happen in two different ways mostly because this is
 * also used in the signal handling context, where we must be much more
 * careful about how to copy out data.
 */
int
savecontext(ucontext_t *ucp, const k_sigset_t *mask, savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	long user_xsave = 0;
	int ret;

	VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

	/*
	 * We unconditionally assign to every field through the end
	 * of the gregs, but we need to bzero() everything -after- that
	 * to avoid having any kernel stack garbage escape to userland.
	 *
	 * If we have been asked to save extended state, then we must make sure
	 * that we don't clobber that value. We must also determine if the
	 * processor has xsave state. If it does not, then we simply honor the
	 * pointer, but do not write anything out and do not set the flag.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		/*
		 * The only other flag that we have right now is about
		 * modifying the copyout behavior when we're copying out
		 * extended information. If it's not here, we should not do
		 * anything.
		 */
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext_t) -
	    offsetof(ucontext_t, uc_mcontext.fpregs));
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;

	/*
	 * Try to copyin() the ustack if one is registered. If the stack
	 * has zero size, this indicates that stack bounds checking has
	 * been disabled for this LWP. If stack bounds checking is disabled
	 * or the copyin() fails, we fall back to the legacy behavior.
	 */
	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			ucp->uc_stack = lwp->lwp_sigaltstack;
		} else {
			ucp->uc_stack.ss_sp = p->p_usrstack - p->p_stksize;
			ucp->uc_stack.ss_size = p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set,
	 * arrange for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that saved user context won't have trace
		 * flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * trap() always checks DEBUG_PENDING before
			 * checking for any pending signal. This can at times
			 * lead to DEBUG_PENDING not being honoured (e.g. the
			 * lwp is stopped by stop_on_fault() called from
			 * trap(); after being awakened it might see a pending
			 * signal and call savecontext(), but on the way back
			 * to userland there is no place the flag can be
			 * detected). Hence, in anticipation of such
			 * occasions, set the AST flag for the thread, which
			 * will make the thread take an excursion through
			 * trap(), where it will be handled appropriately.
			 */
			aston(curthread);
		}
	}

	getgregs(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

	sigktou(mask, &ucp->uc_sigmask);

	/*
	 * Determine if we need to get the rest of the xsave context out here.
	 * If the thread doesn't actually have the FPU enabled, then we don't
	 * actually need to do this. We also don't have to if it wasn't
	 * requested.
	 */
	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * While you might be asking why and contemplating despair, just know
	 * that some things need to be done differently in the face of signals
	 * (half the reason this function exists): when in signal context we
	 * can't trigger watchpoints. This means we need to tell the FPU copy
	 * logic to actually use on_fault()/no_fault() and the non-error form
	 * of copyout (which at least still checks that it's a user address).
	 */
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave,
		    savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave, copyout);
	}

	return (ret);
}
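/*
 * For illustration, a caller saving context as part of signal delivery would
 * be expected to request both the extended state and the fault-guarded
 * copyout, along the lines of (a sketch only; see the signal delivery code
 * for the authoritative call sites):
 *
 *	ret = savecontext(ucp, mask, SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT);
 */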
/*
 * Restore user context.
 */
void
restorecontext(ucontext_t *ucp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);

	lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;

	if (ucp->uc_flags & UC_STACK) {
		if (ucp->uc_stack.ss_flags == SS_ONSTACK)
			lwp->lwp_sigaltstack = ucp->uc_stack;
		else
			lwp->lwp_sigaltstack.ss_flags &= ~SS_ONSTACK;
	}

	if (ucp->uc_flags & UC_CPU) {
		/*
		 * If the trace flag is set, mark the lwp to take a
		 * single-step trap on return to user level (below).
		 * The x86 lcall interface and sysenter have already done
		 * this and turned off the flag, but the amd64 syscall
		 * interface has not.
		 */
		if (lwptoregs(lwp)->r_ps & PS_T)
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
		setgregs(lwp, ucp->uc_mcontext.gregs);
		lwp->lwp_eosys = JUSTRETURN;
		t->t_post_sys = 1;
		aston(curthread);
	}

	/*
	 * The logic to copy in the ucontext_t takes care of combining the
	 * UC_FPU and UC_XSAVE flags, so at this point only one of them should
	 * be set, if any.
	 */
	if (ucp->uc_flags & UC_XSAVE) {
		ASSERT0(ucp->uc_flags & UC_FPU);
		ASSERT3U((uintptr_t)ucp->uc_xsave, >=, _kernelbase);
		fpu_set_xsave(lwp, (const void *)ucp->uc_xsave);
	} else if (ucp->uc_flags & UC_FPU) {
		setfpregs(lwp, &ucp->uc_mcontext.fpregs);
	}

	if (ucp->uc_flags & UC_SIGMASK) {
		/*
		 * We don't need to acquire p->p_lock here;
		 * we are manipulating thread-private data.
		 */
		schedctl_finish_sigblock(t);
		sigutok(&ucp->uc_sigmask, &t->t_hold);
		if (sigcheck(ttoproc(t), t))
			t->t_sig_check = 1;
	}
}


int
getsetcontext(int flag, void *arg)
{
	ucontext_t uc;
	ucontext_t *ucp;
	klwp_t *lwp = ttolwp(curthread);
	void *fpu = NULL;
	stack_t dummy_stk;
	int ret;

	/*
	 * In future releases, when the ucontext structure grows,
	 * getcontext should be modified to only return the fields
	 * specified in the uc_flags. That way, the structure can grow
	 * and still be binary compatible with all .o's, which will only
	 * have the old fields defined in uc_flags.
	 */

	switch (flag) {
	default:
		return (set_errno(EINVAL));

	case GETCONTEXT:
		schedctl_finish_sigblock(curthread);
		ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	/*
	 * In the case of GETCONTEXT_EXTD, we've theoretically been given all
	 * the required pointers of the appropriate length by libc in the
	 * ucontext_t. We must first copyin the offsets that we care about to
	 * seed the known extensions. Right now that is just the uc_xsave
	 * member. As we are setting uc_flags, we only look at the members we
	 * need to care about.
	 *
	 * The main reason that we have a different entry point is that we
	 * don't want to assume that callers have always properly zeroed their
	 * ucontext_t ahead of calling into libc. In fact, it is often just
	 * declared on the stack, so we can't assume that at all. Instead,
	 * getcontext_extd does require that.
	 */
	case GETCONTEXT_EXTD:
		schedctl_finish_sigblock(curthread);
		ucp = arg;
		if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}
		ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);


	case SETCONTEXT:
		ucp = arg;
		if (ucp == NULL)
			exit(CLD_EXITED, 0);
		/*
		 * Don't copyin filler or floating state unless we need it.
		 * The ucontext_t struct and fields are specified in the ABI.
		 */
		if (copyin(ucp, &uc, offsetof(ucontext_t, uc_filler) -
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);

		if ((uc.uc_flags & UC_FPU) &&
		    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}

		uc.uc_xsave = 0;
		if ((uc.uc_flags & UC_XSAVE) != 0) {
			int ret;

			if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
			    sizeof (uc.uc_xsave)) != 0) {
				return (set_errno(EFAULT));
			}

			ret = fpu_signal_copyin(lwp, &uc);
			if (ret != 0) {
				return (set_errno(ret));
			}
		}

		restorecontext(&uc);

		if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
			(void) copyout(&uc.uc_stack,
			    (stack_t *)lwp->lwp_ustack, sizeof (uc.uc_stack));
		return (0);

	case GETUSTACK:
		if (copyout(&lwp->lwp_ustack, arg, sizeof (caddr_t)))
			return (set_errno(EFAULT));
		return (0);

	case SETUSTACK:
		if (copyin(arg, &dummy_stk, sizeof (dummy_stk)))
			return (set_errno(EFAULT));
		lwp->lwp_ustack = (uintptr_t)arg;
		return (0);
	}
}
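/*
 * For reference, userland reaches the GETCONTEXT_EXTD path through libc's
 * extended-context interfaces, which allocate a ucontext_t large enough to
 * carry the uc_xsave state before trapping in here. A caller might look
 * roughly like this (a sketch; see getcontext_extd(3C) and
 * ucontext_alloc(3C) for the actual contracts):
 *
 *	ucontext_t *ucp = ucontext_alloc(0);
 *	if (ucp == NULL)
 *		err(EXIT_FAILURE, "failed to allocate ucontext");
 *	if (getcontext_extd(ucp, 0) != 0)
 *		err(EXIT_FAILURE, "failed to get extended context");
 */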
#ifdef _SYSCALL32_IMPL

/*
 * Save user context for 32-bit processes.
 */
int
savecontext32(ucontext32_t *ucp, const k_sigset_t *mask,
    savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	int32_t user_xsave = 0;
	uintptr_t uaddr;
	int ret;

	/*
	 * See savecontext for an explanation of this.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext32_t) -
	    offsetof(ucontext32_t, uc_mcontext.fpregs));
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;

	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			ucp->uc_stack.ss_sp =
			    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
			ucp->uc_stack.ss_size =
			    (size32_t)lwp->lwp_sigaltstack.ss_size;
			ucp->uc_stack.ss_flags = SS_ONSTACK;
		} else {
			ucp->uc_stack.ss_sp = (caddr32_t)(uintptr_t)
			    (p->p_usrstack - p->p_stksize);
			ucp->uc_stack.ss_size = (size32_t)p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set, arrange
	 * for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that saved user context won't have trace
		 * flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * See comments in savecontext().
			 */
			aston(curthread);
		}
	}

	getgregs32(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs32(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

	sigktou(mask, &ucp->uc_sigmask);

	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * To avoid changing or breaking existing programs, the filler in the
	 * ucontext_t was always declared as a long, which is signed; because
	 * this is the 32-bit version, that makes uc_xsave an int32_t. We
	 * cannot go directly to a uintptr_t, otherwise we might get sign
	 * extension, so we first have to go through a uint32_t and then a
	 * uintptr_t. Otherwise, see savecontext().
	 */
	uaddr = (uintptr_t)(uint32_t)ucp->uc_xsave;
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, uaddr, savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, uaddr, copyout);
	}

	return (ret);
}
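/*
 * To make the sign-extension hazard above concrete: for a 32-bit user
 * address such as 0xfebf0000 (illustrative only), casting the int32_t
 * directly to uintptr_t would yield 0xfffffffffebf0000, while going through
 * uint32_t first preserves the intended 0x00000000febf0000.
 */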
int
getsetcontext32(int flag, void *arg)
{
	ucontext32_t uc;
	ucontext_t ucnat;
	ucontext32_t *ucp;
	klwp_t *lwp = ttolwp(curthread);
	caddr32_t ustack32;
	stack32_t dummy_stk32;
	int ret;

	switch (flag) {
	default:
		return (set_errno(EINVAL));

	case GETCONTEXT:
		schedctl_finish_sigblock(curthread);
		ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	/*
	 * See getsetcontext() for an explanation of what is going on here.
	 */
	case GETCONTEXT_EXTD:
		schedctl_finish_sigblock(curthread);
		ucp = arg;
		if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}
		ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	case SETCONTEXT:
		ucp = arg;
		if (ucp == NULL)
			exit(CLD_EXITED, 0);
		if (copyin(ucp, &uc, offsetof(ucontext32_t, uc_filler) -
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
		if ((uc.uc_flags & UC_FPU) &&
		    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}

		uc.uc_xsave = 0;
		if ((uc.uc_flags & UC_XSAVE) != 0 &&
		    copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}

		ucontext_32ton(&uc, &ucnat);

		if ((ucnat.uc_flags & UC_XSAVE) != 0) {
			int ret = fpu_signal_copyin(lwp, &ucnat);
			if (ret != 0) {
				return (set_errno(ret));
			}
		}

		restorecontext(&ucnat);

		if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
			(void) copyout(&uc.uc_stack,
			    (stack32_t *)lwp->lwp_ustack,
			    sizeof (uc.uc_stack));
		return (0);

	case GETUSTACK:
		ustack32 = (caddr32_t)lwp->lwp_ustack;
		if (copyout(&ustack32, arg, sizeof (ustack32)))
			return (set_errno(EFAULT));
		return (0);

	case SETUSTACK:
		if (copyin(arg, &dummy_stk32, sizeof (dummy_stk32)))
			return (set_errno(EFAULT));
		lwp->lwp_ustack = (uintptr_t)arg;
		return (0);
	}
}

#endif	/* _SYSCALL32_IMPL */