1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Copyright 2023 Oxide Computer Company
33 */
34
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
38 #include <sys/signal.h>
39 #include <sys/systm.h>
40 #include <sys/user.h>
41 #include <sys/mman.h>
42 #include <sys/class.h>
43 #include <sys/proc.h>
44 #include <sys/procfs.h>
45 #include <sys/buf.h>
46 #include <sys/kmem.h>
47 #include <sys/cred.h>
48 #include <sys/archsystm.h>
49 #include <sys/vmparam.h>
50 #include <sys/prsystm.h>
51 #include <sys/reboot.h>
52 #include <sys/uadmin.h>
53 #include <sys/vfs.h>
54 #include <sys/vnode.h>
55 #include <sys/file.h>
56 #include <sys/session.h>
57 #include <sys/ucontext.h>
58 #include <sys/dnlc.h>
59 #include <sys/var.h>
60 #include <sys/cmn_err.h>
61 #include <sys/debugreg.h>
62 #include <sys/thread.h>
63 #include <sys/vtrace.h>
64 #include <sys/consdev.h>
65 #include <sys/psw.h>
66 #include <sys/regset.h>
67
68 #include <sys/privregs.h>
69
70 #include <sys/stack.h>
71 #include <sys/swap.h>
72 #include <vm/hat.h>
73 #include <vm/anon.h>
74 #include <vm/as.h>
75 #include <vm/page.h>
76 #include <vm/seg.h>
77 #include <vm/seg_kmem.h>
78 #include <vm/seg_map.h>
79 #include <vm/seg_vn.h>
80 #include <sys/exec.h>
81 #include <sys/acct.h>
82 #include <sys/core.h>
83 #include <sys/corectl.h>
84 #include <sys/modctl.h>
85 #include <sys/tuneable.h>
86 #include <c2/audit.h>
87 #include <sys/bootconf.h>
88 #include <sys/dumphdr.h>
89 #include <sys/promif.h>
90 #include <sys/systeminfo.h>
91 #include <sys/kdi.h>
92 #include <sys/contract_impl.h>
93 #include <sys/x86_archext.h>
94
/*
 * Construct the execution environment for the user's signal
 * handler and arrange for control to be given to it on return
 * to userland.  The library code now calls setcontext() to
 * clean up after the signal handler, so sigret() is no longer
 * needed.
 *
 * (The various 'volatile' declarations are needed to ensure that values
 * are correct on the error return from on_fault().)
 */
105
106
107 /*
108 * An amd64 signal frame looks like this on the stack:
109 *
110 * old %rsp:
111 * <128 bytes of untouched stack space>
112 * <a siginfo_t [optional]>
113 * <a ucontext_t>
114 * <a ucontext_t's xsave state>
115 * <siginfo_t *> ---+
116 * <signal number> | sigframe
117 * new %rsp: <return address (deliberately invalid)> ---+
118 *
119 * The signal number and siginfo_t pointer are only pushed onto the stack in
120 * order to allow stack backtraces. The actual signal handling code expects the
121 * arguments in registers.
122 */
123
/*
 * Linkage record that sendsig() copies out to the new user stack pointer.
 * sendsig() asserts at compile time that sizeof (struct sigframe) is 8
 * modulo 16, so the layout must not be changed casually.
 */
struct sigframe {
	caddr_t retaddr;	/* bogus return address; sendsig() stores -1 */
	long signo;		/* signal number being delivered */
	siginfo_t *sip;		/* user address of the siginfo_t, or NULL */
};
129
130 int
sendsig(int sig,k_siginfo_t * sip,void (* hdlr)())131 sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
132 {
133 volatile size_t minstacksz;
134 boolean_t newstack;
135 size_t xsave_size;
136 int ret;
137 label_t ljb;
138 volatile caddr_t sp;
139 caddr_t fp;
140 volatile struct regs *rp;
141 volatile greg_t upc;
142 volatile proc_t *p = ttoproc(curthread);
143 struct as *as = p->p_as;
144 klwp_t *lwp = ttolwp(curthread);
145 ucontext_t *volatile tuc = NULL;
146 ucontext_t *uc;
147 siginfo_t *sip_addr;
148 volatile int watched;
149
150 /*
151 * This routine is utterly dependent upon STACK_ALIGN being
152 * 16 and STACK_ENTRY_ALIGN being 8. Let's just acknowledge
153 * that and require it.
154 */
155
156 #if STACK_ALIGN != 16 || STACK_ENTRY_ALIGN != 8
157 #error "sendsig() amd64 did not find the expected stack alignments"
158 #endif
159
160 rp = lwptoregs(lwp);
161 upc = rp->r_pc;
162
163 /*
164 * Since we're setting up to run the signal handler we have to
165 * arrange that the stack at entry to the handler is (only)
166 * STACK_ENTRY_ALIGN (i.e. 8) byte aligned so that when the handler
167 * executes its push of %rbp, the stack realigns to STACK_ALIGN
168 * (i.e. 16) correctly.
169 *
170 * The new sp will point to the sigframe and the ucontext_t. The
171 * above means that sp (and thus sigframe) will be 8-byte aligned,
172 * but not 16-byte aligned. ucontext_t, however, contains %xmm regs
173 * which must be 16-byte aligned. Because of this, for correct
174 * alignment, sigframe must be a multiple of 8-bytes in length, but
175 * not 16-bytes. This will place ucontext_t at a nice 16-byte boundary.
176 *
177 * When we move onto the xsave state, right now, we don't guarantee any
178 * alignment of the resulting data, but we will ensure that the
179 * resulting sp does have proper alignment. This will ensure that the
180 * guarantee on the ucontex_t is not violated.
181 */
182
183 CTASSERT((sizeof (struct sigframe) % 16) == 8);
184
185 minstacksz = sizeof (struct sigframe) + SA(sizeof (*uc));
186 if (sip != NULL)
187 minstacksz += SA(sizeof (siginfo_t));
188
189 if (fpu_xsave_enabled()) {
190 xsave_size = SA(fpu_signal_size(lwp));
191 minstacksz += xsave_size;
192 } else {
193 xsave_size = 0;
194 }
195
196 ASSERT((minstacksz & (STACK_ENTRY_ALIGN - 1ul)) == 0);
197
198 /*
199 * Figure out whether we will be handling this signal on
200 * an alternate stack specified by the user. Then allocate
201 * and validate the stack requirements for the signal handler
202 * context. on_fault will catch any faults.
203 */
204 newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
205 !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));
206
207 if (newstack) {
208 fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
209 SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN);
210 } else {
211 /*
212 * Drop below the 128-byte reserved region of the stack frame
213 * we're interrupting.
214 */
215 fp = (caddr_t)rp->r_sp - STACK_RESERVE;
216 }
217
218 /*
219 * Force proper stack pointer alignment, even in the face of a
220 * misaligned stack pointer from user-level before the signal.
221 */
222 fp = (caddr_t)((uintptr_t)fp & ~(STACK_ENTRY_ALIGN - 1ul));
223
224 /*
225 * Most of the time during normal execution, the stack pointer
226 * is aligned on a STACK_ALIGN (i.e. 16 byte) boundary. However,
227 * (for example) just after a call instruction (which pushes
228 * the return address), the callers stack misaligns until the
229 * 'push %rbp' happens in the callee prolog. So while we should
230 * expect the stack pointer to be always at least STACK_ENTRY_ALIGN
231 * aligned, we should -not- expect it to always be STACK_ALIGN aligned.
232 * We now adjust to ensure that the new sp is aligned to
233 * STACK_ENTRY_ALIGN but not to STACK_ALIGN.
234 */
235 sp = fp - minstacksz;
236 if (((uintptr_t)sp & (STACK_ALIGN - 1ul)) == 0) {
237 sp -= STACK_ENTRY_ALIGN;
238 minstacksz = fp - sp;
239 }
240
241 /*
242 * Now, make sure the resulting signal frame address is sane
243 */
244 if (sp >= as->a_userlimit || fp >= as->a_userlimit) {
245 #ifdef DEBUG
246 printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
247 PTOU(p)->u_comm, p->p_pid, sig);
248 printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
249 (void *)sp, (void *)hdlr, (uintptr_t)upc);
250 printf("sp above USERLIMIT\n");
251 #endif
252 return (0);
253 }
254
255 watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE);
256
257 if (on_fault(&ljb))
258 goto badstack;
259
260 if (sip != NULL) {
261 zoneid_t zoneid;
262
263 fp -= SA(sizeof (siginfo_t));
264 uzero(fp, sizeof (siginfo_t));
265 if (SI_FROMUSER(sip) &&
266 (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID &&
267 zoneid != sip->si_zoneid) {
268 k_siginfo_t sani_sip = *sip;
269
270 sani_sip.si_pid = p->p_zone->zone_zsched->p_pid;
271 sani_sip.si_uid = 0;
272 sani_sip.si_ctid = -1;
273 sani_sip.si_zoneid = zoneid;
274 copyout_noerr(&sani_sip, fp, sizeof (sani_sip));
275 } else
276 copyout_noerr(sip, fp, sizeof (*sip));
277 sip_addr = (siginfo_t *)fp;
278
279 if (sig == SIGPROF &&
280 curthread->t_rprof != NULL &&
281 curthread->t_rprof->rp_anystate) {
282 /*
283 * We stand on our head to deal with
284 * the real time profiling signal.
285 * Fill in the stuff that doesn't fit
286 * in a normal k_siginfo structure.
287 */
288 int i = sip->si_nsysarg;
289
290 while (--i >= 0)
291 sulword_noerr(
292 (ulong_t *)&(sip_addr->si_sysarg[i]),
293 (ulong_t)lwp->lwp_arg[i]);
294 copyout_noerr(curthread->t_rprof->rp_state,
295 sip_addr->si_mstate,
296 sizeof (curthread->t_rprof->rp_state));
297 }
298 } else
299 sip_addr = NULL;
300
301 no_fault();
302
303 /*
304 * Save the current context on the user stack directly after the
305 * sigframe. Since sigframe is 8-byte-but-not-16-byte aligned, and since
306 * sizeof (struct sigframe) is 24, this guarantees 16-byte alignment for
307 * ucontext_t and its %xmm registers. The xsave state part of the
308 * ucontext_t may be inbetween these two. However, we have ensured that
309 * the size of the stack space is 16-byte aligned as the actual size may
310 * vary.
311 */
312 tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
313 if (xsave_size != 0) {
314 tuc->uc_xsave = (unsigned long)(sp + sizeof (struct sigframe));
315 }
316 uc = (ucontext_t *)(sp + sizeof (struct sigframe) + xsave_size);
317 ret = savecontext(tuc, &lwp->lwp_sigoldmask, SAVECTXT_F_EXTD |
318 SAVECTXT_F_ONFAULT);
319 if (ret != 0)
320 goto postfault;
321 if (on_fault(&ljb))
322 goto badstack;
323 copyout_noerr(tuc, uc, sizeof (*tuc));
324 kmem_free(tuc, sizeof (*tuc));
325 tuc = NULL;
326
327 lwp->lwp_oldcontext = (uintptr_t)uc;
328
329 if (newstack) {
330 lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK;
331 if (lwp->lwp_ustack)
332 copyout_noerr(&lwp->lwp_sigaltstack,
333 (stack_t *)lwp->lwp_ustack, sizeof (stack_t));
334 }
335
336 /*
337 * Set up signal handler return and stack linkage
338 */
339 {
340 struct sigframe frame;
341
342 /*
343 * ensure we never return "normally"
344 */
345 frame.retaddr = (caddr_t)(uintptr_t)-1L;
346 frame.signo = sig;
347 frame.sip = sip_addr;
348 copyout_noerr(&frame, sp, sizeof (frame));
349 }
350
351 no_fault();
352 if (watched)
353 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
354
355 /*
356 * Set up user registers for execution of signal handler.
357 */
358 rp->r_sp = (greg_t)sp;
359 rp->r_pc = (greg_t)hdlr;
360 rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL);
361
362 rp->r_rdi = sig;
363 rp->r_rsi = (uintptr_t)sip_addr;
364 rp->r_rdx = (uintptr_t)uc;
365
366 if ((rp->r_cs & 0xffff) != UCS_SEL ||
367 (rp->r_ss & 0xffff) != UDS_SEL) {
368 /*
369 * Try our best to deliver the signal.
370 */
371 rp->r_cs = UCS_SEL;
372 rp->r_ss = UDS_SEL;
373 }
374
375 /*
376 * Don't set lwp_eosys here. sendsig() is called via psig() after
377 * lwp_eosys is handled, so setting it here would affect the next
378 * system call.
379 */
380 return (1);
381
382 badstack:
383 no_fault();
384 postfault:
385 if (watched)
386 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
387 if (tuc)
388 kmem_free(tuc, sizeof (*tuc));
389 #ifdef DEBUG
390 printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
391 PTOU(p)->u_comm, p->p_pid, sig);
392 printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
393 (void *)sp, (void *)hdlr, (uintptr_t)upc);
394 #endif
395 return (0);
396 }
397
398 #ifdef _SYSCALL32_IMPL
399
400 /*
401 * An i386 SVR4/ABI signal frame looks like this on the stack:
402 *
403 * old %esp:
404 * <a siginfo32_t [optional]>
405 * <a ucontext32_t>
406 * <a ucontext32_t's xsave state>
407 * <pointer to that ucontext32_t>
408 * <pointer to that siginfo32_t>
409 * <signo>
410 * new %esp: <return address (deliberately invalid)>
411 */
/*
 * Record that sendsig32() copies out to the new 32-bit user stack pointer.
 * Unlike the 64-bit sigframe it also carries the ucontext32_t pointer.
 * All members are 32-bit quantities.
 */
struct sigframe32 {
	caddr32_t retaddr;	/* bogus return address; set to 0xffffffff */
	uint32_t signo;		/* signal number being delivered */
	caddr32_t sip;		/* user address of the siginfo32_t, or 0 */
	caddr32_t ucp;		/* user address of the saved ucontext32_t */
};
418
419 int
sendsig32(int sig,k_siginfo_t * sip,void (* hdlr)())420 sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
421 {
422 volatile size_t minstacksz;
423 boolean_t newstack;
424 size_t xsave_size;
425 int ret;
426 label_t ljb;
427 volatile caddr_t sp;
428 caddr_t fp;
429 volatile struct regs *rp;
430 volatile greg_t upc;
431 volatile proc_t *p = ttoproc(curthread);
432 klwp_t *lwp = ttolwp(curthread);
433 ucontext32_t *volatile tuc = NULL;
434 ucontext32_t *uc;
435 siginfo32_t *sip_addr;
436 volatile int watched;
437
438 rp = lwptoregs(lwp);
439 upc = rp->r_pc;
440
441 minstacksz = SA32(sizeof (struct sigframe32)) + SA32(sizeof (*uc));
442 if (sip != NULL)
443 minstacksz += SA32(sizeof (siginfo32_t));
444
445 if (fpu_xsave_enabled()) {
446 xsave_size = SA32(fpu_signal_size(lwp));
447 minstacksz += xsave_size;
448 } else {
449 xsave_size = 0;
450 }
451 ASSERT((minstacksz & (STACK_ALIGN32 - 1)) == 0);
452
453 /*
454 * Figure out whether we will be handling this signal on
455 * an alternate stack specified by the user. Then allocate
456 * and validate the stack requirements for the signal handler
457 * context. on_fault will catch any faults.
458 */
459 newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
460 !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));
461
462 if (newstack) {
463 fp = (caddr_t)(SA32((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
464 SA32(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN32);
465 } else if ((rp->r_ss & 0xffff) != UDS_SEL) {
466 user_desc_t *ldt;
467 /*
468 * If the stack segment selector is -not- pointing at
469 * the UDS_SEL descriptor and we have an LDT entry for
470 * it instead, add the base address to find the effective va.
471 */
472 if ((ldt = p->p_ldt) != NULL)
473 fp = (caddr_t)rp->r_sp +
474 USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]);
475 else
476 fp = (caddr_t)rp->r_sp;
477 } else
478 fp = (caddr_t)rp->r_sp;
479
480 /*
481 * Force proper stack pointer alignment, even in the face of a
482 * misaligned stack pointer from user-level before the signal.
483 * Don't use the SA32() macro because that rounds up, not down.
484 */
485 fp = (caddr_t)((uintptr_t)fp & ~(STACK_ALIGN32 - 1));
486 sp = fp - minstacksz;
487
488 /*
489 * Make sure lwp hasn't trashed its stack
490 */
491 if (sp >= (caddr_t)(uintptr_t)USERLIMIT32 ||
492 fp >= (caddr_t)(uintptr_t)USERLIMIT32) {
493 #ifdef DEBUG
494 printf("sendsig32: bad signal stack cmd=%s, pid=%d, sig=%d\n",
495 PTOU(p)->u_comm, p->p_pid, sig);
496 printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
497 (void *)sp, (void *)hdlr, (uintptr_t)upc);
498 printf("sp above USERLIMIT\n");
499 #endif
500 return (0);
501 }
502
503 watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE);
504
505 if (on_fault(&ljb))
506 goto badstack;
507
508 if (sip != NULL) {
509 siginfo32_t si32;
510 zoneid_t zoneid;
511
512 siginfo_kto32(sip, &si32);
513 if (SI_FROMUSER(sip) &&
514 (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID &&
515 zoneid != sip->si_zoneid) {
516 si32.si_pid = p->p_zone->zone_zsched->p_pid;
517 si32.si_uid = 0;
518 si32.si_ctid = -1;
519 si32.si_zoneid = zoneid;
520 }
521 fp -= SA32(sizeof (si32));
522 uzero(fp, sizeof (si32));
523 copyout_noerr(&si32, fp, sizeof (si32));
524 sip_addr = (siginfo32_t *)fp;
525
526 if (sig == SIGPROF &&
527 curthread->t_rprof != NULL &&
528 curthread->t_rprof->rp_anystate) {
529 /*
530 * We stand on our head to deal with
531 * the real-time profiling signal.
532 * Fill in the stuff that doesn't fit
533 * in a normal k_siginfo structure.
534 */
535 int i = sip->si_nsysarg;
536
537 while (--i >= 0)
538 suword32_noerr(&(sip_addr->si_sysarg[i]),
539 (uint32_t)lwp->lwp_arg[i]);
540 copyout_noerr(curthread->t_rprof->rp_state,
541 sip_addr->si_mstate,
542 sizeof (curthread->t_rprof->rp_state));
543 }
544 } else
545 sip_addr = NULL;
546 no_fault();
547
548 /* save the current context on the user stack */
549 tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
550 fp -= SA32(sizeof (*tuc));
551 uc = (ucontext32_t *)fp;
552 if (xsave_size != 0) {
553 fp -= xsave_size;
554 tuc->uc_xsave = (int32_t)(uintptr_t)fp;
555 }
556 ret = savecontext32(tuc, &lwp->lwp_sigoldmask, SAVECTXT_F_EXTD |
557 SAVECTXT_F_ONFAULT);
558 if (ret != 0)
559 goto postfault;
560 if (on_fault(&ljb))
561 goto badstack;
562 copyout_noerr(tuc, uc, sizeof (*tuc));
563 kmem_free(tuc, sizeof (*tuc));
564 tuc = NULL;
565
566 lwp->lwp_oldcontext = (uintptr_t)uc;
567
568 if (newstack) {
569 lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK;
570 if (lwp->lwp_ustack) {
571 stack32_t stk32;
572
573 stk32.ss_sp = (caddr32_t)(uintptr_t)
574 lwp->lwp_sigaltstack.ss_sp;
575 stk32.ss_size = (size32_t)
576 lwp->lwp_sigaltstack.ss_size;
577 stk32.ss_flags = (int32_t)
578 lwp->lwp_sigaltstack.ss_flags;
579 copyout_noerr(&stk32,
580 (stack32_t *)lwp->lwp_ustack, sizeof (stk32));
581 }
582 }
583
584 /*
585 * Set up signal handler arguments
586 */
587 {
588 struct sigframe32 frame32;
589
590 frame32.sip = (caddr32_t)(uintptr_t)sip_addr;
591 frame32.ucp = (caddr32_t)(uintptr_t)uc;
592 frame32.signo = sig;
593 frame32.retaddr = 0xffffffff; /* never return! */
594 copyout_noerr(&frame32, sp, sizeof (frame32));
595 }
596
597 no_fault();
598 if (watched)
599 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
600
601 rp->r_sp = (greg_t)(uintptr_t)sp;
602 rp->r_pc = (greg_t)(uintptr_t)hdlr;
603 rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL);
604
605 if ((rp->r_cs & 0xffff) != U32CS_SEL ||
606 (rp->r_ss & 0xffff) != UDS_SEL) {
607 /*
608 * Try our best to deliver the signal.
609 */
610 rp->r_cs = U32CS_SEL;
611 rp->r_ss = UDS_SEL;
612 }
613
614 /*
615 * Don't set lwp_eosys here. sendsig() is called via psig() after
616 * lwp_eosys is handled, so setting it here would affect the next
617 * system call.
618 */
619 return (1);
620
621 badstack:
622 no_fault();
623 postfault:
624 if (watched)
625 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
626 if (tuc)
627 kmem_free(tuc, sizeof (*tuc));
628 #ifdef DEBUG
629 printf("sendsig32: bad signal stack cmd=%s pid=%d, sig=%d\n",
630 PTOU(p)->u_comm, p->p_pid, sig);
631 printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
632 (void *)sp, (void *)hdlr, (uintptr_t)upc);
633 #endif
634 return (0);
635 }
636
637 #endif /* _SYSCALL32_IMPL */
638