1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> 4 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 5 * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 6 */ 7 8 #include <stdlib.h> 9 #include <stdbool.h> 10 #include <unistd.h> 11 #include <sched.h> 12 #include <errno.h> 13 #include <string.h> 14 #include <fcntl.h> 15 #include <mem_user.h> 16 #include <sys/mman.h> 17 #include <sys/wait.h> 18 #include <sys/stat.h> 19 #include <sys/socket.h> 20 #include <asm/unistd.h> 21 #include <as-layout.h> 22 #include <init.h> 23 #include <kern_util.h> 24 #include <mem.h> 25 #include <os.h> 26 #include <ptrace_user.h> 27 #include <registers.h> 28 #include <skas.h> 29 #include <sysdep/stub.h> 30 #include <sysdep/mcontext.h> 31 #include <linux/futex.h> 32 #include <linux/threads.h> 33 #include <timetravel.h> 34 #include <asm-generic/rwonce.h> 35 #include "../internal.h" 36 37 int is_skas_winch(int pid, int fd, void *data) 38 { 39 return pid == getpgrp(); 40 } 41 42 static const char *ptrace_reg_name(int idx) 43 { 44 #define R(n) case HOST_##n: return #n 45 46 switch (idx) { 47 #ifdef __x86_64__ 48 R(BX); 49 R(CX); 50 R(DI); 51 R(SI); 52 R(DX); 53 R(BP); 54 R(AX); 55 R(R8); 56 R(R9); 57 R(R10); 58 R(R11); 59 R(R12); 60 R(R13); 61 R(R14); 62 R(R15); 63 R(ORIG_AX); 64 R(CS); 65 R(SS); 66 R(EFLAGS); 67 #elif defined(__i386__) 68 R(IP); 69 R(SP); 70 R(EFLAGS); 71 R(AX); 72 R(BX); 73 R(CX); 74 R(DX); 75 R(SI); 76 R(DI); 77 R(BP); 78 R(CS); 79 R(SS); 80 R(DS); 81 R(FS); 82 R(ES); 83 R(GS); 84 R(ORIG_AX); 85 #endif 86 } 87 return ""; 88 } 89 90 static int ptrace_dump_regs(int pid) 91 { 92 unsigned long regs[MAX_REG_NR]; 93 int i; 94 95 if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) 96 return -errno; 97 98 printk(UM_KERN_ERR "Stub registers -\n"); 99 for (i = 0; i < ARRAY_SIZE(regs); i++) { 100 const char *regname = ptrace_reg_name(i); 101 102 printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]); 103 } 104 105 return 0; 106 } 107 108 /* 109 * Signals that are OK to receive in the stub - we'll just continue it. 110 * SIGWINCH will happen when UML is inside a detached screen. 111 */ 112 #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) 113 114 /* Signals that the stub will finish with - anything else is an error */ 115 #define STUB_DONE_MASK (1 << SIGTRAP) 116 117 void wait_stub_done(int pid) 118 { 119 int n, status, err; 120 121 while (1) { 122 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 123 if ((n < 0) || !WIFSTOPPED(status)) 124 goto bad_wait; 125 126 if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) 127 break; 128 129 err = ptrace(PTRACE_CONT, pid, 0, 0); 130 if (err) { 131 printk(UM_KERN_ERR "%s : continue failed, errno = %d\n", 132 __func__, errno); 133 fatal_sigsegv(); 134 } 135 } 136 137 if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) 138 return; 139 140 bad_wait: 141 err = ptrace_dump_regs(pid); 142 if (err) 143 printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n", 144 -err); 145 printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n", 146 __func__, pid, n, errno, status); 147 fatal_sigsegv(); 148 } 149 150 void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys) 151 { 152 struct stub_data *data = (void *)mm_idp->stack; 153 int ret; 154 155 do { 156 const char byte = 0; 157 struct iovec iov = { 158 .iov_base = (void *)&byte, 159 .iov_len = sizeof(byte), 160 }; 161 union { 162 char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))]; 163 struct cmsghdr align; 164 } ctrl; 165 struct msghdr msgh = { 166 .msg_iov = &iov, 167 .msg_iovlen = 1, 168 }; 169 170 if (!running) { 171 if (mm_idp->syscall_fd_num) { 172 unsigned int fds_size = 173 sizeof(int) * mm_idp->syscall_fd_num; 174 struct cmsghdr *cmsg; 175 176 msgh.msg_control = ctrl.data; 177 msgh.msg_controllen = CMSG_SPACE(fds_size); 178 cmsg = CMSG_FIRSTHDR(&msgh); 179 cmsg->cmsg_level = SOL_SOCKET; 180 cmsg->cmsg_type = SCM_RIGHTS; 181 cmsg->cmsg_len = CMSG_LEN(fds_size); 182 memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map, 183 fds_size); 184 185 CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock, 186 &msgh, 0)); 187 } 188 189 data->signal = 0; 190 data->futex = FUTEX_IN_CHILD; 191 CATCH_EINTR(syscall(__NR_futex, &data->futex, 192 FUTEX_WAKE, 1, NULL, NULL, 0)); 193 } 194 195 do { 196 /* 197 * We need to check whether the child is still alive 198 * before and after the FUTEX_WAIT call. Before, in 199 * case it just died but we still updated data->futex 200 * to FUTEX_IN_CHILD. And after, in case it died while 201 * we were waiting (and SIGCHLD woke us up, see the 202 * IRQ handler in mmu.c). 203 * 204 * Either way, if PID is negative, then we have no 205 * choice but to kill the task. 206 */ 207 if (__READ_ONCE(mm_idp->pid) < 0) 208 goto out_kill; 209 210 ret = syscall(__NR_futex, &data->futex, 211 FUTEX_WAIT, FUTEX_IN_CHILD, 212 NULL, NULL, 0); 213 if (ret < 0 && errno != EINTR && errno != EAGAIN) { 214 printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n", 215 __func__, errno); 216 goto out_kill; 217 } 218 } while (data->futex == FUTEX_IN_CHILD); 219 220 if (__READ_ONCE(mm_idp->pid) < 0) 221 goto out_kill; 222 223 running = 0; 224 225 /* We may receive a SIGALRM before SIGSYS, iterate again. */ 226 } while (wait_sigsys && data->signal == SIGALRM); 227 228 if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) { 229 printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__); 230 goto out_kill; 231 } 232 233 if (wait_sigsys && data->signal != SIGSYS) { 234 printk(UM_KERN_ERR "%s : expected SIGSYS but got %d", 235 __func__, data->signal); 236 goto out_kill; 237 } 238 239 return; 240 241 out_kill: 242 printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n", 243 __func__, mm_idp->pid, errno); 244 /* This is not true inside start_userspace */ 245 if (current_mm_id() == mm_idp) 246 fatal_sigsegv(); 247 } 248 249 extern unsigned long current_stub_stack(void); 250 251 static void get_skas_faultinfo(int pid, struct faultinfo *fi) 252 { 253 int err; 254 255 err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); 256 if (err) { 257 printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " 258 "errno = %d\n", pid, errno); 259 fatal_sigsegv(); 260 } 261 wait_stub_done(pid); 262 263 /* 264 * faultinfo is prepared by the stub_segv_handler at start of 265 * the stub stack page. We just have to copy it. 266 */ 267 memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); 268 } 269 270 static void handle_trap(struct uml_pt_regs *regs) 271 { 272 if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) 273 fatal_sigsegv(); 274 275 handle_syscall(regs); 276 } 277 278 extern char __syscall_stub_start[]; 279 280 static int stub_exe_fd; 281 282 struct tramp_data { 283 struct stub_data *stub_data; 284 /* 0 is inherited, 1 is the kernel side */ 285 int sockpair[2]; 286 }; 287 288 #ifndef CLOSE_RANGE_CLOEXEC 289 #define CLOSE_RANGE_CLOEXEC (1U << 2) 290 #endif 291 292 static int userspace_tramp(void *data) 293 { 294 struct tramp_data *tramp_data = data; 295 char *const argv[] = { "uml-userspace", NULL }; 296 unsigned long long offset; 297 struct stub_init_data init_data = { 298 .seccomp = using_seccomp, 299 .stub_start = STUB_START, 300 }; 301 struct iomem_region *iomem; 302 int ret; 303 304 if (using_seccomp) { 305 init_data.signal_handler = STUB_CODE + 306 (unsigned long) stub_signal_interrupt - 307 (unsigned long) __syscall_stub_start; 308 init_data.signal_restorer = STUB_CODE + 309 (unsigned long) stub_signal_restorer - 310 (unsigned long) __syscall_stub_start; 311 } else { 312 init_data.signal_handler = STUB_CODE + 313 (unsigned long) stub_segv_handler - 314 (unsigned long) __syscall_stub_start; 315 init_data.signal_restorer = 0; 316 } 317 318 init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), 319 &offset); 320 init_data.stub_code_offset = MMAP_OFFSET(offset); 321 322 init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data), 323 &offset); 324 init_data.stub_data_offset = MMAP_OFFSET(offset); 325 326 /* 327 * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs 328 * and then unsetting it on all memory related FDs. 329 * This is not strictly necessary from a safety perspective. 330 */ 331 syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); 332 333 fcntl(init_data.stub_data_fd, F_SETFD, 0); 334 335 /* In SECCOMP mode, these FDs are passed when needed */ 336 if (!using_seccomp) { 337 for (iomem = iomem_regions; iomem; iomem = iomem->next) 338 fcntl(iomem->fd, F_SETFD, 0); 339 } 340 341 /* dup2 signaling FD/socket to STDIN */ 342 if (dup2(tramp_data->sockpair[0], 0) < 0) 343 exit(3); 344 close(tramp_data->sockpair[0]); 345 346 /* Write init_data and close write side */ 347 ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data)); 348 close(tramp_data->sockpair[1]); 349 350 if (ret != sizeof(init_data)) 351 exit(4); 352 353 /* Raw execveat for compatibility with older libc versions */ 354 syscall(__NR_execveat, stub_exe_fd, (unsigned long)"", 355 (unsigned long)argv, NULL, AT_EMPTY_PATH); 356 357 exit(5); 358 } 359 360 extern char stub_exe_start[]; 361 extern char stub_exe_end[]; 362 363 extern char *tempdir; 364 365 #define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX" 366 367 #ifndef MFD_EXEC 368 #define MFD_EXEC 0x0010U 369 #endif 370 371 static int __init init_stub_exe_fd(void) 372 { 373 size_t written = 0; 374 char *tmpfile = NULL; 375 376 stub_exe_fd = memfd_create("uml-userspace", 377 MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING); 378 379 if (stub_exe_fd < 0) { 380 printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!"); 381 382 tmpfile = malloc(strlen(tempdir) + 383 strlen(STUB_EXE_NAME_TEMPLATE) + 1); 384 if (tmpfile == NULL) 385 panic("Failed to allocate memory for stub binary name"); 386 387 strcpy(tmpfile, tempdir); 388 strcat(tmpfile, STUB_EXE_NAME_TEMPLATE); 389 390 stub_exe_fd = mkstemp(tmpfile); 391 if (stub_exe_fd < 0) 392 panic("Could not create temporary file for stub binary: %d", 393 -errno); 394 } 395 396 while (written < stub_exe_end - stub_exe_start) { 397 ssize_t res = write(stub_exe_fd, stub_exe_start + written, 398 stub_exe_end - stub_exe_start - written); 399 if (res < 0) { 400 if (errno == EINTR) 401 continue; 402 403 if (tmpfile) 404 unlink(tmpfile); 405 panic("Failed write stub binary: %d", -errno); 406 } 407 408 written += res; 409 } 410 411 if (!tmpfile) { 412 fcntl(stub_exe_fd, F_ADD_SEALS, 413 F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL); 414 } else { 415 if (fchmod(stub_exe_fd, 00500) < 0) { 416 unlink(tmpfile); 417 panic("Could not make stub binary executable: %d", 418 -errno); 419 } 420 421 close(stub_exe_fd); 422 stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); 423 if (stub_exe_fd < 0) { 424 unlink(tmpfile); 425 panic("Could not reopen stub binary: %d", -errno); 426 } 427 428 unlink(tmpfile); 429 free(tmpfile); 430 } 431 432 return 0; 433 } 434 __initcall(init_stub_exe_fd); 435 436 int using_seccomp; 437 438 /** 439 * start_userspace() - prepare a new userspace process 440 * @mm_id: The corresponding struct mm_id 441 * 442 * Setups a new temporary stack page that is used while userspace_tramp() runs 443 * Clones the kernel process into a new userspace process, with FDs only. 444 * 445 * Return: When positive: the process id of the new userspace process, 446 * when negative: an error number. 447 * FIXME: can PIDs become negative?! 448 */ 449 int start_userspace(struct mm_id *mm_id) 450 { 451 struct stub_data *proc_data = (void *)mm_id->stack; 452 struct tramp_data tramp_data = { 453 .stub_data = proc_data, 454 }; 455 void *stack; 456 unsigned long sp; 457 int status, n, err; 458 459 /* setup a temporary stack page */ 460 stack = mmap(NULL, UM_KERN_PAGE_SIZE, 461 PROT_READ | PROT_WRITE | PROT_EXEC, 462 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 463 if (stack == MAP_FAILED) { 464 err = -errno; 465 printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n", 466 __func__, errno); 467 return err; 468 } 469 470 /* set stack pointer to the end of the stack page, so it can grow downwards */ 471 sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; 472 473 /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */ 474 if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) { 475 err = -errno; 476 printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n", 477 __func__, errno); 478 return err; 479 } 480 481 if (using_seccomp) 482 proc_data->futex = FUTEX_IN_CHILD; 483 484 mm_id->pid = clone(userspace_tramp, (void *) sp, 485 CLONE_VFORK | CLONE_VM | SIGCHLD, 486 (void *)&tramp_data); 487 if (mm_id->pid < 0) { 488 err = -errno; 489 printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", 490 __func__, errno); 491 goto out_close; 492 } 493 494 if (using_seccomp) { 495 wait_stub_done_seccomp(mm_id, 1, 1); 496 } else { 497 do { 498 CATCH_EINTR(n = waitpid(mm_id->pid, &status, 499 WUNTRACED | __WALL)); 500 if (n < 0) { 501 err = -errno; 502 printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", 503 __func__, errno); 504 goto out_kill; 505 } 506 } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 507 508 if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 509 err = -EINVAL; 510 printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", 511 __func__, status); 512 goto out_kill; 513 } 514 515 if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL, 516 (void *) PTRACE_O_TRACESYSGOOD) < 0) { 517 err = -errno; 518 printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", 519 __func__, errno); 520 goto out_kill; 521 } 522 } 523 524 if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { 525 err = -errno; 526 printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n", 527 __func__, errno); 528 goto out_kill; 529 } 530 531 close(tramp_data.sockpair[0]); 532 if (using_seccomp) 533 mm_id->sock = tramp_data.sockpair[1]; 534 else 535 close(tramp_data.sockpair[1]); 536 537 return 0; 538 539 out_kill: 540 os_kill_ptraced_process(mm_id->pid, 1); 541 out_close: 542 close(tramp_data.sockpair[0]); 543 close(tramp_data.sockpair[1]); 544 545 mm_id->pid = -1; 546 547 return err; 548 } 549 550 static int unscheduled_userspace_iterations; 551 extern unsigned long tt_extra_sched_jiffies; 552 553 void userspace(struct uml_pt_regs *regs) 554 { 555 int err, status, op; 556 siginfo_t si_ptrace; 557 siginfo_t *si; 558 int sig; 559 560 /* Handle any immediate reschedules or signals */ 561 interrupt_end(); 562 563 while (1) { 564 struct mm_id *mm_id = current_mm_id(); 565 566 /* 567 * When we are in time-travel mode, userspace can theoretically 568 * do a *lot* of work without being scheduled. The problem with 569 * this is that it will prevent kernel bookkeeping (primarily 570 * the RCU) from running and this can for example cause OOM 571 * situations. 572 * 573 * This code accounts a jiffie against the scheduling clock 574 * after the defined userspace iterations in the same thread. 575 * By doing so the situation is effectively prevented. 576 */ 577 if (time_travel_mode == TT_MODE_INFCPU || 578 time_travel_mode == TT_MODE_EXTERNAL) { 579 #ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS 580 if (CONFIG_UML_MAX_USERSPACE_ITERATIONS && 581 unscheduled_userspace_iterations++ > 582 CONFIG_UML_MAX_USERSPACE_ITERATIONS) { 583 tt_extra_sched_jiffies += 1; 584 unscheduled_userspace_iterations = 0; 585 } 586 #endif 587 } 588 589 time_travel_print_bc_msg(); 590 591 current_mm_sync(); 592 593 if (using_seccomp) { 594 struct stub_data *proc_data = (void *) mm_id->stack; 595 596 err = set_stub_state(regs, proc_data, singlestepping()); 597 if (err) { 598 printk(UM_KERN_ERR "%s - failed to set regs: %d", 599 __func__, err); 600 fatal_sigsegv(); 601 } 602 603 /* Must have been reset by the syscall caller */ 604 if (proc_data->restart_wait != 0) 605 panic("Programming error: Flag to only run syscalls in child was not cleared!"); 606 607 /* Mark pending syscalls for flushing */ 608 proc_data->syscall_data_len = mm_id->syscall_data_len; 609 610 wait_stub_done_seccomp(mm_id, 0, 0); 611 612 sig = proc_data->signal; 613 614 if (sig == SIGTRAP && proc_data->err != 0) { 615 printk(UM_KERN_ERR "%s - Error flushing stub syscalls", 616 __func__); 617 syscall_stub_dump_error(mm_id); 618 mm_id->syscall_data_len = proc_data->err; 619 fatal_sigsegv(); 620 } 621 622 mm_id->syscall_data_len = 0; 623 mm_id->syscall_fd_num = 0; 624 625 err = get_stub_state(regs, proc_data, NULL); 626 if (err) { 627 printk(UM_KERN_ERR "%s - failed to get regs: %d", 628 __func__, err); 629 fatal_sigsegv(); 630 } 631 632 if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) 633 panic("%s - Invalid siginfo offset from child", 634 __func__); 635 si = (void *)&proc_data->sigstack[proc_data->si_offset]; 636 637 regs->is_user = 1; 638 639 /* Fill in ORIG_RAX and extract fault information */ 640 PT_SYSCALL_NR(regs->gp) = si->si_syscall; 641 if (sig == SIGSEGV) { 642 mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset]; 643 644 GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext); 645 } 646 } else { 647 int pid = mm_id->pid; 648 649 /* Flush out any pending syscalls */ 650 err = syscall_stub_flush(mm_id); 651 if (err) { 652 if (err == -ENOMEM) 653 report_enomem(); 654 655 printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", 656 __func__, -err); 657 fatal_sigsegv(); 658 } 659 660 /* 661 * This can legitimately fail if the process loads a 662 * bogus value into a segment register. It will 663 * segfault and PTRACE_GETREGS will read that value 664 * out of the process. However, PTRACE_SETREGS will 665 * fail. In this case, there is nothing to do but 666 * just kill the process. 667 */ 668 if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { 669 printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", 670 __func__, errno); 671 fatal_sigsegv(); 672 } 673 674 if (put_fp_registers(pid, regs->fp)) { 675 printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", 676 __func__, errno); 677 fatal_sigsegv(); 678 } 679 680 if (singlestepping()) 681 op = PTRACE_SYSEMU_SINGLESTEP; 682 else 683 op = PTRACE_SYSEMU; 684 685 if (ptrace(op, pid, 0, 0)) { 686 printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", 687 __func__, op, errno); 688 fatal_sigsegv(); 689 } 690 691 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 692 if (err < 0) { 693 printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", 694 __func__, errno); 695 fatal_sigsegv(); 696 } 697 698 regs->is_user = 1; 699 if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { 700 printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", 701 __func__, errno); 702 fatal_sigsegv(); 703 } 704 705 if (get_fp_registers(pid, regs->fp)) { 706 printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", 707 __func__, errno); 708 fatal_sigsegv(); 709 } 710 711 if (WIFSTOPPED(status)) { 712 sig = WSTOPSIG(status); 713 714 /* 715 * These signal handlers need the si argument 716 * and SIGSEGV needs the faultinfo. 717 * The SIGIO and SIGALARM handlers which constitute 718 * the majority of invocations, do not use it. 719 */ 720 switch (sig) { 721 case SIGSEGV: 722 get_skas_faultinfo(pid, 723 ®s->faultinfo); 724 fallthrough; 725 case SIGTRAP: 726 case SIGILL: 727 case SIGBUS: 728 case SIGFPE: 729 case SIGWINCH: 730 ptrace(PTRACE_GETSIGINFO, pid, 0, 731 (struct siginfo *)&si_ptrace); 732 si = &si_ptrace; 733 break; 734 default: 735 si = NULL; 736 break; 737 } 738 } else { 739 sig = 0; 740 } 741 } 742 743 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 744 745 if (sig) { 746 switch (sig) { 747 case SIGSEGV: 748 if (using_seccomp || PTRACE_FULL_FAULTINFO) 749 (*sig_info[SIGSEGV])(SIGSEGV, 750 (struct siginfo *)si, 751 regs, NULL); 752 else 753 segv(regs->faultinfo, 0, 1, NULL, NULL); 754 755 break; 756 case SIGSYS: 757 handle_syscall(regs); 758 break; 759 case SIGTRAP + 0x80: 760 handle_trap(regs); 761 break; 762 case SIGTRAP: 763 relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL); 764 break; 765 case SIGALRM: 766 break; 767 case SIGIO: 768 case SIGILL: 769 case SIGBUS: 770 case SIGFPE: 771 case SIGWINCH: 772 block_signals_trace(); 773 (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL); 774 unblock_signals_trace(); 775 break; 776 default: 777 printk(UM_KERN_ERR "%s - child stopped with signal %d\n", 778 __func__, sig); 779 fatal_sigsegv(); 780 } 781 interrupt_end(); 782 783 /* Avoid -ERESTARTSYS handling in host */ 784 if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) 785 PT_SYSCALL_NR(regs->gp) = -1; 786 } 787 } 788 } 789 790 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) 791 { 792 (*buf)[0].JB_IP = (unsigned long) handler; 793 (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - 794 sizeof(void *); 795 } 796 797 #define INIT_JMP_NEW_THREAD 0 798 #define INIT_JMP_CALLBACK 1 799 #define INIT_JMP_HALT 2 800 #define INIT_JMP_REBOOT 3 801 802 void switch_threads(jmp_buf *me, jmp_buf *you) 803 { 804 unscheduled_userspace_iterations = 0; 805 806 if (UML_SETJMP(me) == 0) 807 UML_LONGJMP(you, 1); 808 } 809 810 static jmp_buf initial_jmpbuf; 811 812 /* XXX Make these percpu */ 813 static void (*cb_proc)(void *arg); 814 static void *cb_arg; 815 static jmp_buf *cb_back; 816 817 int start_idle_thread(void *stack, jmp_buf *switch_buf) 818 { 819 int n; 820 821 set_handler(SIGWINCH); 822 823 /* 824 * Can't use UML_SETJMP or UML_LONGJMP here because they save 825 * and restore signals, with the possible side-effect of 826 * trying to handle any signals which came when they were 827 * blocked, which can't be done on this stack. 828 * Signals must be blocked when jumping back here and restored 829 * after returning to the jumper. 830 */ 831 n = setjmp(initial_jmpbuf); 832 switch (n) { 833 case INIT_JMP_NEW_THREAD: 834 (*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup; 835 (*switch_buf)[0].JB_SP = (unsigned long) stack + 836 UM_THREAD_SIZE - sizeof(void *); 837 break; 838 case INIT_JMP_CALLBACK: 839 (*cb_proc)(cb_arg); 840 longjmp(*cb_back, 1); 841 break; 842 case INIT_JMP_HALT: 843 kmalloc_ok = 0; 844 return 0; 845 case INIT_JMP_REBOOT: 846 kmalloc_ok = 0; 847 return 1; 848 default: 849 printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n", 850 __func__, n); 851 fatal_sigsegv(); 852 } 853 longjmp(*switch_buf, 1); 854 855 /* unreachable */ 856 printk(UM_KERN_ERR "impossible long jump!"); 857 fatal_sigsegv(); 858 return 0; 859 } 860 861 void initial_thread_cb_skas(void (*proc)(void *), void *arg) 862 { 863 jmp_buf here; 864 865 cb_proc = proc; 866 cb_arg = arg; 867 cb_back = &here; 868 869 block_signals_trace(); 870 if (UML_SETJMP(&here) == 0) 871 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 872 unblock_signals_trace(); 873 874 cb_proc = NULL; 875 cb_arg = NULL; 876 cb_back = NULL; 877 } 878 879 void halt_skas(void) 880 { 881 block_signals_trace(); 882 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 883 } 884 885 static bool noreboot; 886 887 static int __init noreboot_cmd_param(char *str, int *add) 888 { 889 *add = 0; 890 noreboot = true; 891 return 0; 892 } 893 894 __uml_setup("noreboot", noreboot_cmd_param, 895 "noreboot\n" 896 " Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n" 897 " This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n" 898 " crashes in CI\n"); 899 900 void reboot_skas(void) 901 { 902 block_signals_trace(); 903 UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); 904 } 905