1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 4 * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 5 */ 6 7 #include <stdlib.h> 8 #include <stdbool.h> 9 #include <unistd.h> 10 #include <sched.h> 11 #include <errno.h> 12 #include <string.h> 13 #include <sys/mman.h> 14 #include <sys/wait.h> 15 #include <asm/unistd.h> 16 #include <as-layout.h> 17 #include <init.h> 18 #include <kern_util.h> 19 #include <mem.h> 20 #include <os.h> 21 #include <ptrace_user.h> 22 #include <registers.h> 23 #include <skas.h> 24 #include <sysdep/stub.h> 25 #include <linux/threads.h> 26 #include <timetravel.h> 27 #include "../internal.h" 28 29 int is_skas_winch(int pid, int fd, void *data) 30 { 31 return pid == getpgrp(); 32 } 33 34 static const char *ptrace_reg_name(int idx) 35 { 36 #define R(n) case HOST_##n: return #n 37 38 switch (idx) { 39 #ifdef __x86_64__ 40 R(BX); 41 R(CX); 42 R(DI); 43 R(SI); 44 R(DX); 45 R(BP); 46 R(AX); 47 R(R8); 48 R(R9); 49 R(R10); 50 R(R11); 51 R(R12); 52 R(R13); 53 R(R14); 54 R(R15); 55 R(ORIG_AX); 56 R(CS); 57 R(SS); 58 R(EFLAGS); 59 #elif defined(__i386__) 60 R(IP); 61 R(SP); 62 R(EFLAGS); 63 R(AX); 64 R(BX); 65 R(CX); 66 R(DX); 67 R(SI); 68 R(DI); 69 R(BP); 70 R(CS); 71 R(SS); 72 R(DS); 73 R(FS); 74 R(ES); 75 R(GS); 76 R(ORIG_AX); 77 #endif 78 } 79 return ""; 80 } 81 82 static int ptrace_dump_regs(int pid) 83 { 84 unsigned long regs[MAX_REG_NR]; 85 int i; 86 87 if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) 88 return -errno; 89 90 printk(UM_KERN_ERR "Stub registers -\n"); 91 for (i = 0; i < ARRAY_SIZE(regs); i++) { 92 const char *regname = ptrace_reg_name(i); 93 94 printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]); 95 } 96 97 return 0; 98 } 99 100 /* 101 * Signals that are OK to receive in the stub - we'll just continue it. 102 * SIGWINCH will happen when UML is inside a detached screen. 103 */ 104 #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) 105 106 /* Signals that the stub will finish with - anything else is an error */ 107 #define STUB_DONE_MASK (1 << SIGTRAP) 108 109 void wait_stub_done(int pid) 110 { 111 int n, status, err; 112 113 while (1) { 114 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 115 if ((n < 0) || !WIFSTOPPED(status)) 116 goto bad_wait; 117 118 if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) 119 break; 120 121 err = ptrace(PTRACE_CONT, pid, 0, 0); 122 if (err) { 123 printk(UM_KERN_ERR "%s : continue failed, errno = %d\n", 124 __func__, errno); 125 fatal_sigsegv(); 126 } 127 } 128 129 if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) 130 return; 131 132 bad_wait: 133 err = ptrace_dump_regs(pid); 134 if (err) 135 printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n", 136 -err); 137 printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n", 138 __func__, pid, n, errno, status); 139 fatal_sigsegv(); 140 } 141 142 extern unsigned long current_stub_stack(void); 143 144 static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs) 145 { 146 int err; 147 148 err = get_fp_registers(pid, aux_fp_regs); 149 if (err < 0) { 150 printk(UM_KERN_ERR "save_fp_registers returned %d\n", 151 err); 152 fatal_sigsegv(); 153 } 154 err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); 155 if (err) { 156 printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " 157 "errno = %d\n", pid, errno); 158 fatal_sigsegv(); 159 } 160 wait_stub_done(pid); 161 162 /* 163 * faultinfo is prepared by the stub_segv_handler at start of 164 * the stub stack page. We just have to copy it. 165 */ 166 memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); 167 168 err = put_fp_registers(pid, aux_fp_regs); 169 if (err < 0) { 170 printk(UM_KERN_ERR "put_fp_registers returned %d\n", 171 err); 172 fatal_sigsegv(); 173 } 174 } 175 176 static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs) 177 { 178 get_skas_faultinfo(pid, ®s->faultinfo, aux_fp_regs); 179 segv(regs->faultinfo, 0, 1, NULL); 180 } 181 182 static void handle_trap(int pid, struct uml_pt_regs *regs) 183 { 184 if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) 185 fatal_sigsegv(); 186 187 handle_syscall(regs); 188 } 189 190 extern char __syscall_stub_start[]; 191 192 /** 193 * userspace_tramp() - userspace trampoline 194 * @stack: pointer to the new userspace stack page 195 * 196 * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. 197 * This function will run on a temporary stack page. 198 * It ptrace()'es itself, then 199 * Two pages are mapped into the userspace address space: 200 * - STUB_CODE (with EXEC), which contains the skas stub code 201 * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. 202 * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. 203 * And last the process stops itself to give control to the UML kernel for this userspace process. 204 * 205 * Return: Always zero, otherwise the current userspace process is ended with non null exit() call 206 */ 207 static int userspace_tramp(void *stack) 208 { 209 struct sigaction sa; 210 void *addr; 211 int fd; 212 unsigned long long offset; 213 unsigned long segv_handler = STUB_CODE + 214 (unsigned long) stub_segv_handler - 215 (unsigned long) __syscall_stub_start; 216 217 ptrace(PTRACE_TRACEME, 0, 0, 0); 218 219 signal(SIGTERM, SIG_DFL); 220 signal(SIGWINCH, SIG_IGN); 221 222 fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); 223 addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, 224 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); 225 if (addr == MAP_FAILED) { 226 os_info("mapping mmap stub at 0x%lx failed, errno = %d\n", 227 STUB_CODE, errno); 228 exit(1); 229 } 230 231 fd = phys_mapping(uml_to_phys(stack), &offset); 232 addr = mmap((void *) STUB_DATA, 233 STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, 234 MAP_FIXED | MAP_SHARED, fd, offset); 235 if (addr == MAP_FAILED) { 236 os_info("mapping segfault stack at 0x%lx failed, errno = %d\n", 237 STUB_DATA, errno); 238 exit(1); 239 } 240 241 set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); 242 sigemptyset(&sa.sa_mask); 243 sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; 244 sa.sa_sigaction = (void *) segv_handler; 245 sa.sa_restorer = NULL; 246 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 247 os_info("%s - setting SIGSEGV handler failed - errno = %d\n", 248 __func__, errno); 249 exit(1); 250 } 251 252 kill(os_getpid(), SIGSTOP); 253 return 0; 254 } 255 256 int userspace_pid[NR_CPUS]; 257 258 /** 259 * start_userspace() - prepare a new userspace process 260 * @stub_stack: pointer to the stub stack. 261 * 262 * Setups a new temporary stack page that is used while userspace_tramp() runs 263 * Clones the kernel process into a new userspace process, with FDs only. 264 * 265 * Return: When positive: the process id of the new userspace process, 266 * when negative: an error number. 267 * FIXME: can PIDs become negative?! 268 */ 269 int start_userspace(unsigned long stub_stack) 270 { 271 void *stack; 272 unsigned long sp; 273 int pid, status, n, flags, err; 274 275 /* setup a temporary stack page */ 276 stack = mmap(NULL, UM_KERN_PAGE_SIZE, 277 PROT_READ | PROT_WRITE | PROT_EXEC, 278 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 279 if (stack == MAP_FAILED) { 280 err = -errno; 281 printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n", 282 __func__, errno); 283 return err; 284 } 285 286 /* set stack pointer to the end of the stack page, so it can grow downwards */ 287 sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; 288 289 flags = CLONE_FILES | SIGCHLD; 290 291 /* clone into new userspace process */ 292 pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); 293 if (pid < 0) { 294 err = -errno; 295 printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", 296 __func__, errno); 297 return err; 298 } 299 300 do { 301 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 302 if (n < 0) { 303 err = -errno; 304 printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", 305 __func__, errno); 306 goto out_kill; 307 } 308 } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 309 310 if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 311 err = -EINVAL; 312 printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", 313 __func__, status); 314 goto out_kill; 315 } 316 317 if (ptrace(PTRACE_SETOPTIONS, pid, NULL, 318 (void *) PTRACE_O_TRACESYSGOOD) < 0) { 319 err = -errno; 320 printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", 321 __func__, errno); 322 goto out_kill; 323 } 324 325 if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { 326 err = -errno; 327 printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n", 328 __func__, errno); 329 goto out_kill; 330 } 331 332 return pid; 333 334 out_kill: 335 os_kill_ptraced_process(pid, 1); 336 return err; 337 } 338 339 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) 340 { 341 int err, status, op, pid = userspace_pid[0]; 342 siginfo_t si; 343 344 /* Handle any immediate reschedules or signals */ 345 interrupt_end(); 346 347 while (1) { 348 time_travel_print_bc_msg(); 349 350 current_mm_sync(); 351 352 /* Flush out any pending syscalls */ 353 err = syscall_stub_flush(current_mm_id()); 354 if (err) { 355 if (err == -ENOMEM) 356 report_enomem(); 357 358 printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", 359 __func__, -err); 360 fatal_sigsegv(); 361 } 362 363 /* 364 * This can legitimately fail if the process loads a 365 * bogus value into a segment register. It will 366 * segfault and PTRACE_GETREGS will read that value 367 * out of the process. However, PTRACE_SETREGS will 368 * fail. In this case, there is nothing to do but 369 * just kill the process. 370 */ 371 if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { 372 printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", 373 __func__, errno); 374 fatal_sigsegv(); 375 } 376 377 if (put_fp_registers(pid, regs->fp)) { 378 printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", 379 __func__, errno); 380 fatal_sigsegv(); 381 } 382 383 if (singlestepping()) 384 op = PTRACE_SYSEMU_SINGLESTEP; 385 else 386 op = PTRACE_SYSEMU; 387 388 if (ptrace(op, pid, 0, 0)) { 389 printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", 390 __func__, op, errno); 391 fatal_sigsegv(); 392 } 393 394 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 395 if (err < 0) { 396 printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", 397 __func__, errno); 398 fatal_sigsegv(); 399 } 400 401 regs->is_user = 1; 402 if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { 403 printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", 404 __func__, errno); 405 fatal_sigsegv(); 406 } 407 408 if (get_fp_registers(pid, regs->fp)) { 409 printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", 410 __func__, errno); 411 fatal_sigsegv(); 412 } 413 414 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 415 416 if (WIFSTOPPED(status)) { 417 int sig = WSTOPSIG(status); 418 419 /* These signal handlers need the si argument. 420 * The SIGIO and SIGALARM handlers which constitute the 421 * majority of invocations, do not use it. 422 */ 423 switch (sig) { 424 case SIGSEGV: 425 case SIGTRAP: 426 case SIGILL: 427 case SIGBUS: 428 case SIGFPE: 429 case SIGWINCH: 430 ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); 431 break; 432 } 433 434 switch (sig) { 435 case SIGSEGV: 436 if (PTRACE_FULL_FAULTINFO) { 437 get_skas_faultinfo(pid, 438 ®s->faultinfo, aux_fp_regs); 439 (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, 440 regs); 441 } 442 else handle_segv(pid, regs, aux_fp_regs); 443 break; 444 case SIGTRAP + 0x80: 445 handle_trap(pid, regs); 446 break; 447 case SIGTRAP: 448 relay_signal(SIGTRAP, (struct siginfo *)&si, regs); 449 break; 450 case SIGALRM: 451 break; 452 case SIGIO: 453 case SIGILL: 454 case SIGBUS: 455 case SIGFPE: 456 case SIGWINCH: 457 block_signals_trace(); 458 (*sig_info[sig])(sig, (struct siginfo *)&si, regs); 459 unblock_signals_trace(); 460 break; 461 default: 462 printk(UM_KERN_ERR "%s - child stopped with signal %d\n", 463 __func__, sig); 464 fatal_sigsegv(); 465 } 466 pid = userspace_pid[0]; 467 interrupt_end(); 468 469 /* Avoid -ERESTARTSYS handling in host */ 470 if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) 471 PT_SYSCALL_NR(regs->gp) = -1; 472 } 473 } 474 } 475 476 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) 477 { 478 (*buf)[0].JB_IP = (unsigned long) handler; 479 (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - 480 sizeof(void *); 481 } 482 483 #define INIT_JMP_NEW_THREAD 0 484 #define INIT_JMP_CALLBACK 1 485 #define INIT_JMP_HALT 2 486 #define INIT_JMP_REBOOT 3 487 488 void switch_threads(jmp_buf *me, jmp_buf *you) 489 { 490 if (UML_SETJMP(me) == 0) 491 UML_LONGJMP(you, 1); 492 } 493 494 static jmp_buf initial_jmpbuf; 495 496 /* XXX Make these percpu */ 497 static void (*cb_proc)(void *arg); 498 static void *cb_arg; 499 static jmp_buf *cb_back; 500 501 int start_idle_thread(void *stack, jmp_buf *switch_buf) 502 { 503 int n; 504 505 set_handler(SIGWINCH); 506 507 /* 508 * Can't use UML_SETJMP or UML_LONGJMP here because they save 509 * and restore signals, with the possible side-effect of 510 * trying to handle any signals which came when they were 511 * blocked, which can't be done on this stack. 512 * Signals must be blocked when jumping back here and restored 513 * after returning to the jumper. 514 */ 515 n = setjmp(initial_jmpbuf); 516 switch (n) { 517 case INIT_JMP_NEW_THREAD: 518 (*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup; 519 (*switch_buf)[0].JB_SP = (unsigned long) stack + 520 UM_THREAD_SIZE - sizeof(void *); 521 break; 522 case INIT_JMP_CALLBACK: 523 (*cb_proc)(cb_arg); 524 longjmp(*cb_back, 1); 525 break; 526 case INIT_JMP_HALT: 527 kmalloc_ok = 0; 528 return 0; 529 case INIT_JMP_REBOOT: 530 kmalloc_ok = 0; 531 return 1; 532 default: 533 printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n", 534 __func__, n); 535 fatal_sigsegv(); 536 } 537 longjmp(*switch_buf, 1); 538 539 /* unreachable */ 540 printk(UM_KERN_ERR "impossible long jump!"); 541 fatal_sigsegv(); 542 return 0; 543 } 544 545 void initial_thread_cb_skas(void (*proc)(void *), void *arg) 546 { 547 jmp_buf here; 548 549 cb_proc = proc; 550 cb_arg = arg; 551 cb_back = &here; 552 553 block_signals_trace(); 554 if (UML_SETJMP(&here) == 0) 555 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 556 unblock_signals_trace(); 557 558 cb_proc = NULL; 559 cb_arg = NULL; 560 cb_back = NULL; 561 } 562 563 void halt_skas(void) 564 { 565 block_signals_trace(); 566 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 567 } 568 569 static bool noreboot; 570 571 static int __init noreboot_cmd_param(char *str, int *add) 572 { 573 noreboot = true; 574 return 0; 575 } 576 577 __uml_setup("noreboot", noreboot_cmd_param, 578 "noreboot\n" 579 " Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n" 580 " This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n" 581 " crashes in CI\n"); 582 583 void reboot_skas(void) 584 { 585 block_signals_trace(); 586 UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); 587 } 588 589 void __switch_mm(struct mm_id *mm_idp) 590 { 591 userspace_pid[0] = mm_idp->pid; 592 } 593