1 /* 2 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 3 * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 * Licensed under the GPL 5 */ 6 7 #include <stdlib.h> 8 #include <unistd.h> 9 #include <sched.h> 10 #include <errno.h> 11 #include <string.h> 12 #include <sys/mman.h> 13 #include <sys/wait.h> 14 #include <asm/unistd.h> 15 #include <as-layout.h> 16 #include <init.h> 17 #include <kern_util.h> 18 #include <mem.h> 19 #include <os.h> 20 #include <ptrace_user.h> 21 #include <registers.h> 22 #include <skas.h> 23 #include <sysdep/stub.h> 24 #include <linux/threads.h> 25 26 int is_skas_winch(int pid, int fd, void *data) 27 { 28 return pid == getpgrp(); 29 } 30 31 static int ptrace_dump_regs(int pid) 32 { 33 unsigned long regs[MAX_REG_NR]; 34 int i; 35 36 if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) 37 return -errno; 38 39 printk(UM_KERN_ERR "Stub registers -\n"); 40 for (i = 0; i < ARRAY_SIZE(regs); i++) 41 printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]); 42 43 return 0; 44 } 45 46 /* 47 * Signals that are OK to receive in the stub - we'll just continue it. 48 * SIGWINCH will happen when UML is inside a detached screen. 49 */ 50 #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) 51 52 /* Signals that the stub will finish with - anything else is an error */ 53 #define STUB_DONE_MASK (1 << SIGTRAP) 54 55 void wait_stub_done(int pid) 56 { 57 int n, status, err; 58 59 while (1) { 60 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 61 if ((n < 0) || !WIFSTOPPED(status)) 62 goto bad_wait; 63 64 if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) 65 break; 66 67 err = ptrace(PTRACE_CONT, pid, 0, 0); 68 if (err) { 69 printk(UM_KERN_ERR "wait_stub_done : continue failed, " 70 "errno = %d\n", errno); 71 fatal_sigsegv(); 72 } 73 } 74 75 if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) 76 return; 77 78 bad_wait: 79 err = ptrace_dump_regs(pid); 80 if (err) 81 printk(UM_KERN_ERR "Failed to get registers from stub, " 82 "errno = %d\n", -err); 83 printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " 84 "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, 85 status); 86 fatal_sigsegv(); 87 } 88 89 extern unsigned long current_stub_stack(void); 90 91 static void get_skas_faultinfo(int pid, struct faultinfo *fi) 92 { 93 int err; 94 unsigned long fpregs[FP_SIZE]; 95 96 err = get_fp_registers(pid, fpregs); 97 if (err < 0) { 98 printk(UM_KERN_ERR "save_fp_registers returned %d\n", 99 err); 100 fatal_sigsegv(); 101 } 102 err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); 103 if (err) { 104 printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " 105 "errno = %d\n", pid, errno); 106 fatal_sigsegv(); 107 } 108 wait_stub_done(pid); 109 110 /* 111 * faultinfo is prepared by the stub-segv-handler at start of 112 * the stub stack page. We just have to copy it. 113 */ 114 memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); 115 116 err = put_fp_registers(pid, fpregs); 117 if (err < 0) { 118 printk(UM_KERN_ERR "put_fp_registers returned %d\n", 119 err); 120 fatal_sigsegv(); 121 } 122 } 123 124 static void handle_segv(int pid, struct uml_pt_regs * regs) 125 { 126 get_skas_faultinfo(pid, ®s->faultinfo); 127 segv(regs->faultinfo, 0, 1, NULL); 128 } 129 130 /* 131 * To use the same value of using_sysemu as the caller, ask it that value 132 * (in local_using_sysemu 133 */ 134 static void handle_trap(int pid, struct uml_pt_regs *regs, 135 int local_using_sysemu) 136 { 137 int err, status; 138 139 if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) 140 fatal_sigsegv(); 141 142 if (!local_using_sysemu) 143 { 144 err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, 145 __NR_getpid); 146 if (err < 0) { 147 printk(UM_KERN_ERR "handle_trap - nullifying syscall " 148 "failed, errno = %d\n", errno); 149 fatal_sigsegv(); 150 } 151 152 err = ptrace(PTRACE_SYSCALL, pid, 0, 0); 153 if (err < 0) { 154 printk(UM_KERN_ERR "handle_trap - continuing to end of " 155 "syscall failed, errno = %d\n", errno); 156 fatal_sigsegv(); 157 } 158 159 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 160 if ((err < 0) || !WIFSTOPPED(status) || 161 (WSTOPSIG(status) != SIGTRAP + 0x80)) { 162 err = ptrace_dump_regs(pid); 163 if (err) 164 printk(UM_KERN_ERR "Failed to get registers " 165 "from process, errno = %d\n", -err); 166 printk(UM_KERN_ERR "handle_trap - failed to wait at " 167 "end of syscall, errno = %d, status = %d\n", 168 errno, status); 169 fatal_sigsegv(); 170 } 171 } 172 173 handle_syscall(regs); 174 } 175 176 extern char __syscall_stub_start[]; 177 178 static int userspace_tramp(void *stack) 179 { 180 void *addr; 181 int fd; 182 unsigned long long offset; 183 184 ptrace(PTRACE_TRACEME, 0, 0, 0); 185 186 signal(SIGTERM, SIG_DFL); 187 signal(SIGWINCH, SIG_IGN); 188 189 /* 190 * This has a pte, but it can't be mapped in with the usual 191 * tlb_flush mechanism because this is part of that mechanism 192 */ 193 fd = phys_mapping(to_phys(__syscall_stub_start), &offset); 194 addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, 195 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); 196 if (addr == MAP_FAILED) { 197 printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, " 198 "errno = %d\n", STUB_CODE, errno); 199 exit(1); 200 } 201 202 if (stack != NULL) { 203 fd = phys_mapping(to_phys(stack), &offset); 204 addr = mmap((void *) STUB_DATA, 205 UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, 206 MAP_FIXED | MAP_SHARED, fd, offset); 207 if (addr == MAP_FAILED) { 208 printk(UM_KERN_ERR "mapping segfault stack " 209 "at 0x%lx failed, errno = %d\n", 210 STUB_DATA, errno); 211 exit(1); 212 } 213 } 214 if (stack != NULL) { 215 struct sigaction sa; 216 217 unsigned long v = STUB_CODE + 218 (unsigned long) stub_segv_handler - 219 (unsigned long) __syscall_stub_start; 220 221 set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); 222 sigemptyset(&sa.sa_mask); 223 sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; 224 sa.sa_sigaction = (void *) v; 225 sa.sa_restorer = NULL; 226 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 227 printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " 228 "handler failed - errno = %d\n", errno); 229 exit(1); 230 } 231 } 232 233 kill(os_getpid(), SIGSTOP); 234 return 0; 235 } 236 237 int userspace_pid[NR_CPUS]; 238 239 int start_userspace(unsigned long stub_stack) 240 { 241 void *stack; 242 unsigned long sp; 243 int pid, status, n, flags, err; 244 245 stack = mmap(NULL, UM_KERN_PAGE_SIZE, 246 PROT_READ | PROT_WRITE | PROT_EXEC, 247 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 248 if (stack == MAP_FAILED) { 249 err = -errno; 250 printk(UM_KERN_ERR "start_userspace : mmap failed, " 251 "errno = %d\n", errno); 252 return err; 253 } 254 255 sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); 256 257 flags = CLONE_FILES | SIGCHLD; 258 259 pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); 260 if (pid < 0) { 261 err = -errno; 262 printk(UM_KERN_ERR "start_userspace : clone failed, " 263 "errno = %d\n", errno); 264 return err; 265 } 266 267 do { 268 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 269 if (n < 0) { 270 err = -errno; 271 printk(UM_KERN_ERR "start_userspace : wait failed, " 272 "errno = %d\n", errno); 273 goto out_kill; 274 } 275 } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 276 277 if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 278 err = -EINVAL; 279 printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got " 280 "status = %d\n", status); 281 goto out_kill; 282 } 283 284 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, 285 (void *) PTRACE_O_TRACESYSGOOD) < 0) { 286 err = -errno; 287 printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS " 288 "failed, errno = %d\n", errno); 289 goto out_kill; 290 } 291 292 if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { 293 err = -errno; 294 printk(UM_KERN_ERR "start_userspace : munmap failed, " 295 "errno = %d\n", errno); 296 goto out_kill; 297 } 298 299 return pid; 300 301 out_kill: 302 os_kill_ptraced_process(pid, 1); 303 return err; 304 } 305 306 void userspace(struct uml_pt_regs *regs) 307 { 308 int err, status, op, pid = userspace_pid[0]; 309 /* To prevent races if using_sysemu changes under us.*/ 310 int local_using_sysemu; 311 siginfo_t si; 312 313 /* Handle any immediate reschedules or signals */ 314 interrupt_end(); 315 316 while (1) { 317 318 /* 319 * This can legitimately fail if the process loads a 320 * bogus value into a segment register. It will 321 * segfault and PTRACE_GETREGS will read that value 322 * out of the process. However, PTRACE_SETREGS will 323 * fail. In this case, there is nothing to do but 324 * just kill the process. 325 */ 326 if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) 327 fatal_sigsegv(); 328 329 if (put_fp_registers(pid, regs->fp)) 330 fatal_sigsegv(); 331 332 /* Now we set local_using_sysemu to be used for one loop */ 333 local_using_sysemu = get_using_sysemu(); 334 335 op = SELECT_PTRACE_OPERATION(local_using_sysemu, 336 singlestepping(NULL)); 337 338 if (ptrace(op, pid, 0, 0)) { 339 printk(UM_KERN_ERR "userspace - ptrace continue " 340 "failed, op = %d, errno = %d\n", op, errno); 341 fatal_sigsegv(); 342 } 343 344 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 345 if (err < 0) { 346 printk(UM_KERN_ERR "userspace - wait failed, " 347 "errno = %d\n", errno); 348 fatal_sigsegv(); 349 } 350 351 regs->is_user = 1; 352 if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { 353 printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, " 354 "errno = %d\n", errno); 355 fatal_sigsegv(); 356 } 357 358 if (get_fp_registers(pid, regs->fp)) { 359 printk(UM_KERN_ERR "userspace - get_fp_registers failed, " 360 "errno = %d\n", errno); 361 fatal_sigsegv(); 362 } 363 364 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 365 366 if (WIFSTOPPED(status)) { 367 int sig = WSTOPSIG(status); 368 369 ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); 370 371 switch (sig) { 372 case SIGSEGV: 373 if (PTRACE_FULL_FAULTINFO) { 374 get_skas_faultinfo(pid, 375 ®s->faultinfo); 376 (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, 377 regs); 378 } 379 else handle_segv(pid, regs); 380 break; 381 case SIGTRAP + 0x80: 382 handle_trap(pid, regs, local_using_sysemu); 383 break; 384 case SIGTRAP: 385 relay_signal(SIGTRAP, (struct siginfo *)&si, regs); 386 break; 387 case SIGALRM: 388 break; 389 case SIGIO: 390 case SIGILL: 391 case SIGBUS: 392 case SIGFPE: 393 case SIGWINCH: 394 block_signals(); 395 (*sig_info[sig])(sig, (struct siginfo *)&si, regs); 396 unblock_signals(); 397 break; 398 default: 399 printk(UM_KERN_ERR "userspace - child stopped " 400 "with signal %d\n", sig); 401 fatal_sigsegv(); 402 } 403 pid = userspace_pid[0]; 404 interrupt_end(); 405 406 /* Avoid -ERESTARTSYS handling in host */ 407 if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) 408 PT_SYSCALL_NR(regs->gp) = -1; 409 } 410 } 411 } 412 413 static unsigned long thread_regs[MAX_REG_NR]; 414 static unsigned long thread_fp_regs[FP_SIZE]; 415 416 static int __init init_thread_regs(void) 417 { 418 get_safe_registers(thread_regs, thread_fp_regs); 419 /* Set parent's instruction pointer to start of clone-stub */ 420 thread_regs[REGS_IP_INDEX] = STUB_CODE + 421 (unsigned long) stub_clone_handler - 422 (unsigned long) __syscall_stub_start; 423 thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - 424 sizeof(void *); 425 #ifdef __SIGNAL_FRAMESIZE 426 thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; 427 #endif 428 return 0; 429 } 430 431 __initcall(init_thread_regs); 432 433 int copy_context_skas0(unsigned long new_stack, int pid) 434 { 435 int err; 436 unsigned long current_stack = current_stub_stack(); 437 struct stub_data *data = (struct stub_data *) current_stack; 438 struct stub_data *child_data = (struct stub_data *) new_stack; 439 unsigned long long new_offset; 440 int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); 441 442 /* 443 * prepare offset and fd of child's stack as argument for parent's 444 * and child's mmap2 calls 445 */ 446 *data = ((struct stub_data) { 447 .offset = MMAP_OFFSET(new_offset), 448 .fd = new_fd 449 }); 450 451 err = ptrace_setregs(pid, thread_regs); 452 if (err < 0) { 453 err = -errno; 454 printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS " 455 "failed, pid = %d, errno = %d\n", pid, -err); 456 return err; 457 } 458 459 err = put_fp_registers(pid, thread_fp_regs); 460 if (err < 0) { 461 printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers " 462 "failed, pid = %d, err = %d\n", pid, err); 463 return err; 464 } 465 466 /* set a well known return code for detection of child write failure */ 467 child_data->err = 12345678; 468 469 /* 470 * Wait, until parent has finished its work: read child's pid from 471 * parent's stack, and check, if bad result. 472 */ 473 err = ptrace(PTRACE_CONT, pid, 0, 0); 474 if (err) { 475 err = -errno; 476 printk(UM_KERN_ERR "Failed to continue new process, pid = %d, " 477 "errno = %d\n", pid, errno); 478 return err; 479 } 480 481 wait_stub_done(pid); 482 483 pid = data->err; 484 if (pid < 0) { 485 printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " 486 "error %d\n", -pid); 487 return pid; 488 } 489 490 /* 491 * Wait, until child has finished too: read child's result from 492 * child's stack and check it. 493 */ 494 wait_stub_done(pid); 495 if (child_data->err != STUB_DATA) { 496 printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " 497 "error %ld\n", child_data->err); 498 err = child_data->err; 499 goto out_kill; 500 } 501 502 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, 503 (void *)PTRACE_O_TRACESYSGOOD) < 0) { 504 err = -errno; 505 printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS " 506 "failed, errno = %d\n", errno); 507 goto out_kill; 508 } 509 510 return pid; 511 512 out_kill: 513 os_kill_ptraced_process(pid, 1); 514 return err; 515 } 516 517 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) 518 { 519 (*buf)[0].JB_IP = (unsigned long) handler; 520 (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - 521 sizeof(void *); 522 } 523 524 #define INIT_JMP_NEW_THREAD 0 525 #define INIT_JMP_CALLBACK 1 526 #define INIT_JMP_HALT 2 527 #define INIT_JMP_REBOOT 3 528 529 void switch_threads(jmp_buf *me, jmp_buf *you) 530 { 531 if (UML_SETJMP(me) == 0) 532 UML_LONGJMP(you, 1); 533 } 534 535 static jmp_buf initial_jmpbuf; 536 537 /* XXX Make these percpu */ 538 static void (*cb_proc)(void *arg); 539 static void *cb_arg; 540 static jmp_buf *cb_back; 541 542 int start_idle_thread(void *stack, jmp_buf *switch_buf) 543 { 544 int n; 545 546 set_handler(SIGWINCH); 547 548 /* 549 * Can't use UML_SETJMP or UML_LONGJMP here because they save 550 * and restore signals, with the possible side-effect of 551 * trying to handle any signals which came when they were 552 * blocked, which can't be done on this stack. 553 * Signals must be blocked when jumping back here and restored 554 * after returning to the jumper. 555 */ 556 n = setjmp(initial_jmpbuf); 557 switch (n) { 558 case INIT_JMP_NEW_THREAD: 559 (*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup; 560 (*switch_buf)[0].JB_SP = (unsigned long) stack + 561 UM_THREAD_SIZE - sizeof(void *); 562 break; 563 case INIT_JMP_CALLBACK: 564 (*cb_proc)(cb_arg); 565 longjmp(*cb_back, 1); 566 break; 567 case INIT_JMP_HALT: 568 kmalloc_ok = 0; 569 return 0; 570 case INIT_JMP_REBOOT: 571 kmalloc_ok = 0; 572 return 1; 573 default: 574 printk(UM_KERN_ERR "Bad sigsetjmp return in " 575 "start_idle_thread - %d\n", n); 576 fatal_sigsegv(); 577 } 578 longjmp(*switch_buf, 1); 579 } 580 581 void initial_thread_cb_skas(void (*proc)(void *), void *arg) 582 { 583 jmp_buf here; 584 585 cb_proc = proc; 586 cb_arg = arg; 587 cb_back = &here; 588 589 block_signals(); 590 if (UML_SETJMP(&here) == 0) 591 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 592 unblock_signals(); 593 594 cb_proc = NULL; 595 cb_arg = NULL; 596 cb_back = NULL; 597 } 598 599 void halt_skas(void) 600 { 601 block_signals(); 602 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 603 } 604 605 void reboot_skas(void) 606 { 607 block_signals(); 608 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); 609 } 610 611 void __switch_mm(struct mm_id *mm_idp) 612 { 613 userspace_pid[0] = mm_idp->u.pid; 614 } 615