1 /* 2 * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) 3 * Licensed under the GPL 4 */ 5 6 #include <stdlib.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <errno.h> 10 #include <signal.h> 11 #include <sched.h> 12 #include "ptrace_user.h" 13 #include <sys/wait.h> 14 #include <sys/mman.h> 15 #include <sys/user.h> 16 #include <sys/time.h> 17 #include <sys/syscall.h> 18 #include <asm/types.h> 19 #include "user.h" 20 #include "sysdep/ptrace.h" 21 #include "kern_util.h" 22 #include "skas.h" 23 #include "stub-data.h" 24 #include "mm_id.h" 25 #include "sysdep/sigcontext.h" 26 #include "sysdep/stub.h" 27 #include "os.h" 28 #include "proc_mm.h" 29 #include "skas_ptrace.h" 30 #include "chan_user.h" 31 #include "registers.h" 32 #include "mem.h" 33 #include "uml-config.h" 34 #include "process.h" 35 #include "longjmp.h" 36 #include "kern_constants.h" 37 #include "as-layout.h" 38 39 int is_skas_winch(int pid, int fd, void *data) 40 { 41 if(pid != os_getpgrp()) 42 return(0); 43 44 register_winch_irq(-1, fd, -1, data); 45 return(1); 46 } 47 48 static int ptrace_dump_regs(int pid) 49 { 50 unsigned long regs[MAX_REG_NR]; 51 int i; 52 53 if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) 54 return -errno; 55 else { 56 printk("Stub registers -\n"); 57 for(i = 0; i < ARRAY_SIZE(regs); i++) 58 printk("\t%d - %lx\n", i, regs[i]); 59 } 60 61 return 0; 62 } 63 64 /* 65 * Signals that are OK to receive in the stub - we'll just continue it. 66 * SIGWINCH will happen when UML is inside a detached screen. 67 */ 68 #define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) 69 70 /* Signals that the stub will finish with - anything else is an error */ 71 #define STUB_DONE_MASK ((1 << SIGUSR1) | (1 << SIGTRAP)) 72 73 void wait_stub_done(int pid) 74 { 75 int n, status, err; 76 77 while(1){ 78 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); 79 if((n < 0) || !WIFSTOPPED(status)) 80 goto bad_wait; 81 82 if(((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) 83 break; 84 85 err = ptrace(PTRACE_CONT, pid, 0, 0); 86 if(err) 87 panic("wait_stub_done : continue failed, errno = %d\n", 88 errno); 89 } 90 91 if(((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) 92 return; 93 94 bad_wait: 95 err = ptrace_dump_regs(pid); 96 if(err) 97 printk("Failed to get registers from stub, errno = %d\n", -err); 98 panic("wait_stub_done : failed to wait for SIGUSR1/SIGTRAP, pid = %d, " 99 "n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); 100 } 101 102 extern unsigned long current_stub_stack(void); 103 104 void get_skas_faultinfo(int pid, struct faultinfo * fi) 105 { 106 int err; 107 108 if(ptrace_faultinfo){ 109 err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); 110 if(err) 111 panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " 112 "errno = %d\n", errno); 113 114 /* Special handling for i386, which has different structs */ 115 if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) 116 memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, 117 sizeof(struct faultinfo) - 118 sizeof(struct ptrace_faultinfo)); 119 } 120 else { 121 err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); 122 if(err) 123 panic("Failed to continue stub, pid = %d, errno = %d\n", 124 pid, errno); 125 wait_stub_done(pid); 126 127 /* faultinfo is prepared by the stub-segv-handler at start of 128 * the stub stack page. We just have to copy it. 129 */ 130 memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); 131 } 132 } 133 134 static void handle_segv(int pid, union uml_pt_regs * regs) 135 { 136 get_skas_faultinfo(pid, ®s->skas.faultinfo); 137 segv(regs->skas.faultinfo, 0, 1, NULL); 138 } 139 140 /*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/ 141 static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu) 142 { 143 int err, status; 144 145 /* Mark this as a syscall */ 146 UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->skas.regs); 147 148 if (!local_using_sysemu) 149 { 150 err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, 151 __NR_getpid); 152 if(err < 0) 153 panic("handle_trap - nullifying syscall failed errno = %d\n", 154 errno); 155 156 err = ptrace(PTRACE_SYSCALL, pid, 0, 0); 157 if(err < 0) 158 panic("handle_trap - continuing to end of syscall failed, " 159 "errno = %d\n", errno); 160 161 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); 162 if((err < 0) || !WIFSTOPPED(status) || 163 (WSTOPSIG(status) != SIGTRAP + 0x80)){ 164 err = ptrace_dump_regs(pid); 165 if(err) 166 printk("Failed to get registers from process, " 167 "errno = %d\n", -err); 168 panic("handle_trap - failed to wait at end of syscall, " 169 "errno = %d, status = %d\n", errno, status); 170 } 171 } 172 173 handle_syscall(regs); 174 } 175 176 extern int __syscall_stub_start; 177 178 static int userspace_tramp(void *stack) 179 { 180 void *addr; 181 int err; 182 183 ptrace(PTRACE_TRACEME, 0, 0, 0); 184 185 init_new_thread_signals(); 186 err = set_interval(1); 187 if(err) 188 panic("userspace_tramp - setting timer failed, errno = %d\n", 189 err); 190 191 if(!proc_mm){ 192 /* This has a pte, but it can't be mapped in with the usual 193 * tlb_flush mechanism because this is part of that mechanism 194 */ 195 int fd; 196 __u64 offset; 197 fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); 198 addr = mmap64((void *) UML_CONFIG_STUB_CODE, UM_KERN_PAGE_SIZE, 199 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); 200 if(addr == MAP_FAILED){ 201 printk("mapping mmap stub failed, errno = %d\n", 202 errno); 203 exit(1); 204 } 205 206 if(stack != NULL){ 207 fd = phys_mapping(to_phys(stack), &offset); 208 addr = mmap((void *) UML_CONFIG_STUB_DATA, 209 UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, 210 MAP_FIXED | MAP_SHARED, fd, offset); 211 if(addr == MAP_FAILED){ 212 printk("mapping segfault stack failed, " 213 "errno = %d\n", errno); 214 exit(1); 215 } 216 } 217 } 218 if(!ptrace_faultinfo && (stack != NULL)){ 219 struct sigaction sa; 220 221 unsigned long v = UML_CONFIG_STUB_CODE + 222 (unsigned long) stub_segv_handler - 223 (unsigned long) &__syscall_stub_start; 224 225 set_sigstack((void *) UML_CONFIG_STUB_DATA, UM_KERN_PAGE_SIZE); 226 sigemptyset(&sa.sa_mask); 227 sigaddset(&sa.sa_mask, SIGIO); 228 sigaddset(&sa.sa_mask, SIGWINCH); 229 sigaddset(&sa.sa_mask, SIGALRM); 230 sigaddset(&sa.sa_mask, SIGVTALRM); 231 sigaddset(&sa.sa_mask, SIGUSR1); 232 sa.sa_flags = SA_ONSTACK; 233 sa.sa_handler = (void *) v; 234 sa.sa_restorer = NULL; 235 if(sigaction(SIGSEGV, &sa, NULL) < 0) 236 panic("userspace_tramp - setting SIGSEGV handler " 237 "failed - errno = %d\n", errno); 238 } 239 240 os_stop_process(os_getpid()); 241 return(0); 242 } 243 244 /* Each element set once, and only accessed by a single processor anyway */ 245 #undef NR_CPUS 246 #define NR_CPUS 1 247 int userspace_pid[NR_CPUS]; 248 249 int start_userspace(unsigned long stub_stack) 250 { 251 void *stack; 252 unsigned long sp; 253 int pid, status, n, flags; 254 255 stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, 256 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 257 if(stack == MAP_FAILED) 258 panic("start_userspace : mmap failed, errno = %d", errno); 259 sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); 260 261 flags = CLONE_FILES | SIGCHLD; 262 if(proc_mm) flags |= CLONE_VM; 263 pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); 264 if(pid < 0) 265 panic("start_userspace : clone failed, errno = %d", errno); 266 267 do { 268 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); 269 if(n < 0) 270 panic("start_userspace : wait failed, errno = %d", 271 errno); 272 } while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); 273 274 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) 275 panic("start_userspace : expected SIGSTOP, got status = %d", 276 status); 277 278 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD) < 0) 279 panic("start_userspace : PTRACE_OLDSETOPTIONS failed, errno=%d\n", 280 errno); 281 282 if(munmap(stack, PAGE_SIZE) < 0) 283 panic("start_userspace : munmap failed, errno = %d\n", errno); 284 285 return(pid); 286 } 287 288 void userspace(union uml_pt_regs *regs) 289 { 290 int err, status, op, pid = userspace_pid[0]; 291 /* To prevent races if using_sysemu changes under us.*/ 292 int local_using_sysemu; 293 294 while(1){ 295 restore_registers(pid, regs); 296 297 /* Now we set local_using_sysemu to be used for one loop */ 298 local_using_sysemu = get_using_sysemu(); 299 300 op = SELECT_PTRACE_OPERATION(local_using_sysemu, 301 singlestepping(NULL)); 302 303 err = ptrace(op, pid, 0, 0); 304 if(err) 305 panic("userspace - could not resume userspace process, " 306 "pid=%d, ptrace operation = %d, errno = %d\n", 307 pid, op, errno); 308 309 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); 310 if(err < 0) 311 panic("userspace - waitpid failed, errno = %d\n", 312 errno); 313 314 regs->skas.is_user = 1; 315 save_registers(pid, regs); 316 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 317 318 if(WIFSTOPPED(status)){ 319 int sig = WSTOPSIG(status); 320 switch(sig){ 321 case SIGSEGV: 322 if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo){ 323 get_skas_faultinfo(pid, ®s->skas.faultinfo); 324 (*sig_info[SIGSEGV])(SIGSEGV, regs); 325 } 326 else handle_segv(pid, regs); 327 break; 328 case SIGTRAP + 0x80: 329 handle_trap(pid, regs, local_using_sysemu); 330 break; 331 case SIGTRAP: 332 relay_signal(SIGTRAP, regs); 333 break; 334 case SIGIO: 335 case SIGVTALRM: 336 case SIGILL: 337 case SIGBUS: 338 case SIGFPE: 339 case SIGWINCH: 340 block_signals(); 341 (*sig_info[sig])(sig, regs); 342 unblock_signals(); 343 break; 344 default: 345 printk("userspace - child stopped with signal " 346 "%d\n", sig); 347 } 348 pid = userspace_pid[0]; 349 interrupt_end(); 350 351 /* Avoid -ERESTARTSYS handling in host */ 352 if(PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) 353 PT_SYSCALL_NR(regs->skas.regs) = -1; 354 } 355 } 356 } 357 358 static unsigned long thread_regs[MAX_REG_NR]; 359 static unsigned long thread_fp_regs[HOST_FP_SIZE]; 360 361 static int __init init_thread_regs(void) 362 { 363 get_safe_registers(thread_regs, thread_fp_regs); 364 /* Set parent's instruction pointer to start of clone-stub */ 365 thread_regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + 366 (unsigned long) stub_clone_handler - 367 (unsigned long) &__syscall_stub_start; 368 thread_regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE - 369 sizeof(void *); 370 #ifdef __SIGNAL_FRAMESIZE 371 thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; 372 #endif 373 return 0; 374 } 375 376 __initcall(init_thread_regs); 377 378 int copy_context_skas0(unsigned long new_stack, int pid) 379 { 380 int err; 381 unsigned long current_stack = current_stub_stack(); 382 struct stub_data *data = (struct stub_data *) current_stack; 383 struct stub_data *child_data = (struct stub_data *) new_stack; 384 __u64 new_offset; 385 int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); 386 387 /* prepare offset and fd of child's stack as argument for parent's 388 * and child's mmap2 calls 389 */ 390 *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), 391 .fd = new_fd, 392 .timer = ((struct itimerval) 393 { { 0, 1000000 / hz() }, 394 { 0, 1000000 / hz() }})}); 395 err = ptrace_setregs(pid, thread_regs); 396 if(err < 0) 397 panic("copy_context_skas0 : PTRACE_SETREGS failed, " 398 "pid = %d, errno = %d\n", pid, -err); 399 400 err = ptrace_setfpregs(pid, thread_fp_regs); 401 if(err < 0) 402 panic("copy_context_skas0 : PTRACE_SETFPREGS failed, " 403 "pid = %d, errno = %d\n", pid, -err); 404 405 /* set a well known return code for detection of child write failure */ 406 child_data->err = 12345678; 407 408 /* Wait, until parent has finished its work: read child's pid from 409 * parent's stack, and check, if bad result. 410 */ 411 err = ptrace(PTRACE_CONT, pid, 0, 0); 412 if(err) 413 panic("Failed to continue new process, pid = %d, " 414 "errno = %d\n", pid, errno); 415 wait_stub_done(pid); 416 417 pid = data->err; 418 if(pid < 0) 419 panic("copy_context_skas0 - stub-parent reports error %d\n", 420 -pid); 421 422 /* Wait, until child has finished too: read child's result from 423 * child's stack and check it. 424 */ 425 wait_stub_done(pid); 426 if (child_data->err != UML_CONFIG_STUB_DATA) 427 panic("copy_context_skas0 - stub-child reports error %ld\n", 428 child_data->err); 429 430 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, 431 (void *)PTRACE_O_TRACESYSGOOD) < 0) 432 panic("copy_context_skas0 : PTRACE_OLDSETOPTIONS failed, " 433 "errno = %d\n", errno); 434 435 return pid; 436 } 437 438 /* 439 * This is used only, if stub pages are needed, while proc_mm is 440 * available. Opening /proc/mm creates a new mm_context, which lacks 441 * the stub-pages. Thus, we map them using /proc/mm-fd 442 */ 443 void map_stub_pages(int fd, unsigned long code, 444 unsigned long data, unsigned long stack) 445 { 446 struct proc_mm_op mmop; 447 int n; 448 __u64 code_offset; 449 int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start), 450 &code_offset); 451 452 mmop = ((struct proc_mm_op) { .op = MM_MMAP, 453 .u = 454 { .mmap = 455 { .addr = code, 456 .len = PAGE_SIZE, 457 .prot = PROT_EXEC, 458 .flags = MAP_FIXED | MAP_PRIVATE, 459 .fd = code_fd, 460 .offset = code_offset 461 } } }); 462 CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); 463 if(n != sizeof(mmop)){ 464 n = errno; 465 printk("mmap args - addr = 0x%lx, fd = %d, offset = %llx\n", 466 code, code_fd, (unsigned long long) code_offset); 467 panic("map_stub_pages : /proc/mm map for code failed, " 468 "err = %d\n", n); 469 } 470 471 if ( stack ) { 472 __u64 map_offset; 473 int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); 474 mmop = ((struct proc_mm_op) 475 { .op = MM_MMAP, 476 .u = 477 { .mmap = 478 { .addr = data, 479 .len = PAGE_SIZE, 480 .prot = PROT_READ | PROT_WRITE, 481 .flags = MAP_FIXED | MAP_SHARED, 482 .fd = map_fd, 483 .offset = map_offset 484 } } }); 485 CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); 486 if(n != sizeof(mmop)) 487 panic("map_stub_pages : /proc/mm map for data failed, " 488 "err = %d\n", errno); 489 } 490 } 491 492 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) 493 { 494 (*buf)[0].JB_IP = (unsigned long) handler; 495 (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - 496 sizeof(void *); 497 } 498 499 #define INIT_JMP_NEW_THREAD 0 500 #define INIT_JMP_CALLBACK 1 501 #define INIT_JMP_HALT 2 502 #define INIT_JMP_REBOOT 3 503 504 void switch_threads(jmp_buf *me, jmp_buf *you) 505 { 506 if(UML_SETJMP(me) == 0) 507 UML_LONGJMP(you, 1); 508 } 509 510 static jmp_buf initial_jmpbuf; 511 512 /* XXX Make these percpu */ 513 static void (*cb_proc)(void *arg); 514 static void *cb_arg; 515 static jmp_buf *cb_back; 516 517 int start_idle_thread(void *stack, jmp_buf *switch_buf) 518 { 519 int n; 520 521 set_handler(SIGWINCH, (__sighandler_t) sig_handler, 522 SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM, 523 SIGVTALRM, -1); 524 525 /* 526 * Can't use UML_SETJMP or UML_LONGJMP here because they save 527 * and restore signals, with the possible side-effect of 528 * trying to handle any signals which came when they were 529 * blocked, which can't be done on this stack. 530 * Signals must be blocked when jumping back here and restored 531 * after returning to the jumper. 532 */ 533 n = setjmp(initial_jmpbuf); 534 switch(n){ 535 case INIT_JMP_NEW_THREAD: 536 (*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler; 537 (*switch_buf)[0].JB_SP = (unsigned long) stack + 538 UM_THREAD_SIZE - sizeof(void *); 539 break; 540 case INIT_JMP_CALLBACK: 541 (*cb_proc)(cb_arg); 542 longjmp(*cb_back, 1); 543 break; 544 case INIT_JMP_HALT: 545 kmalloc_ok = 0; 546 return(0); 547 case INIT_JMP_REBOOT: 548 kmalloc_ok = 0; 549 return(1); 550 default: 551 panic("Bad sigsetjmp return in start_idle_thread - %d\n", n); 552 } 553 longjmp(*switch_buf, 1); 554 } 555 556 void initial_thread_cb_skas(void (*proc)(void *), void *arg) 557 { 558 jmp_buf here; 559 560 cb_proc = proc; 561 cb_arg = arg; 562 cb_back = &here; 563 564 block_signals(); 565 if(UML_SETJMP(&here) == 0) 566 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 567 unblock_signals(); 568 569 cb_proc = NULL; 570 cb_arg = NULL; 571 cb_back = NULL; 572 } 573 574 void halt_skas(void) 575 { 576 block_signals(); 577 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 578 } 579 580 void reboot_skas(void) 581 { 582 block_signals(); 583 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); 584 } 585 586 void switch_mm_skas(struct mm_id *mm_idp) 587 { 588 int err; 589 590 /* FIXME: need cpu pid in switch_mm_skas */ 591 if(proc_mm){ 592 err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, 593 mm_idp->u.mm_fd); 594 if(err) 595 panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " 596 "errno = %d\n", errno); 597 } 598 else userspace_pid[0] = mm_idp->u.pid; 599 } 600