1 /*- 2 * Copyright (c) 1994-1996 Søren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* XXX we use functions that might not exist. */ 33 #include "opt_compat.h" 34 35 #ifndef COMPAT_43 36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/exec.h> 42 #include <sys/imgact.h> 43 #include <sys/imgact_aout.h> 44 #include <sys/imgact_elf.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/module.h> 49 #include <sys/mutex.h> 50 #include <sys/proc.h> 51 #include <sys/signalvar.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/user.h> 56 #include <sys/vnode.h> 57 58 #include <vm/vm.h> 59 #include <vm/pmap.h> 60 #include <vm/vm_extern.h> 61 #include <vm/vm_map.h> 62 #include <vm/vm_object.h> 63 #include <vm/vm_page.h> 64 #include <vm/vm_param.h> 65 66 #include <machine/cpu.h> 67 #include <machine/md_var.h> 68 69 #include <i386/linux/linux.h> 70 #include <i386/linux/linux_proto.h> 71 #include <compat/linux/linux_mib.h> 72 #include <compat/linux/linux_signal.h> 73 #include <compat/linux/linux_util.h> 74 75 MODULE_VERSION(linux, 1); 76 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 77 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 78 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 79 80 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 81 82 #if BYTE_ORDER == LITTLE_ENDIAN 83 #define SHELLMAGIC 0x2123 /* #! */ 84 #else 85 #define SHELLMAGIC 0x2321 86 #endif 87 88 /* 89 * Allow the sendsig functions to use the ldebug() facility 90 * even though they are not syscalls themselves. Map them 91 * to syscall 0. This is slightly less bogus than using 92 * ldebug(sigreturn). 
93 */ 94 #define LINUX_SYS_linux_rt_sendsig 0 95 #define LINUX_SYS_linux_sendsig 0 96 97 #define uarea_pages 1 98 99 extern char linux_sigcode[]; 100 extern int linux_szsigcode; 101 102 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 103 104 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 105 106 static int linux_fixup(register_t **stack_base, 107 struct image_params *iparams); 108 static int elf_linux_fixup(register_t **stack_base, 109 struct image_params *iparams); 110 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 111 caddr_t *params); 112 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 113 u_long code); 114 static void exec_linux_setregs(struct thread *td, u_long entry, 115 u_long stack, u_long ps_strings); 116 117 /* 118 * Linux syscalls return negative errno's, we do positive and map them 119 */ 120 static int bsd_to_linux_errno[ELAST + 1] = { 121 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 122 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 123 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 124 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 125 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 126 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 127 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 128 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 129 -6, -6, -43, -42, -75, -6, -84 130 }; 131 132 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 133 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 134 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 135 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 136 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 137 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 138 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 139 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 140 0, LINUX_SIGUSR1, LINUX_SIGUSR2 141 }; 142 143 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 144 SIGHUP, SIGINT, 
SIGQUIT, SIGILL, 145 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 146 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 147 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 148 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 149 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 150 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 151 SIGIO, SIGURG, SIGSYS 152 }; 153 154 #define LINUX_T_UNKNOWN 255 155 static int _bsd_to_linux_trapcode[] = { 156 LINUX_T_UNKNOWN, /* 0 */ 157 6, /* 1 T_PRIVINFLT */ 158 LINUX_T_UNKNOWN, /* 2 */ 159 3, /* 3 T_BPTFLT */ 160 LINUX_T_UNKNOWN, /* 4 */ 161 LINUX_T_UNKNOWN, /* 5 */ 162 16, /* 6 T_ARITHTRAP */ 163 254, /* 7 T_ASTFLT */ 164 LINUX_T_UNKNOWN, /* 8 */ 165 13, /* 9 T_PROTFLT */ 166 1, /* 10 T_TRCTRAP */ 167 LINUX_T_UNKNOWN, /* 11 */ 168 14, /* 12 T_PAGEFLT */ 169 LINUX_T_UNKNOWN, /* 13 */ 170 17, /* 14 T_ALIGNFLT */ 171 LINUX_T_UNKNOWN, /* 15 */ 172 LINUX_T_UNKNOWN, /* 16 */ 173 LINUX_T_UNKNOWN, /* 17 */ 174 0, /* 18 T_DIVIDE */ 175 2, /* 19 T_NMI */ 176 4, /* 20 T_OFLOW */ 177 5, /* 21 T_BOUND */ 178 7, /* 22 T_DNA */ 179 8, /* 23 T_DOUBLEFLT */ 180 9, /* 24 T_FPOPFLT */ 181 10, /* 25 T_TSSFLT */ 182 11, /* 26 T_SEGNPFLT */ 183 12, /* 27 T_STKFLT */ 184 18, /* 28 T_MCHK */ 185 19, /* 29 T_XMMFLT */ 186 15 /* 30 T_RESERVED */ 187 }; 188 #define bsd_to_linux_trapcode(code) \ 189 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 190 _bsd_to_linux_trapcode[(code)]: \ 191 LINUX_T_UNKNOWN) 192 193 /* 194 * If FreeBSD & Linux have a difference of opinion about what a trap 195 * means, deal with it here. 
196 * 197 * MPSAFE 198 */ 199 static int 200 translate_traps(int signal, int trap_code) 201 { 202 if (signal != SIGBUS) 203 return signal; 204 switch (trap_code) { 205 case T_PROTFLT: 206 case T_TSSFLT: 207 case T_DOUBLEFLT: 208 case T_PAGEFLT: 209 return SIGSEGV; 210 default: 211 return signal; 212 } 213 } 214 215 static int 216 linux_fixup(register_t **stack_base, struct image_params *imgp) 217 { 218 register_t *argv, *envp; 219 220 argv = *stack_base; 221 envp = *stack_base + (imgp->argc + 1); 222 (*stack_base)--; 223 **stack_base = (intptr_t)(void *)envp; 224 (*stack_base)--; 225 **stack_base = (intptr_t)(void *)argv; 226 (*stack_base)--; 227 **stack_base = imgp->argc; 228 return 0; 229 } 230 231 static int 232 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 233 { 234 Elf32_Auxargs *args; 235 register_t *pos; 236 237 KASSERT(curthread->td_proc == imgp->proc && 238 (curthread->td_proc->p_flag & P_SA) == 0, 239 ("unsafe elf_linux_fixup(), should be curproc")); 240 args = (Elf32_Auxargs *)imgp->auxargs; 241 pos = *stack_base + (imgp->argc + imgp->envc + 2); 242 243 if (args->trace) 244 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 245 if (args->execfd != -1) 246 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 247 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 248 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 249 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 250 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 251 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 252 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 253 AUXARGS_ENTRY(pos, AT_BASE, args->base); 254 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 255 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 256 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 257 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 258 AUXARGS_ENTRY(pos, AT_NULL, 0); 259 260 free(imgp->auxargs, M_TEMP); 261 imgp->auxargs = NULL; 262 263 (*stack_base)--; 264 **stack_base = (register_t)imgp->argc; 265 return 0; 266 } 
267 268 extern int _ucodesel, _udatasel; 269 extern unsigned long linux_sznonrtsigcode; 270 271 static void 272 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 273 { 274 struct thread *td = curthread; 275 struct proc *p = td->td_proc; 276 struct sigacts *psp; 277 struct trapframe *regs; 278 struct l_rt_sigframe *fp, frame; 279 int oonstack; 280 281 PROC_LOCK_ASSERT(p, MA_OWNED); 282 psp = p->p_sigacts; 283 mtx_assert(&psp->ps_mtx, MA_OWNED); 284 regs = td->td_frame; 285 oonstack = sigonstack(regs->tf_esp); 286 287 #ifdef DEBUG 288 if (ldebug(rt_sendsig)) 289 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 290 catcher, sig, (void*)mask, code); 291 #endif 292 /* 293 * Allocate space for the signal handler context. 294 */ 295 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 296 SIGISMEMBER(psp->ps_sigonstack, sig)) { 297 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 298 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 299 } else 300 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 301 mtx_unlock(&psp->ps_mtx); 302 303 /* 304 * Build the argument list for the signal handler. 305 */ 306 if (p->p_sysent->sv_sigtbl) 307 if (sig <= p->p_sysent->sv_sigsize) 308 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 309 310 bzero(&frame, sizeof(frame)); 311 312 frame.sf_handler = catcher; 313 frame.sf_sig = sig; 314 frame.sf_siginfo = &fp->sf_si; 315 frame.sf_ucontext = &fp->sf_sc; 316 317 /* Fill in POSIX parts */ 318 frame.sf_si.lsi_signo = sig; 319 frame.sf_si.lsi_code = code; 320 frame.sf_si.lsi_addr = (void *)regs->tf_err; 321 322 /* 323 * Build the signal context to be used by sigreturn. 324 */ 325 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 326 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 327 328 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 329 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 330 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 331 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 332 PROC_UNLOCK(p); 333 334 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 335 336 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 337 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 338 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 339 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 340 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 341 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 342 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 343 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 344 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 345 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 346 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 347 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 348 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 349 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 350 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 351 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 352 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 353 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 354 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 355 356 #ifdef DEBUG 357 if (ldebug(rt_sendsig)) 358 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 359 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 360 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 361 #endif 362 363 if (copyout(&frame, fp, sizeof(frame)) != 0) { 364 /* 365 * Process has trashed its stack; give it an illegal 366 * instruction to halt it in its tracks. 367 */ 368 #ifdef DEBUG 369 if (ldebug(rt_sendsig)) 370 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 371 fp, oonstack); 372 #endif 373 PROC_LOCK(p); 374 sigexit(td, SIGILL); 375 } 376 377 /* 378 * Build context to run handler in. 
379 */ 380 regs->tf_esp = (int)fp; 381 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 382 linux_sznonrtsigcode; 383 regs->tf_eflags &= ~(PSL_T | PSL_VM); 384 regs->tf_cs = _ucodesel; 385 regs->tf_ds = _udatasel; 386 regs->tf_es = _udatasel; 387 regs->tf_fs = _udatasel; 388 regs->tf_ss = _udatasel; 389 PROC_LOCK(p); 390 mtx_lock(&psp->ps_mtx); 391 } 392 393 394 /* 395 * Send an interrupt to process. 396 * 397 * Stack is set up to allow sigcode stored 398 * in u. to call routine, followed by kcall 399 * to sigreturn routine below. After sigreturn 400 * resets the signal mask, the stack, and the 401 * frame pointer, it returns to the user 402 * specified pc, psl. 403 */ 404 static void 405 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 406 { 407 struct thread *td = curthread; 408 struct proc *p = td->td_proc; 409 struct sigacts *psp; 410 struct trapframe *regs; 411 struct l_sigframe *fp, frame; 412 l_sigset_t lmask; 413 int oonstack, i; 414 415 PROC_LOCK_ASSERT(p, MA_OWNED); 416 psp = p->p_sigacts; 417 mtx_assert(&psp->ps_mtx, MA_OWNED); 418 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 419 /* Signal handler installed with SA_SIGINFO. */ 420 linux_rt_sendsig(catcher, sig, mask, code); 421 return; 422 } 423 424 regs = td->td_frame; 425 oonstack = sigonstack(regs->tf_esp); 426 427 #ifdef DEBUG 428 if (ldebug(sendsig)) 429 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 430 catcher, sig, (void*)mask, code); 431 #endif 432 433 /* 434 * Allocate space for the signal handler context. 435 */ 436 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 437 SIGISMEMBER(psp->ps_sigonstack, sig)) { 438 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 439 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 440 } else 441 fp = (struct l_sigframe *)regs->tf_esp - 1; 442 mtx_unlock(&psp->ps_mtx); 443 PROC_UNLOCK(p); 444 445 /* 446 * Build the argument list for the signal handler. 
447 */ 448 if (p->p_sysent->sv_sigtbl) 449 if (sig <= p->p_sysent->sv_sigsize) 450 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 451 452 bzero(&frame, sizeof(frame)); 453 454 frame.sf_handler = catcher; 455 frame.sf_sig = sig; 456 457 bsd_to_linux_sigset(mask, &lmask); 458 459 /* 460 * Build the signal context to be used by sigreturn. 461 */ 462 frame.sf_sc.sc_mask = lmask.__bits[0]; 463 frame.sf_sc.sc_gs = rgs(); 464 frame.sf_sc.sc_fs = regs->tf_fs; 465 frame.sf_sc.sc_es = regs->tf_es; 466 frame.sf_sc.sc_ds = regs->tf_ds; 467 frame.sf_sc.sc_edi = regs->tf_edi; 468 frame.sf_sc.sc_esi = regs->tf_esi; 469 frame.sf_sc.sc_ebp = regs->tf_ebp; 470 frame.sf_sc.sc_ebx = regs->tf_ebx; 471 frame.sf_sc.sc_edx = regs->tf_edx; 472 frame.sf_sc.sc_ecx = regs->tf_ecx; 473 frame.sf_sc.sc_eax = regs->tf_eax; 474 frame.sf_sc.sc_eip = regs->tf_eip; 475 frame.sf_sc.sc_cs = regs->tf_cs; 476 frame.sf_sc.sc_eflags = regs->tf_eflags; 477 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 478 frame.sf_sc.sc_ss = regs->tf_ss; 479 frame.sf_sc.sc_err = regs->tf_err; 480 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 481 482 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 483 frame.sf_extramask[i] = lmask.__bits[i+1]; 484 485 if (copyout(&frame, fp, sizeof(frame)) != 0) { 486 /* 487 * Process has trashed its stack; give it an illegal 488 * instruction to halt it in its tracks. 489 */ 490 PROC_LOCK(p); 491 sigexit(td, SIGILL); 492 } 493 494 /* 495 * Build context to run handler in. 496 */ 497 regs->tf_esp = (int)fp; 498 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 499 regs->tf_eflags &= ~(PSL_T | PSL_VM); 500 regs->tf_cs = _ucodesel; 501 regs->tf_ds = _udatasel; 502 regs->tf_es = _udatasel; 503 regs->tf_fs = _udatasel; 504 regs->tf_ss = _udatasel; 505 PROC_LOCK(p); 506 mtx_lock(&psp->ps_mtx); 507 } 508 509 /* 510 * System call to cleanup state after a signal 511 * has been taken. Reset signal mask and 512 * stack state from context left by sendsig (above). 
513 * Return to previous pc and psl as specified by 514 * context left by sendsig. Check carefully to 515 * make sure that the user has not modified the 516 * psl to gain improper privileges or to cause 517 * a machine fault. 518 */ 519 int 520 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 521 { 522 struct proc *p = td->td_proc; 523 struct l_sigframe frame; 524 struct trapframe *regs; 525 l_sigset_t lmask; 526 int eflags, i; 527 528 regs = td->td_frame; 529 530 #ifdef DEBUG 531 if (ldebug(sigreturn)) 532 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 533 #endif 534 /* 535 * The trampoline code hands us the sigframe. 536 * It is unsafe to keep track of it ourselves, in the event that a 537 * program jumps out of a signal handler. 538 */ 539 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 540 return (EFAULT); 541 542 /* 543 * Check for security violations. 544 */ 545 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 546 eflags = frame.sf_sc.sc_eflags; 547 /* 548 * XXX do allow users to change the privileged flag PSL_RF. The 549 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 550 * sometimes set it there too. tf_eflags is kept in the signal 551 * context during signal handling and there is no other place 552 * to remember it, so the PSL_RF bit may be corrupted by the 553 * signal handler without us knowing. Corruption of the PSL_RF 554 * bit at worst causes one more or one less debugger trap, so 555 * allowing it is fairly harmless. 556 */ 557 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 558 return(EINVAL); 559 560 /* 561 * Don't allow users to load a valid privileged %cs. Let the 562 * hardware check for invalid selectors, excess privilege in 563 * other selectors, invalid %eip's and invalid %esp's. 
564 */ 565 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 566 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 567 trapsignal(td, SIGBUS, T_PROTFLT); 568 return(EINVAL); 569 } 570 571 lmask.__bits[0] = frame.sf_sc.sc_mask; 572 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 573 lmask.__bits[i+1] = frame.sf_extramask[i]; 574 PROC_LOCK(p); 575 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 576 SIG_CANTMASK(td->td_sigmask); 577 signotify(td); 578 PROC_UNLOCK(p); 579 580 /* 581 * Restore signal context. 582 */ 583 /* %gs was restored by the trampoline. */ 584 regs->tf_fs = frame.sf_sc.sc_fs; 585 regs->tf_es = frame.sf_sc.sc_es; 586 regs->tf_ds = frame.sf_sc.sc_ds; 587 regs->tf_edi = frame.sf_sc.sc_edi; 588 regs->tf_esi = frame.sf_sc.sc_esi; 589 regs->tf_ebp = frame.sf_sc.sc_ebp; 590 regs->tf_ebx = frame.sf_sc.sc_ebx; 591 regs->tf_edx = frame.sf_sc.sc_edx; 592 regs->tf_ecx = frame.sf_sc.sc_ecx; 593 regs->tf_eax = frame.sf_sc.sc_eax; 594 regs->tf_eip = frame.sf_sc.sc_eip; 595 regs->tf_cs = frame.sf_sc.sc_cs; 596 regs->tf_eflags = eflags; 597 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 598 regs->tf_ss = frame.sf_sc.sc_ss; 599 600 return (EJUSTRETURN); 601 } 602 603 /* 604 * System call to cleanup state after a signal 605 * has been taken. Reset signal mask and 606 * stack state from context left by rt_sendsig (above). 607 * Return to previous pc and psl as specified by 608 * context left by sendsig. Check carefully to 609 * make sure that the user has not modified the 610 * psl to gain improper privileges or to cause 611 * a machine fault. 
612 */ 613 int 614 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 615 { 616 struct proc *p = td->td_proc; 617 struct l_ucontext uc; 618 struct l_sigcontext *context; 619 l_stack_t *lss; 620 stack_t ss; 621 struct trapframe *regs; 622 int eflags; 623 624 regs = td->td_frame; 625 626 #ifdef DEBUG 627 if (ldebug(rt_sigreturn)) 628 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 629 #endif 630 /* 631 * The trampoline code hands us the ucontext. 632 * It is unsafe to keep track of it ourselves, in the event that a 633 * program jumps out of a signal handler. 634 */ 635 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 636 return (EFAULT); 637 638 context = &uc.uc_mcontext; 639 640 /* 641 * Check for security violations. 642 */ 643 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 644 eflags = context->sc_eflags; 645 /* 646 * XXX do allow users to change the privileged flag PSL_RF. The 647 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 648 * sometimes set it there too. tf_eflags is kept in the signal 649 * context during signal handling and there is no other place 650 * to remember it, so the PSL_RF bit may be corrupted by the 651 * signal handler without us knowing. Corruption of the PSL_RF 652 * bit at worst causes one more or one less debugger trap, so 653 * allowing it is fairly harmless. 654 */ 655 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 656 return(EINVAL); 657 658 /* 659 * Don't allow users to load a valid privileged %cs. Let the 660 * hardware check for invalid selectors, excess privilege in 661 * other selectors, invalid %eip's and invalid %esp's. 
662 */ 663 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 664 if (!CS_SECURE(context->sc_cs)) { 665 trapsignal(td, SIGBUS, T_PROTFLT); 666 return(EINVAL); 667 } 668 669 PROC_LOCK(p); 670 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 671 SIG_CANTMASK(td->td_sigmask); 672 signotify(td); 673 PROC_UNLOCK(p); 674 675 /* 676 * Restore signal context 677 */ 678 /* %gs was restored by the trampoline. */ 679 regs->tf_fs = context->sc_fs; 680 regs->tf_es = context->sc_es; 681 regs->tf_ds = context->sc_ds; 682 regs->tf_edi = context->sc_edi; 683 regs->tf_esi = context->sc_esi; 684 regs->tf_ebp = context->sc_ebp; 685 regs->tf_ebx = context->sc_ebx; 686 regs->tf_edx = context->sc_edx; 687 regs->tf_ecx = context->sc_ecx; 688 regs->tf_eax = context->sc_eax; 689 regs->tf_eip = context->sc_eip; 690 regs->tf_cs = context->sc_cs; 691 regs->tf_eflags = eflags; 692 regs->tf_esp = context->sc_esp_at_signal; 693 regs->tf_ss = context->sc_ss; 694 695 /* 696 * call sigaltstack & ignore results.. 697 */ 698 lss = &uc.uc_stack; 699 ss.ss_sp = lss->ss_sp; 700 ss.ss_size = lss->ss_size; 701 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 702 703 #ifdef DEBUG 704 if (ldebug(rt_sigreturn)) 705 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 706 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 707 #endif 708 (void)kern_sigaltstack(td, &ss, NULL); 709 710 return (EJUSTRETURN); 711 } 712 713 /* 714 * MPSAFE 715 */ 716 static void 717 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 718 { 719 args[0] = tf->tf_ebx; 720 args[1] = tf->tf_ecx; 721 args[2] = tf->tf_edx; 722 args[3] = tf->tf_esi; 723 args[4] = tf->tf_edi; 724 args[5] = tf->tf_ebp; /* Unconfirmed */ 725 *params = NULL; /* no copyin */ 726 } 727 728 729 730 /* 731 * Dump core, into a file named as described in the comments for 732 * expand_name(), unless the process was setuid/setgid. 
733 */ 734 static int 735 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 736 { 737 struct proc *p = td->td_proc; 738 struct ucred *cred = td->td_ucred; 739 struct vmspace *vm = p->p_vmspace; 740 char *tempuser; 741 int error; 742 743 if (ctob((uarea_pages + kstack_pages) + 744 vm->vm_dsize + vm->vm_ssize) >= limit) 745 return (EFAULT); 746 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 747 M_WAITOK | M_ZERO); 748 if (tempuser == NULL) 749 return (ENOMEM); 750 PROC_LOCK(p); 751 fill_user(p, (struct user *)tempuser); 752 PROC_UNLOCK(p); 753 bcopy(td->td_frame, 754 tempuser + ctob(uarea_pages) + 755 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 756 sizeof(struct trapframe)); 757 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 758 ctob(uarea_pages + kstack_pages), 759 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 760 (int *)NULL, td); 761 free(tempuser, M_TEMP); 762 if (error == 0) 763 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 764 (int)ctob(vm->vm_dsize), 765 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 766 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 767 if (error == 0) 768 error = vn_rdwr_inchunks(UIO_WRITE, vp, 769 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 770 round_page(ctob(vm->vm_ssize)), 771 (off_t)ctob(uarea_pages + kstack_pages) + 772 ctob(vm->vm_dsize), UIO_USERSPACE, 773 IO_UNIT | IO_DIRECT, cred, NOCRED, NULL, td); 774 return (error); 775 } 776 /* 777 * If a linux binary is exec'ing something, try this image activator 778 * first. We override standard shell script execution in order to 779 * be able to modify the interpreter path. We only do this if a linux 780 * binary is doing the exec, so we do not create an EXEC module for it. 
781 */ 782 static int exec_linux_imgact_try(struct image_params *iparams); 783 784 static int 785 exec_linux_imgact_try(struct image_params *imgp) 786 { 787 const char *head = (const char *)imgp->image_header; 788 int error = -1; 789 790 /* 791 * The interpreter for shell scripts run from a linux binary needs 792 * to be located in /compat/linux if possible in order to recursively 793 * maintain linux path emulation. 794 */ 795 if (((const short *)head)[0] == SHELLMAGIC) { 796 /* 797 * Run our normal shell image activator. If it succeeds attempt 798 * to use the alternate path for the interpreter. If an alternate 799 * path is found, use our stringspace to store it. 800 */ 801 if ((error = exec_shell_imgact(imgp)) == 0) { 802 char *rpath = NULL; 803 804 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 805 imgp->interpreter_name, &rpath, 0); 806 if (rpath != imgp->interpreter_name) { 807 int len = strlen(rpath) + 1; 808 809 if (len <= MAXSHELLCMDLEN) { 810 memcpy(imgp->interpreter_name, rpath, len); 811 } 812 free(rpath, M_TEMP); 813 } 814 } 815 } 816 return(error); 817 } 818 819 /* 820 * exec_setregs may initialize some registers differently than Linux 821 * does, thus potentially confusing Linux binaries. If necessary, we 822 * override the exec_setregs default(s) here. 
823 */ 824 static void 825 exec_linux_setregs(struct thread *td, u_long entry, 826 u_long stack, u_long ps_strings) 827 { 828 struct pcb *pcb = td->td_pcb; 829 830 exec_setregs(td, entry, stack, ps_strings); 831 832 /* Linux sets %gs to 0, we default to _udatasel */ 833 pcb->pcb_gs = 0; load_gs(0); 834 } 835 836 struct sysentvec linux_sysvec = { 837 LINUX_SYS_MAXSYSCALL, 838 linux_sysent, 839 0xff, 840 LINUX_SIGTBLSZ, 841 bsd_to_linux_signal, 842 ELAST + 1, 843 bsd_to_linux_errno, 844 translate_traps, 845 linux_fixup, 846 linux_sendsig, 847 linux_sigcode, 848 &linux_szsigcode, 849 linux_prepsyscall, 850 "Linux a.out", 851 linux_aout_coredump, 852 exec_linux_imgact_try, 853 LINUX_MINSIGSTKSZ, 854 PAGE_SIZE, 855 VM_MIN_ADDRESS, 856 VM_MAXUSER_ADDRESS, 857 USRSTACK, 858 PS_STRINGS, 859 VM_PROT_ALL, 860 exec_copyout_strings, 861 exec_linux_setregs, 862 NULL 863 }; 864 865 struct sysentvec elf_linux_sysvec = { 866 LINUX_SYS_MAXSYSCALL, 867 linux_sysent, 868 0xff, 869 LINUX_SIGTBLSZ, 870 bsd_to_linux_signal, 871 ELAST + 1, 872 bsd_to_linux_errno, 873 translate_traps, 874 elf_linux_fixup, 875 linux_sendsig, 876 linux_sigcode, 877 &linux_szsigcode, 878 linux_prepsyscall, 879 "Linux ELF", 880 elf32_coredump, 881 exec_linux_imgact_try, 882 LINUX_MINSIGSTKSZ, 883 PAGE_SIZE, 884 VM_MIN_ADDRESS, 885 VM_MAXUSER_ADDRESS, 886 USRSTACK, 887 PS_STRINGS, 888 VM_PROT_ALL, 889 exec_copyout_strings, 890 exec_linux_setregs, 891 NULL 892 }; 893 894 static Elf32_Brandinfo linux_brand = { 895 ELFOSABI_LINUX, 896 EM_386, 897 "Linux", 898 "/compat/linux", 899 "/lib/ld-linux.so.1", 900 &elf_linux_sysvec, 901 NULL, 902 }; 903 904 static Elf32_Brandinfo linux_glibc2brand = { 905 ELFOSABI_LINUX, 906 EM_386, 907 "Linux", 908 "/compat/linux", 909 "/lib/ld-linux.so.2", 910 &elf_linux_sysvec, 911 NULL, 912 }; 913 914 Elf32_Brandinfo *linux_brandlist[] = { 915 &linux_brand, 916 &linux_glibc2brand, 917 NULL 918 }; 919 920 static int 921 linux_elf_modevent(module_t mod, int type, void *data) 922 { 923 
Elf32_Brandinfo **brandinfo; 924 int error; 925 struct linux_ioctl_handler **lihp; 926 927 error = 0; 928 929 switch(type) { 930 case MOD_LOAD: 931 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 932 ++brandinfo) 933 if (elf32_insert_brand_entry(*brandinfo) < 0) 934 error = EINVAL; 935 if (error == 0) { 936 SET_FOREACH(lihp, linux_ioctl_handler_set) 937 linux_ioctl_register_handler(*lihp); 938 if (bootverbose) 939 printf("Linux ELF exec handler installed\n"); 940 } else 941 printf("cannot insert Linux ELF brand handler\n"); 942 break; 943 case MOD_UNLOAD: 944 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 945 ++brandinfo) 946 if (elf32_brand_inuse(*brandinfo)) 947 error = EBUSY; 948 if (error == 0) { 949 for (brandinfo = &linux_brandlist[0]; 950 *brandinfo != NULL; ++brandinfo) 951 if (elf32_remove_brand_entry(*brandinfo) < 0) 952 error = EINVAL; 953 } 954 if (error == 0) { 955 SET_FOREACH(lihp, linux_ioctl_handler_set) 956 linux_ioctl_unregister_handler(*lihp); 957 if (bootverbose) 958 printf("Linux ELF exec handler removed\n"); 959 linux_mib_destroy(); 960 } else 961 printf("Could not deinstall ELF interpreter entry\n"); 962 break; 963 default: 964 return EOPNOTSUPP; 965 } 966 return error; 967 } 968 969 static moduledata_t linux_elf_mod = { 970 "linuxelf", 971 linux_elf_modevent, 972 0 973 }; 974 975 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 976