1 /*- 2 * Copyright (c) 1994-1996 Søren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* XXX we use functions that might not exist. */ 33 #include "opt_compat.h" 34 35 #ifndef COMPAT_43 36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/imgact.h> 42 #include <sys/imgact_aout.h> 43 #include <sys/imgact_elf.h> 44 #include <sys/lock.h> 45 #include <sys/malloc.h> 46 #include <sys/mutex.h> 47 #include <sys/proc.h> 48 #include <sys/signalvar.h> 49 #include <sys/syscallsubr.h> 50 #include <sys/sysent.h> 51 #include <sys/sysproto.h> 52 #include <sys/user.h> 53 #include <sys/vnode.h> 54 55 #include <vm/vm.h> 56 #include <vm/vm_param.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_extern.h> 59 #include <sys/exec.h> 60 #include <sys/kernel.h> 61 #include <sys/module.h> 62 #include <machine/cpu.h> 63 #include <machine/md_var.h> 64 #include <sys/mutex.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_param.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_map.h> 70 #include <vm/vm_object.h> 71 72 #include <i386/linux/linux.h> 73 #include <i386/linux/linux_proto.h> 74 #include <compat/linux/linux_mib.h> 75 #include <compat/linux/linux_signal.h> 76 #include <compat/linux/linux_util.h> 77 78 MODULE_VERSION(linux, 1); 79 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 80 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 81 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 82 83 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 84 85 #if BYTE_ORDER == LITTLE_ENDIAN 86 #define SHELLMAGIC 0x2123 /* #! */ 87 #else 88 #define SHELLMAGIC 0x2321 89 #endif 90 91 /* 92 * Allow the sendsig functions to use the ldebug() facility 93 * even though they are not syscalls themselves. Map them 94 * to syscall 0. This is slightly less bogus than using 95 * ldebug(sigreturn). 
96 */ 97 #define LINUX_SYS_linux_rt_sendsig 0 98 #define LINUX_SYS_linux_sendsig 0 99 100 extern char linux_sigcode[]; 101 extern int linux_szsigcode; 102 103 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 104 105 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 106 107 static int linux_fixup(register_t **stack_base, 108 struct image_params *iparams); 109 static int elf_linux_fixup(register_t **stack_base, 110 struct image_params *iparams); 111 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 112 caddr_t *params); 113 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 114 u_long code); 115 static void exec_linux_setregs(struct thread *td, u_long entry, 116 u_long stack, u_long ps_strings); 117 118 /* 119 * Linux syscalls return negative errno's, we do positive and map them 120 */ 121 static int bsd_to_linux_errno[ELAST + 1] = { 122 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 123 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 124 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 125 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 126 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 127 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 128 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 129 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 130 -6, -6, -43, -42, -75, -6, -84 131 }; 132 133 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 134 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 135 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 136 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 137 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 138 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 139 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 140 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 141 0, LINUX_SIGUSR1, LINUX_SIGUSR2 142 }; 143 144 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 145 SIGHUP, SIGINT, SIGQUIT, SIGILL, 146 SIGTRAP, 
SIGABRT, SIGBUS, SIGFPE, 147 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 148 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 149 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 150 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 151 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 152 SIGIO, SIGURG, SIGSYS 153 }; 154 155 #define LINUX_T_UNKNOWN 255 156 static int _bsd_to_linux_trapcode[] = { 157 LINUX_T_UNKNOWN, /* 0 */ 158 6, /* 1 T_PRIVINFLT */ 159 LINUX_T_UNKNOWN, /* 2 */ 160 3, /* 3 T_BPTFLT */ 161 LINUX_T_UNKNOWN, /* 4 */ 162 LINUX_T_UNKNOWN, /* 5 */ 163 16, /* 6 T_ARITHTRAP */ 164 254, /* 7 T_ASTFLT */ 165 LINUX_T_UNKNOWN, /* 8 */ 166 13, /* 9 T_PROTFLT */ 167 1, /* 10 T_TRCTRAP */ 168 LINUX_T_UNKNOWN, /* 11 */ 169 14, /* 12 T_PAGEFLT */ 170 LINUX_T_UNKNOWN, /* 13 */ 171 17, /* 14 T_ALIGNFLT */ 172 LINUX_T_UNKNOWN, /* 15 */ 173 LINUX_T_UNKNOWN, /* 16 */ 174 LINUX_T_UNKNOWN, /* 17 */ 175 0, /* 18 T_DIVIDE */ 176 2, /* 19 T_NMI */ 177 4, /* 20 T_OFLOW */ 178 5, /* 21 T_BOUND */ 179 7, /* 22 T_DNA */ 180 8, /* 23 T_DOUBLEFLT */ 181 9, /* 24 T_FPOPFLT */ 182 10, /* 25 T_TSSFLT */ 183 11, /* 26 T_SEGNPFLT */ 184 12, /* 27 T_STKFLT */ 185 18, /* 28 T_MCHK */ 186 19, /* 29 T_XMMFLT */ 187 15 /* 30 T_RESERVED */ 188 }; 189 #define bsd_to_linux_trapcode(code) \ 190 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 191 _bsd_to_linux_trapcode[(code)]: \ 192 LINUX_T_UNKNOWN) 193 194 /* 195 * If FreeBSD & Linux have a difference of opinion about what a trap 196 * means, deal with it here. 
197 * 198 * MPSAFE 199 */ 200 static int 201 translate_traps(int signal, int trap_code) 202 { 203 if (signal != SIGBUS) 204 return signal; 205 switch (trap_code) { 206 case T_PROTFLT: 207 case T_TSSFLT: 208 case T_DOUBLEFLT: 209 case T_PAGEFLT: 210 return SIGSEGV; 211 default: 212 return signal; 213 } 214 } 215 216 static int 217 linux_fixup(register_t **stack_base, struct image_params *imgp) 218 { 219 register_t *argv, *envp; 220 221 argv = *stack_base; 222 envp = *stack_base + (imgp->argc + 1); 223 (*stack_base)--; 224 **stack_base = (intptr_t)(void *)envp; 225 (*stack_base)--; 226 **stack_base = (intptr_t)(void *)argv; 227 (*stack_base)--; 228 **stack_base = imgp->argc; 229 return 0; 230 } 231 232 static int 233 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 234 { 235 Elf32_Auxargs *args; 236 register_t *pos; 237 238 KASSERT(curthread->td_proc == imgp->proc && 239 (curthread->td_proc->p_flag & P_SA) == 0, 240 ("unsafe elf_linux_fixup(), should be curproc")); 241 args = (Elf32_Auxargs *)imgp->auxargs; 242 pos = *stack_base + (imgp->argc + imgp->envc + 2); 243 244 if (args->trace) 245 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 246 if (args->execfd != -1) 247 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 248 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 249 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 250 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 251 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 252 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 253 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 254 AUXARGS_ENTRY(pos, AT_BASE, args->base); 255 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 256 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 257 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 258 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 259 AUXARGS_ENTRY(pos, AT_NULL, 0); 260 261 free(imgp->auxargs, M_TEMP); 262 imgp->auxargs = NULL; 263 264 (*stack_base)--; 265 **stack_base = (register_t)imgp->argc; 266 return 0; 267 } 
268 269 extern int _ucodesel, _udatasel; 270 extern unsigned long linux_sznonrtsigcode; 271 272 static void 273 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 274 { 275 struct thread *td = curthread; 276 struct proc *p = td->td_proc; 277 struct sigacts *psp; 278 struct trapframe *regs; 279 struct l_rt_sigframe *fp, frame; 280 int oonstack; 281 282 PROC_LOCK_ASSERT(p, MA_OWNED); 283 psp = p->p_sigacts; 284 mtx_assert(&psp->ps_mtx, MA_OWNED); 285 regs = td->td_frame; 286 oonstack = sigonstack(regs->tf_esp); 287 288 #ifdef DEBUG 289 if (ldebug(rt_sendsig)) 290 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 291 catcher, sig, (void*)mask, code); 292 #endif 293 /* 294 * Allocate space for the signal handler context. 295 */ 296 if ((p->p_flag & P_ALTSTACK) && !oonstack && 297 SIGISMEMBER(psp->ps_sigonstack, sig)) { 298 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp + 299 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 300 } else 301 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 302 mtx_unlock(&psp->ps_mtx); 303 304 /* 305 * Build the argument list for the signal handler. 306 */ 307 if (p->p_sysent->sv_sigtbl) 308 if (sig <= p->p_sysent->sv_sigsize) 309 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 310 311 bzero(&frame, sizeof(frame)); 312 313 frame.sf_handler = catcher; 314 frame.sf_sig = sig; 315 frame.sf_siginfo = &fp->sf_si; 316 frame.sf_ucontext = &fp->sf_sc; 317 318 /* Fill in POSIX parts */ 319 frame.sf_si.lsi_signo = sig; 320 frame.sf_si.lsi_code = code; 321 frame.sf_si.lsi_addr = (void *)regs->tf_err; 322 323 /* 324 * Build the signal context to be used by sigreturn. 325 */ 326 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 327 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 328 329 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp; 330 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size; 331 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) 332 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 333 PROC_UNLOCK(p); 334 335 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 336 337 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 338 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 339 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 340 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 341 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 342 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 343 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 344 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 345 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 346 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 347 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 348 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 349 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 350 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 351 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 352 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 353 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 354 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 355 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 356 357 #ifdef DEBUG 358 if (ldebug(rt_sendsig)) 359 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 360 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp, 361 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 362 #endif 363 364 if (copyout(&frame, fp, sizeof(frame)) != 0) { 365 /* 366 * Process has trashed its stack; give it an illegal 367 * instruction to halt it in its tracks. 368 */ 369 #ifdef DEBUG 370 if (ldebug(rt_sendsig)) 371 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 372 fp, oonstack); 373 #endif 374 PROC_LOCK(p); 375 sigexit(td, SIGILL); 376 } 377 378 /* 379 * Build context to run handler in. 
380 */ 381 regs->tf_esp = (int)fp; 382 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 383 linux_sznonrtsigcode; 384 regs->tf_eflags &= ~(PSL_T | PSL_VM); 385 regs->tf_cs = _ucodesel; 386 regs->tf_ds = _udatasel; 387 regs->tf_es = _udatasel; 388 regs->tf_fs = _udatasel; 389 regs->tf_ss = _udatasel; 390 PROC_LOCK(p); 391 mtx_lock(&psp->ps_mtx); 392 } 393 394 395 /* 396 * Send an interrupt to process. 397 * 398 * Stack is set up to allow sigcode stored 399 * in u. to call routine, followed by kcall 400 * to sigreturn routine below. After sigreturn 401 * resets the signal mask, the stack, and the 402 * frame pointer, it returns to the user 403 * specified pc, psl. 404 */ 405 static void 406 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 407 { 408 struct thread *td = curthread; 409 struct proc *p = td->td_proc; 410 struct sigacts *psp; 411 struct trapframe *regs; 412 struct l_sigframe *fp, frame; 413 l_sigset_t lmask; 414 int oonstack, i; 415 416 PROC_LOCK_ASSERT(p, MA_OWNED); 417 psp = p->p_sigacts; 418 mtx_assert(&psp->ps_mtx, MA_OWNED); 419 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 420 /* Signal handler installed with SA_SIGINFO. */ 421 linux_rt_sendsig(catcher, sig, mask, code); 422 return; 423 } 424 425 regs = td->td_frame; 426 oonstack = sigonstack(regs->tf_esp); 427 428 #ifdef DEBUG 429 if (ldebug(sendsig)) 430 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 431 catcher, sig, (void*)mask, code); 432 #endif 433 434 /* 435 * Allocate space for the signal handler context. 436 */ 437 if ((p->p_flag & P_ALTSTACK) && !oonstack && 438 SIGISMEMBER(psp->ps_sigonstack, sig)) { 439 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp + 440 p->p_sigstk.ss_size - sizeof(struct l_sigframe)); 441 } else 442 fp = (struct l_sigframe *)regs->tf_esp - 1; 443 mtx_unlock(&psp->ps_mtx); 444 PROC_UNLOCK(p); 445 446 /* 447 * Build the argument list for the signal handler. 
448 */ 449 if (p->p_sysent->sv_sigtbl) 450 if (sig <= p->p_sysent->sv_sigsize) 451 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 452 453 bzero(&frame, sizeof(frame)); 454 455 frame.sf_handler = catcher; 456 frame.sf_sig = sig; 457 458 bsd_to_linux_sigset(mask, &lmask); 459 460 /* 461 * Build the signal context to be used by sigreturn. 462 */ 463 frame.sf_sc.sc_mask = lmask.__bits[0]; 464 frame.sf_sc.sc_gs = rgs(); 465 frame.sf_sc.sc_fs = regs->tf_fs; 466 frame.sf_sc.sc_es = regs->tf_es; 467 frame.sf_sc.sc_ds = regs->tf_ds; 468 frame.sf_sc.sc_edi = regs->tf_edi; 469 frame.sf_sc.sc_esi = regs->tf_esi; 470 frame.sf_sc.sc_ebp = regs->tf_ebp; 471 frame.sf_sc.sc_ebx = regs->tf_ebx; 472 frame.sf_sc.sc_edx = regs->tf_edx; 473 frame.sf_sc.sc_ecx = regs->tf_ecx; 474 frame.sf_sc.sc_eax = regs->tf_eax; 475 frame.sf_sc.sc_eip = regs->tf_eip; 476 frame.sf_sc.sc_cs = regs->tf_cs; 477 frame.sf_sc.sc_eflags = regs->tf_eflags; 478 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 479 frame.sf_sc.sc_ss = regs->tf_ss; 480 frame.sf_sc.sc_err = regs->tf_err; 481 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 482 483 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 484 frame.sf_extramask[i] = lmask.__bits[i+1]; 485 486 if (copyout(&frame, fp, sizeof(frame)) != 0) { 487 /* 488 * Process has trashed its stack; give it an illegal 489 * instruction to halt it in its tracks. 490 */ 491 PROC_LOCK(p); 492 sigexit(td, SIGILL); 493 } 494 495 /* 496 * Build context to run handler in. 497 */ 498 regs->tf_esp = (int)fp; 499 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 500 regs->tf_eflags &= ~(PSL_T | PSL_VM); 501 regs->tf_cs = _ucodesel; 502 regs->tf_ds = _udatasel; 503 regs->tf_es = _udatasel; 504 regs->tf_fs = _udatasel; 505 regs->tf_ss = _udatasel; 506 PROC_LOCK(p); 507 mtx_lock(&psp->ps_mtx); 508 } 509 510 /* 511 * System call to cleanup state after a signal 512 * has been taken. Reset signal mask and 513 * stack state from context left by sendsig (above). 
514 * Return to previous pc and psl as specified by 515 * context left by sendsig. Check carefully to 516 * make sure that the user has not modified the 517 * psl to gain improper privileges or to cause 518 * a machine fault. 519 */ 520 int 521 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 522 { 523 struct proc *p = td->td_proc; 524 struct l_sigframe frame; 525 struct trapframe *regs; 526 l_sigset_t lmask; 527 int eflags, i; 528 529 regs = td->td_frame; 530 531 #ifdef DEBUG 532 if (ldebug(sigreturn)) 533 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 534 #endif 535 /* 536 * The trampoline code hands us the sigframe. 537 * It is unsafe to keep track of it ourselves, in the event that a 538 * program jumps out of a signal handler. 539 */ 540 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 541 return (EFAULT); 542 543 /* 544 * Check for security violations. 545 */ 546 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 547 eflags = frame.sf_sc.sc_eflags; 548 /* 549 * XXX do allow users to change the privileged flag PSL_RF. The 550 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 551 * sometimes set it there too. tf_eflags is kept in the signal 552 * context during signal handling and there is no other place 553 * to remember it, so the PSL_RF bit may be corrupted by the 554 * signal handler without us knowing. Corruption of the PSL_RF 555 * bit at worst causes one more or one less debugger trap, so 556 * allowing it is fairly harmless. 557 */ 558 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 559 return(EINVAL); 560 561 /* 562 * Don't allow users to load a valid privileged %cs. Let the 563 * hardware check for invalid selectors, excess privilege in 564 * other selectors, invalid %eip's and invalid %esp's. 
565 */ 566 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 567 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 568 trapsignal(td, SIGBUS, T_PROTFLT); 569 return(EINVAL); 570 } 571 572 lmask.__bits[0] = frame.sf_sc.sc_mask; 573 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 574 lmask.__bits[i+1] = frame.sf_extramask[i]; 575 PROC_LOCK(p); 576 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 577 SIG_CANTMASK(td->td_sigmask); 578 signotify(td); 579 PROC_UNLOCK(p); 580 581 /* 582 * Restore signal context. 583 */ 584 /* %gs was restored by the trampoline. */ 585 regs->tf_fs = frame.sf_sc.sc_fs; 586 regs->tf_es = frame.sf_sc.sc_es; 587 regs->tf_ds = frame.sf_sc.sc_ds; 588 regs->tf_edi = frame.sf_sc.sc_edi; 589 regs->tf_esi = frame.sf_sc.sc_esi; 590 regs->tf_ebp = frame.sf_sc.sc_ebp; 591 regs->tf_ebx = frame.sf_sc.sc_ebx; 592 regs->tf_edx = frame.sf_sc.sc_edx; 593 regs->tf_ecx = frame.sf_sc.sc_ecx; 594 regs->tf_eax = frame.sf_sc.sc_eax; 595 regs->tf_eip = frame.sf_sc.sc_eip; 596 regs->tf_cs = frame.sf_sc.sc_cs; 597 regs->tf_eflags = eflags; 598 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 599 regs->tf_ss = frame.sf_sc.sc_ss; 600 601 return (EJUSTRETURN); 602 } 603 604 /* 605 * System call to cleanup state after a signal 606 * has been taken. Reset signal mask and 607 * stack state from context left by rt_sendsig (above). 608 * Return to previous pc and psl as specified by 609 * context left by sendsig. Check carefully to 610 * make sure that the user has not modified the 611 * psl to gain improper privileges or to cause 612 * a machine fault. 
613 */ 614 int 615 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 616 { 617 struct proc *p = td->td_proc; 618 struct l_ucontext uc; 619 struct l_sigcontext *context; 620 l_stack_t *lss; 621 stack_t ss; 622 struct trapframe *regs; 623 int eflags; 624 625 regs = td->td_frame; 626 627 #ifdef DEBUG 628 if (ldebug(rt_sigreturn)) 629 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 630 #endif 631 /* 632 * The trampoline code hands us the ucontext. 633 * It is unsafe to keep track of it ourselves, in the event that a 634 * program jumps out of a signal handler. 635 */ 636 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 637 return (EFAULT); 638 639 context = &uc.uc_mcontext; 640 641 /* 642 * Check for security violations. 643 */ 644 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 645 eflags = context->sc_eflags; 646 /* 647 * XXX do allow users to change the privileged flag PSL_RF. The 648 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 649 * sometimes set it there too. tf_eflags is kept in the signal 650 * context during signal handling and there is no other place 651 * to remember it, so the PSL_RF bit may be corrupted by the 652 * signal handler without us knowing. Corruption of the PSL_RF 653 * bit at worst causes one more or one less debugger trap, so 654 * allowing it is fairly harmless. 655 */ 656 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 657 return(EINVAL); 658 659 /* 660 * Don't allow users to load a valid privileged %cs. Let the 661 * hardware check for invalid selectors, excess privilege in 662 * other selectors, invalid %eip's and invalid %esp's. 
663 */ 664 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 665 if (!CS_SECURE(context->sc_cs)) { 666 trapsignal(td, SIGBUS, T_PROTFLT); 667 return(EINVAL); 668 } 669 670 PROC_LOCK(p); 671 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 672 SIG_CANTMASK(td->td_sigmask); 673 signotify(td); 674 PROC_UNLOCK(p); 675 676 /* 677 * Restore signal context 678 */ 679 /* %gs was restored by the trampoline. */ 680 regs->tf_fs = context->sc_fs; 681 regs->tf_es = context->sc_es; 682 regs->tf_ds = context->sc_ds; 683 regs->tf_edi = context->sc_edi; 684 regs->tf_esi = context->sc_esi; 685 regs->tf_ebp = context->sc_ebp; 686 regs->tf_ebx = context->sc_ebx; 687 regs->tf_edx = context->sc_edx; 688 regs->tf_ecx = context->sc_ecx; 689 regs->tf_eax = context->sc_eax; 690 regs->tf_eip = context->sc_eip; 691 regs->tf_cs = context->sc_cs; 692 regs->tf_eflags = eflags; 693 regs->tf_esp = context->sc_esp_at_signal; 694 regs->tf_ss = context->sc_ss; 695 696 /* 697 * call sigaltstack & ignore results.. 698 */ 699 lss = &uc.uc_stack; 700 ss.ss_sp = lss->ss_sp; 701 ss.ss_size = lss->ss_size; 702 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 703 704 #ifdef DEBUG 705 if (ldebug(rt_sigreturn)) 706 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 707 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 708 #endif 709 (void)kern_sigaltstack(td, &ss, NULL); 710 711 return (EJUSTRETURN); 712 } 713 714 /* 715 * MPSAFE 716 */ 717 static void 718 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 719 { 720 args[0] = tf->tf_ebx; 721 args[1] = tf->tf_ecx; 722 args[2] = tf->tf_edx; 723 args[3] = tf->tf_esi; 724 args[4] = tf->tf_edi; 725 args[5] = tf->tf_ebp; /* Unconfirmed */ 726 *params = NULL; /* no copyin */ 727 } 728 729 730 731 /* 732 * Dump core, into a file named as described in the comments for 733 * expand_name(), unless the process was setuid/setgid. 
734 */ 735 static int 736 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 737 { 738 struct proc *p = td->td_proc; 739 struct ucred *cred = td->td_ucred; 740 struct vmspace *vm = p->p_vmspace; 741 char *tempuser; 742 int error; 743 744 if (ctob((uarea_pages + kstack_pages) + 745 vm->vm_dsize + vm->vm_ssize) >= limit) 746 return (EFAULT); 747 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 748 M_WAITOK | M_ZERO); 749 if (tempuser == NULL) 750 return (ENOMEM); 751 PROC_LOCK(p); 752 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 753 PROC_UNLOCK(p); 754 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 755 bcopy(td->td_frame, 756 tempuser + ctob(uarea_pages) + 757 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 758 sizeof(struct trapframe)); 759 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 760 ctob(uarea_pages + kstack_pages), 761 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 762 (int *)NULL, td); 763 free(tempuser, M_TEMP); 764 if (error == 0) 765 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 766 (int)ctob(vm->vm_dsize), 767 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 768 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 769 if (error == 0) 770 error = vn_rdwr_inchunks(UIO_WRITE, vp, 771 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 772 round_page(ctob(vm->vm_ssize)), 773 (off_t)ctob(uarea_pages + kstack_pages) + 774 ctob(vm->vm_dsize), UIO_USERSPACE, 775 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 776 return (error); 777 } 778 /* 779 * If a linux binary is exec'ing something, try this image activator 780 * first. We override standard shell script execution in order to 781 * be able to modify the interpreter path. We only do this if a linux 782 * binary is doing the exec, so we do not create an EXEC module for it. 
783 */ 784 static int exec_linux_imgact_try(struct image_params *iparams); 785 786 static int 787 exec_linux_imgact_try(struct image_params *imgp) 788 { 789 const char *head = (const char *)imgp->image_header; 790 int error = -1; 791 792 /* 793 * The interpreter for shell scripts run from a linux binary needs 794 * to be located in /compat/linux if possible in order to recursively 795 * maintain linux path emulation. 796 */ 797 if (((const short *)head)[0] == SHELLMAGIC) { 798 /* 799 * Run our normal shell image activator. If it succeeds attempt 800 * to use the alternate path for the interpreter. If an alternate 801 * path is found, use our stringspace to store it. 802 */ 803 if ((error = exec_shell_imgact(imgp)) == 0) { 804 char *rpath = NULL; 805 806 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 807 imgp->interpreter_name, &rpath, 0); 808 if (rpath != imgp->interpreter_name) { 809 int len = strlen(rpath) + 1; 810 811 if (len <= MAXSHELLCMDLEN) { 812 memcpy(imgp->interpreter_name, rpath, len); 813 } 814 free(rpath, M_TEMP); 815 } 816 } 817 } 818 return(error); 819 } 820 821 /* 822 * exec_setregs may initialize some registers differently than Linux 823 * does, thus potentially confusing Linux binaries. If necessary, we 824 * override the exec_setregs default(s) here. 
825 */ 826 static void 827 exec_linux_setregs(struct thread *td, u_long entry, 828 u_long stack, u_long ps_strings) 829 { 830 struct pcb *pcb = td->td_pcb; 831 832 exec_setregs(td, entry, stack, ps_strings); 833 834 /* Linux sets %gs to 0, we default to _udatasel */ 835 pcb->pcb_gs = 0; load_gs(0); 836 } 837 838 struct sysentvec linux_sysvec = { 839 LINUX_SYS_MAXSYSCALL, 840 linux_sysent, 841 0xff, 842 LINUX_SIGTBLSZ, 843 bsd_to_linux_signal, 844 ELAST + 1, 845 bsd_to_linux_errno, 846 translate_traps, 847 linux_fixup, 848 linux_sendsig, 849 linux_sigcode, 850 &linux_szsigcode, 851 linux_prepsyscall, 852 "Linux a.out", 853 linux_aout_coredump, 854 exec_linux_imgact_try, 855 LINUX_MINSIGSTKSZ, 856 PAGE_SIZE, 857 VM_MIN_ADDRESS, 858 VM_MAXUSER_ADDRESS, 859 USRSTACK, 860 PS_STRINGS, 861 VM_PROT_ALL, 862 exec_copyout_strings, 863 exec_linux_setregs 864 }; 865 866 struct sysentvec elf_linux_sysvec = { 867 LINUX_SYS_MAXSYSCALL, 868 linux_sysent, 869 0xff, 870 LINUX_SIGTBLSZ, 871 bsd_to_linux_signal, 872 ELAST + 1, 873 bsd_to_linux_errno, 874 translate_traps, 875 elf_linux_fixup, 876 linux_sendsig, 877 linux_sigcode, 878 &linux_szsigcode, 879 linux_prepsyscall, 880 "Linux ELF", 881 elf32_coredump, 882 exec_linux_imgact_try, 883 LINUX_MINSIGSTKSZ, 884 PAGE_SIZE, 885 VM_MIN_ADDRESS, 886 VM_MAXUSER_ADDRESS, 887 USRSTACK, 888 PS_STRINGS, 889 VM_PROT_ALL, 890 exec_copyout_strings, 891 exec_linux_setregs 892 }; 893 894 static Elf32_Brandinfo linux_brand = { 895 ELFOSABI_LINUX, 896 EM_386, 897 "Linux", 898 "/compat/linux", 899 "/lib/ld-linux.so.1", 900 &elf_linux_sysvec 901 }; 902 903 static Elf32_Brandinfo linux_glibc2brand = { 904 ELFOSABI_LINUX, 905 EM_386, 906 "Linux", 907 "/compat/linux", 908 "/lib/ld-linux.so.2", 909 &elf_linux_sysvec 910 }; 911 912 Elf32_Brandinfo *linux_brandlist[] = { 913 &linux_brand, 914 &linux_glibc2brand, 915 NULL 916 }; 917 918 static int 919 linux_elf_modevent(module_t mod, int type, void *data) 920 { 921 Elf32_Brandinfo **brandinfo; 922 int error; 
923 struct linux_ioctl_handler **lihp; 924 925 error = 0; 926 927 switch(type) { 928 case MOD_LOAD: 929 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 930 ++brandinfo) 931 if (elf32_insert_brand_entry(*brandinfo) < 0) 932 error = EINVAL; 933 if (error == 0) { 934 SET_FOREACH(lihp, linux_ioctl_handler_set) 935 linux_ioctl_register_handler(*lihp); 936 if (bootverbose) 937 printf("Linux ELF exec handler installed\n"); 938 } else 939 printf("cannot insert Linux ELF brand handler\n"); 940 break; 941 case MOD_UNLOAD: 942 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 943 ++brandinfo) 944 if (elf32_brand_inuse(*brandinfo)) 945 error = EBUSY; 946 if (error == 0) { 947 for (brandinfo = &linux_brandlist[0]; 948 *brandinfo != NULL; ++brandinfo) 949 if (elf32_remove_brand_entry(*brandinfo) < 0) 950 error = EINVAL; 951 } 952 if (error == 0) { 953 SET_FOREACH(lihp, linux_ioctl_handler_set) 954 linux_ioctl_unregister_handler(*lihp); 955 if (bootverbose) 956 printf("Linux ELF exec handler removed\n"); 957 linux_mib_destroy(); 958 } else 959 printf("Could not deinstall ELF interpreter entry\n"); 960 break; 961 default: 962 break; 963 } 964 return error; 965 } 966 967 static moduledata_t linux_elf_mod = { 968 "linuxelf", 969 linux_elf_modevent, 970 0 971 }; 972 973 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 974