/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* XXX we use functions that might not exist. */
#include "opt_compat.h"

#ifndef COMPAT_43
#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/exec.h> 42 #include <sys/imgact.h> 43 #include <sys/imgact_aout.h> 44 #include <sys/imgact_elf.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/module.h> 49 #include <sys/mutex.h> 50 #include <sys/proc.h> 51 #include <sys/signalvar.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/user.h> 56 #include <sys/vnode.h> 57 58 #include <vm/vm.h> 59 #include <vm/pmap.h> 60 #include <vm/vm_extern.h> 61 #include <vm/vm_map.h> 62 #include <vm/vm_object.h> 63 #include <vm/vm_page.h> 64 #include <vm/vm_param.h> 65 66 #include <machine/cpu.h> 67 #include <machine/md_var.h> 68 69 #include <i386/linux/linux.h> 70 #include <i386/linux/linux_proto.h> 71 #include <compat/linux/linux_mib.h> 72 #include <compat/linux/linux_signal.h> 73 #include <compat/linux/linux_util.h> 74 75 MODULE_VERSION(linux, 1); 76 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 77 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 78 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 79 80 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 81 82 #if BYTE_ORDER == LITTLE_ENDIAN 83 #define SHELLMAGIC 0x2123 /* #! */ 84 #else 85 #define SHELLMAGIC 0x2321 86 #endif 87 88 /* 89 * Allow the sendsig functions to use the ldebug() facility 90 * even though they are not syscalls themselves. Map them 91 * to syscall 0. This is slightly less bogus than using 92 * ldebug(sigreturn). 
93 */ 94 #define LINUX_SYS_linux_rt_sendsig 0 95 #define LINUX_SYS_linux_sendsig 0 96 97 extern char linux_sigcode[]; 98 extern int linux_szsigcode; 99 100 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 101 102 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 103 104 static int linux_fixup(register_t **stack_base, 105 struct image_params *iparams); 106 static int elf_linux_fixup(register_t **stack_base, 107 struct image_params *iparams); 108 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 109 caddr_t *params); 110 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 111 u_long code); 112 static void exec_linux_setregs(struct thread *td, u_long entry, 113 u_long stack, u_long ps_strings); 114 115 /* 116 * Linux syscalls return negative errno's, we do positive and map them 117 */ 118 static int bsd_to_linux_errno[ELAST + 1] = { 119 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 120 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 121 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 122 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 123 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 124 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 125 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 126 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 127 -6, -6, -43, -42, -75, -6, -84 128 }; 129 130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 131 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 132 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 133 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 134 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 135 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 136 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 137 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 138 0, LINUX_SIGUSR1, LINUX_SIGUSR2 139 }; 140 141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 142 SIGHUP, SIGINT, SIGQUIT, SIGILL, 143 SIGTRAP, 
SIGABRT, SIGBUS, SIGFPE, 144 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 145 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 146 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 147 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 148 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 149 SIGIO, SIGURG, SIGSYS 150 }; 151 152 #define LINUX_T_UNKNOWN 255 153 static int _bsd_to_linux_trapcode[] = { 154 LINUX_T_UNKNOWN, /* 0 */ 155 6, /* 1 T_PRIVINFLT */ 156 LINUX_T_UNKNOWN, /* 2 */ 157 3, /* 3 T_BPTFLT */ 158 LINUX_T_UNKNOWN, /* 4 */ 159 LINUX_T_UNKNOWN, /* 5 */ 160 16, /* 6 T_ARITHTRAP */ 161 254, /* 7 T_ASTFLT */ 162 LINUX_T_UNKNOWN, /* 8 */ 163 13, /* 9 T_PROTFLT */ 164 1, /* 10 T_TRCTRAP */ 165 LINUX_T_UNKNOWN, /* 11 */ 166 14, /* 12 T_PAGEFLT */ 167 LINUX_T_UNKNOWN, /* 13 */ 168 17, /* 14 T_ALIGNFLT */ 169 LINUX_T_UNKNOWN, /* 15 */ 170 LINUX_T_UNKNOWN, /* 16 */ 171 LINUX_T_UNKNOWN, /* 17 */ 172 0, /* 18 T_DIVIDE */ 173 2, /* 19 T_NMI */ 174 4, /* 20 T_OFLOW */ 175 5, /* 21 T_BOUND */ 176 7, /* 22 T_DNA */ 177 8, /* 23 T_DOUBLEFLT */ 178 9, /* 24 T_FPOPFLT */ 179 10, /* 25 T_TSSFLT */ 180 11, /* 26 T_SEGNPFLT */ 181 12, /* 27 T_STKFLT */ 182 18, /* 28 T_MCHK */ 183 19, /* 29 T_XMMFLT */ 184 15 /* 30 T_RESERVED */ 185 }; 186 #define bsd_to_linux_trapcode(code) \ 187 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 188 _bsd_to_linux_trapcode[(code)]: \ 189 LINUX_T_UNKNOWN) 190 191 /* 192 * If FreeBSD & Linux have a difference of opinion about what a trap 193 * means, deal with it here. 
194 * 195 * MPSAFE 196 */ 197 static int 198 translate_traps(int signal, int trap_code) 199 { 200 if (signal != SIGBUS) 201 return signal; 202 switch (trap_code) { 203 case T_PROTFLT: 204 case T_TSSFLT: 205 case T_DOUBLEFLT: 206 case T_PAGEFLT: 207 return SIGSEGV; 208 default: 209 return signal; 210 } 211 } 212 213 static int 214 linux_fixup(register_t **stack_base, struct image_params *imgp) 215 { 216 register_t *argv, *envp; 217 218 argv = *stack_base; 219 envp = *stack_base + (imgp->argc + 1); 220 (*stack_base)--; 221 **stack_base = (intptr_t)(void *)envp; 222 (*stack_base)--; 223 **stack_base = (intptr_t)(void *)argv; 224 (*stack_base)--; 225 **stack_base = imgp->argc; 226 return 0; 227 } 228 229 static int 230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 231 { 232 Elf32_Auxargs *args; 233 register_t *pos; 234 235 KASSERT(curthread->td_proc == imgp->proc && 236 (curthread->td_proc->p_flag & P_SA) == 0, 237 ("unsafe elf_linux_fixup(), should be curproc")); 238 args = (Elf32_Auxargs *)imgp->auxargs; 239 pos = *stack_base + (imgp->argc + imgp->envc + 2); 240 241 if (args->trace) 242 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 243 if (args->execfd != -1) 244 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 245 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 246 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 247 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 248 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 249 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 250 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 251 AUXARGS_ENTRY(pos, AT_BASE, args->base); 252 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 253 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 254 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 255 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 256 AUXARGS_ENTRY(pos, AT_NULL, 0); 257 258 free(imgp->auxargs, M_TEMP); 259 imgp->auxargs = NULL; 260 261 (*stack_base)--; 262 **stack_base = (register_t)imgp->argc; 263 return 0; 264 } 
265 266 extern int _ucodesel, _udatasel; 267 extern unsigned long linux_sznonrtsigcode; 268 269 static void 270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 271 { 272 struct thread *td = curthread; 273 struct proc *p = td->td_proc; 274 struct sigacts *psp; 275 struct trapframe *regs; 276 struct l_rt_sigframe *fp, frame; 277 int oonstack; 278 279 PROC_LOCK_ASSERT(p, MA_OWNED); 280 psp = p->p_sigacts; 281 mtx_assert(&psp->ps_mtx, MA_OWNED); 282 regs = td->td_frame; 283 oonstack = sigonstack(regs->tf_esp); 284 285 #ifdef DEBUG 286 if (ldebug(rt_sendsig)) 287 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 288 catcher, sig, (void*)mask, code); 289 #endif 290 /* 291 * Allocate space for the signal handler context. 292 */ 293 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 294 SIGISMEMBER(psp->ps_sigonstack, sig)) { 295 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 296 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 297 } else 298 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 299 mtx_unlock(&psp->ps_mtx); 300 301 /* 302 * Build the argument list for the signal handler. 303 */ 304 if (p->p_sysent->sv_sigtbl) 305 if (sig <= p->p_sysent->sv_sigsize) 306 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 307 308 bzero(&frame, sizeof(frame)); 309 310 frame.sf_handler = catcher; 311 frame.sf_sig = sig; 312 frame.sf_siginfo = &fp->sf_si; 313 frame.sf_ucontext = &fp->sf_sc; 314 315 /* Fill in POSIX parts */ 316 frame.sf_si.lsi_signo = sig; 317 frame.sf_si.lsi_code = code; 318 frame.sf_si.lsi_addr = (void *)regs->tf_err; 319 320 /* 321 * Build the signal context to be used by sigreturn. 322 */ 323 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 324 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 325 326 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 327 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 328 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 329 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 330 PROC_UNLOCK(p); 331 332 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 333 334 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 335 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 336 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 337 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 338 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 339 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 340 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 341 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 342 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 343 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 344 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 345 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 346 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 347 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 348 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 349 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 350 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 351 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 352 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 353 354 #ifdef DEBUG 355 if (ldebug(rt_sendsig)) 356 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 357 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 358 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 359 #endif 360 361 if (copyout(&frame, fp, sizeof(frame)) != 0) { 362 /* 363 * Process has trashed its stack; give it an illegal 364 * instruction to halt it in its tracks. 365 */ 366 #ifdef DEBUG 367 if (ldebug(rt_sendsig)) 368 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 369 fp, oonstack); 370 #endif 371 PROC_LOCK(p); 372 sigexit(td, SIGILL); 373 } 374 375 /* 376 * Build context to run handler in. 
377 */ 378 regs->tf_esp = (int)fp; 379 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 380 linux_sznonrtsigcode; 381 regs->tf_eflags &= ~(PSL_T | PSL_VM); 382 regs->tf_cs = _ucodesel; 383 regs->tf_ds = _udatasel; 384 regs->tf_es = _udatasel; 385 regs->tf_fs = _udatasel; 386 regs->tf_ss = _udatasel; 387 PROC_LOCK(p); 388 mtx_lock(&psp->ps_mtx); 389 } 390 391 392 /* 393 * Send an interrupt to process. 394 * 395 * Stack is set up to allow sigcode stored 396 * in u. to call routine, followed by kcall 397 * to sigreturn routine below. After sigreturn 398 * resets the signal mask, the stack, and the 399 * frame pointer, it returns to the user 400 * specified pc, psl. 401 */ 402 static void 403 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 404 { 405 struct thread *td = curthread; 406 struct proc *p = td->td_proc; 407 struct sigacts *psp; 408 struct trapframe *regs; 409 struct l_sigframe *fp, frame; 410 l_sigset_t lmask; 411 int oonstack, i; 412 413 PROC_LOCK_ASSERT(p, MA_OWNED); 414 psp = p->p_sigacts; 415 mtx_assert(&psp->ps_mtx, MA_OWNED); 416 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 417 /* Signal handler installed with SA_SIGINFO. */ 418 linux_rt_sendsig(catcher, sig, mask, code); 419 return; 420 } 421 422 regs = td->td_frame; 423 oonstack = sigonstack(regs->tf_esp); 424 425 #ifdef DEBUG 426 if (ldebug(sendsig)) 427 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 428 catcher, sig, (void*)mask, code); 429 #endif 430 431 /* 432 * Allocate space for the signal handler context. 433 */ 434 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 435 SIGISMEMBER(psp->ps_sigonstack, sig)) { 436 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 437 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 438 } else 439 fp = (struct l_sigframe *)regs->tf_esp - 1; 440 mtx_unlock(&psp->ps_mtx); 441 PROC_UNLOCK(p); 442 443 /* 444 * Build the argument list for the signal handler. 
445 */ 446 if (p->p_sysent->sv_sigtbl) 447 if (sig <= p->p_sysent->sv_sigsize) 448 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 449 450 bzero(&frame, sizeof(frame)); 451 452 frame.sf_handler = catcher; 453 frame.sf_sig = sig; 454 455 bsd_to_linux_sigset(mask, &lmask); 456 457 /* 458 * Build the signal context to be used by sigreturn. 459 */ 460 frame.sf_sc.sc_mask = lmask.__bits[0]; 461 frame.sf_sc.sc_gs = rgs(); 462 frame.sf_sc.sc_fs = regs->tf_fs; 463 frame.sf_sc.sc_es = regs->tf_es; 464 frame.sf_sc.sc_ds = regs->tf_ds; 465 frame.sf_sc.sc_edi = regs->tf_edi; 466 frame.sf_sc.sc_esi = regs->tf_esi; 467 frame.sf_sc.sc_ebp = regs->tf_ebp; 468 frame.sf_sc.sc_ebx = regs->tf_ebx; 469 frame.sf_sc.sc_edx = regs->tf_edx; 470 frame.sf_sc.sc_ecx = regs->tf_ecx; 471 frame.sf_sc.sc_eax = regs->tf_eax; 472 frame.sf_sc.sc_eip = regs->tf_eip; 473 frame.sf_sc.sc_cs = regs->tf_cs; 474 frame.sf_sc.sc_eflags = regs->tf_eflags; 475 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 476 frame.sf_sc.sc_ss = regs->tf_ss; 477 frame.sf_sc.sc_err = regs->tf_err; 478 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 479 480 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 481 frame.sf_extramask[i] = lmask.__bits[i+1]; 482 483 if (copyout(&frame, fp, sizeof(frame)) != 0) { 484 /* 485 * Process has trashed its stack; give it an illegal 486 * instruction to halt it in its tracks. 487 */ 488 PROC_LOCK(p); 489 sigexit(td, SIGILL); 490 } 491 492 /* 493 * Build context to run handler in. 494 */ 495 regs->tf_esp = (int)fp; 496 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 497 regs->tf_eflags &= ~(PSL_T | PSL_VM); 498 regs->tf_cs = _ucodesel; 499 regs->tf_ds = _udatasel; 500 regs->tf_es = _udatasel; 501 regs->tf_fs = _udatasel; 502 regs->tf_ss = _udatasel; 503 PROC_LOCK(p); 504 mtx_lock(&psp->ps_mtx); 505 } 506 507 /* 508 * System call to cleanup state after a signal 509 * has been taken. Reset signal mask and 510 * stack state from context left by sendsig (above). 
511 * Return to previous pc and psl as specified by 512 * context left by sendsig. Check carefully to 513 * make sure that the user has not modified the 514 * psl to gain improper privileges or to cause 515 * a machine fault. 516 */ 517 int 518 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 519 { 520 struct proc *p = td->td_proc; 521 struct l_sigframe frame; 522 struct trapframe *regs; 523 l_sigset_t lmask; 524 int eflags, i; 525 526 regs = td->td_frame; 527 528 #ifdef DEBUG 529 if (ldebug(sigreturn)) 530 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 531 #endif 532 /* 533 * The trampoline code hands us the sigframe. 534 * It is unsafe to keep track of it ourselves, in the event that a 535 * program jumps out of a signal handler. 536 */ 537 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 538 return (EFAULT); 539 540 /* 541 * Check for security violations. 542 */ 543 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 544 eflags = frame.sf_sc.sc_eflags; 545 /* 546 * XXX do allow users to change the privileged flag PSL_RF. The 547 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 548 * sometimes set it there too. tf_eflags is kept in the signal 549 * context during signal handling and there is no other place 550 * to remember it, so the PSL_RF bit may be corrupted by the 551 * signal handler without us knowing. Corruption of the PSL_RF 552 * bit at worst causes one more or one less debugger trap, so 553 * allowing it is fairly harmless. 554 */ 555 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 556 return(EINVAL); 557 558 /* 559 * Don't allow users to load a valid privileged %cs. Let the 560 * hardware check for invalid selectors, excess privilege in 561 * other selectors, invalid %eip's and invalid %esp's. 
562 */ 563 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 564 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 565 trapsignal(td, SIGBUS, T_PROTFLT); 566 return(EINVAL); 567 } 568 569 lmask.__bits[0] = frame.sf_sc.sc_mask; 570 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 571 lmask.__bits[i+1] = frame.sf_extramask[i]; 572 PROC_LOCK(p); 573 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 574 SIG_CANTMASK(td->td_sigmask); 575 signotify(td); 576 PROC_UNLOCK(p); 577 578 /* 579 * Restore signal context. 580 */ 581 /* %gs was restored by the trampoline. */ 582 regs->tf_fs = frame.sf_sc.sc_fs; 583 regs->tf_es = frame.sf_sc.sc_es; 584 regs->tf_ds = frame.sf_sc.sc_ds; 585 regs->tf_edi = frame.sf_sc.sc_edi; 586 regs->tf_esi = frame.sf_sc.sc_esi; 587 regs->tf_ebp = frame.sf_sc.sc_ebp; 588 regs->tf_ebx = frame.sf_sc.sc_ebx; 589 regs->tf_edx = frame.sf_sc.sc_edx; 590 regs->tf_ecx = frame.sf_sc.sc_ecx; 591 regs->tf_eax = frame.sf_sc.sc_eax; 592 regs->tf_eip = frame.sf_sc.sc_eip; 593 regs->tf_cs = frame.sf_sc.sc_cs; 594 regs->tf_eflags = eflags; 595 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 596 regs->tf_ss = frame.sf_sc.sc_ss; 597 598 return (EJUSTRETURN); 599 } 600 601 /* 602 * System call to cleanup state after a signal 603 * has been taken. Reset signal mask and 604 * stack state from context left by rt_sendsig (above). 605 * Return to previous pc and psl as specified by 606 * context left by sendsig. Check carefully to 607 * make sure that the user has not modified the 608 * psl to gain improper privileges or to cause 609 * a machine fault. 
610 */ 611 int 612 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 613 { 614 struct proc *p = td->td_proc; 615 struct l_ucontext uc; 616 struct l_sigcontext *context; 617 l_stack_t *lss; 618 stack_t ss; 619 struct trapframe *regs; 620 int eflags; 621 622 regs = td->td_frame; 623 624 #ifdef DEBUG 625 if (ldebug(rt_sigreturn)) 626 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 627 #endif 628 /* 629 * The trampoline code hands us the ucontext. 630 * It is unsafe to keep track of it ourselves, in the event that a 631 * program jumps out of a signal handler. 632 */ 633 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 634 return (EFAULT); 635 636 context = &uc.uc_mcontext; 637 638 /* 639 * Check for security violations. 640 */ 641 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 642 eflags = context->sc_eflags; 643 /* 644 * XXX do allow users to change the privileged flag PSL_RF. The 645 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 646 * sometimes set it there too. tf_eflags is kept in the signal 647 * context during signal handling and there is no other place 648 * to remember it, so the PSL_RF bit may be corrupted by the 649 * signal handler without us knowing. Corruption of the PSL_RF 650 * bit at worst causes one more or one less debugger trap, so 651 * allowing it is fairly harmless. 652 */ 653 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 654 return(EINVAL); 655 656 /* 657 * Don't allow users to load a valid privileged %cs. Let the 658 * hardware check for invalid selectors, excess privilege in 659 * other selectors, invalid %eip's and invalid %esp's. 
660 */ 661 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 662 if (!CS_SECURE(context->sc_cs)) { 663 trapsignal(td, SIGBUS, T_PROTFLT); 664 return(EINVAL); 665 } 666 667 PROC_LOCK(p); 668 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 669 SIG_CANTMASK(td->td_sigmask); 670 signotify(td); 671 PROC_UNLOCK(p); 672 673 /* 674 * Restore signal context 675 */ 676 /* %gs was restored by the trampoline. */ 677 regs->tf_fs = context->sc_fs; 678 regs->tf_es = context->sc_es; 679 regs->tf_ds = context->sc_ds; 680 regs->tf_edi = context->sc_edi; 681 regs->tf_esi = context->sc_esi; 682 regs->tf_ebp = context->sc_ebp; 683 regs->tf_ebx = context->sc_ebx; 684 regs->tf_edx = context->sc_edx; 685 regs->tf_ecx = context->sc_ecx; 686 regs->tf_eax = context->sc_eax; 687 regs->tf_eip = context->sc_eip; 688 regs->tf_cs = context->sc_cs; 689 regs->tf_eflags = eflags; 690 regs->tf_esp = context->sc_esp_at_signal; 691 regs->tf_ss = context->sc_ss; 692 693 /* 694 * call sigaltstack & ignore results.. 695 */ 696 lss = &uc.uc_stack; 697 ss.ss_sp = lss->ss_sp; 698 ss.ss_size = lss->ss_size; 699 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 700 701 #ifdef DEBUG 702 if (ldebug(rt_sigreturn)) 703 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 704 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 705 #endif 706 (void)kern_sigaltstack(td, &ss, NULL); 707 708 return (EJUSTRETURN); 709 } 710 711 /* 712 * MPSAFE 713 */ 714 static void 715 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 716 { 717 args[0] = tf->tf_ebx; 718 args[1] = tf->tf_ecx; 719 args[2] = tf->tf_edx; 720 args[3] = tf->tf_esi; 721 args[4] = tf->tf_edi; 722 args[5] = tf->tf_ebp; /* Unconfirmed */ 723 *params = NULL; /* no copyin */ 724 } 725 726 727 728 /* 729 * Dump core, into a file named as described in the comments for 730 * expand_name(), unless the process was setuid/setgid. 
731 */ 732 static int 733 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 734 { 735 struct proc *p = td->td_proc; 736 struct ucred *cred = td->td_ucred; 737 struct vmspace *vm = p->p_vmspace; 738 char *tempuser; 739 int error; 740 741 if (ctob((uarea_pages + kstack_pages) + 742 vm->vm_dsize + vm->vm_ssize) >= limit) 743 return (EFAULT); 744 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 745 M_WAITOK | M_ZERO); 746 if (tempuser == NULL) 747 return (ENOMEM); 748 PROC_LOCK(p); 749 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 750 PROC_UNLOCK(p); 751 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 752 bcopy(td->td_frame, 753 tempuser + ctob(uarea_pages) + 754 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 755 sizeof(struct trapframe)); 756 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 757 ctob(uarea_pages + kstack_pages), 758 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 759 (int *)NULL, td); 760 free(tempuser, M_TEMP); 761 if (error == 0) 762 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 763 (int)ctob(vm->vm_dsize), 764 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 765 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 766 if (error == 0) 767 error = vn_rdwr_inchunks(UIO_WRITE, vp, 768 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 769 round_page(ctob(vm->vm_ssize)), 770 (off_t)ctob(uarea_pages + kstack_pages) + 771 ctob(vm->vm_dsize), UIO_USERSPACE, 772 IO_UNIT | IO_DIRECT, cred, NOCRED, NULL, td); 773 return (error); 774 } 775 /* 776 * If a linux binary is exec'ing something, try this image activator 777 * first. We override standard shell script execution in order to 778 * be able to modify the interpreter path. We only do this if a linux 779 * binary is doing the exec, so we do not create an EXEC module for it. 
780 */ 781 static int exec_linux_imgact_try(struct image_params *iparams); 782 783 static int 784 exec_linux_imgact_try(struct image_params *imgp) 785 { 786 const char *head = (const char *)imgp->image_header; 787 int error = -1; 788 789 /* 790 * The interpreter for shell scripts run from a linux binary needs 791 * to be located in /compat/linux if possible in order to recursively 792 * maintain linux path emulation. 793 */ 794 if (((const short *)head)[0] == SHELLMAGIC) { 795 /* 796 * Run our normal shell image activator. If it succeeds attempt 797 * to use the alternate path for the interpreter. If an alternate 798 * path is found, use our stringspace to store it. 799 */ 800 if ((error = exec_shell_imgact(imgp)) == 0) { 801 char *rpath = NULL; 802 803 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 804 imgp->interpreter_name, &rpath, 0); 805 if (rpath != imgp->interpreter_name) { 806 int len = strlen(rpath) + 1; 807 808 if (len <= MAXSHELLCMDLEN) { 809 memcpy(imgp->interpreter_name, rpath, len); 810 } 811 free(rpath, M_TEMP); 812 } 813 } 814 } 815 return(error); 816 } 817 818 /* 819 * exec_setregs may initialize some registers differently than Linux 820 * does, thus potentially confusing Linux binaries. If necessary, we 821 * override the exec_setregs default(s) here. 
822 */ 823 static void 824 exec_linux_setregs(struct thread *td, u_long entry, 825 u_long stack, u_long ps_strings) 826 { 827 struct pcb *pcb = td->td_pcb; 828 829 exec_setregs(td, entry, stack, ps_strings); 830 831 /* Linux sets %gs to 0, we default to _udatasel */ 832 pcb->pcb_gs = 0; load_gs(0); 833 } 834 835 struct sysentvec linux_sysvec = { 836 LINUX_SYS_MAXSYSCALL, 837 linux_sysent, 838 0xff, 839 LINUX_SIGTBLSZ, 840 bsd_to_linux_signal, 841 ELAST + 1, 842 bsd_to_linux_errno, 843 translate_traps, 844 linux_fixup, 845 linux_sendsig, 846 linux_sigcode, 847 &linux_szsigcode, 848 linux_prepsyscall, 849 "Linux a.out", 850 linux_aout_coredump, 851 exec_linux_imgact_try, 852 LINUX_MINSIGSTKSZ, 853 PAGE_SIZE, 854 VM_MIN_ADDRESS, 855 VM_MAXUSER_ADDRESS, 856 USRSTACK, 857 PS_STRINGS, 858 VM_PROT_ALL, 859 exec_copyout_strings, 860 exec_linux_setregs, 861 NULL 862 }; 863 864 struct sysentvec elf_linux_sysvec = { 865 LINUX_SYS_MAXSYSCALL, 866 linux_sysent, 867 0xff, 868 LINUX_SIGTBLSZ, 869 bsd_to_linux_signal, 870 ELAST + 1, 871 bsd_to_linux_errno, 872 translate_traps, 873 elf_linux_fixup, 874 linux_sendsig, 875 linux_sigcode, 876 &linux_szsigcode, 877 linux_prepsyscall, 878 "Linux ELF", 879 elf32_coredump, 880 exec_linux_imgact_try, 881 LINUX_MINSIGSTKSZ, 882 PAGE_SIZE, 883 VM_MIN_ADDRESS, 884 VM_MAXUSER_ADDRESS, 885 USRSTACK, 886 PS_STRINGS, 887 VM_PROT_ALL, 888 exec_copyout_strings, 889 exec_linux_setregs, 890 NULL 891 }; 892 893 static Elf32_Brandinfo linux_brand = { 894 ELFOSABI_LINUX, 895 EM_386, 896 "Linux", 897 "/compat/linux", 898 "/lib/ld-linux.so.1", 899 &elf_linux_sysvec, 900 NULL, 901 }; 902 903 static Elf32_Brandinfo linux_glibc2brand = { 904 ELFOSABI_LINUX, 905 EM_386, 906 "Linux", 907 "/compat/linux", 908 "/lib/ld-linux.so.2", 909 &elf_linux_sysvec, 910 NULL, 911 }; 912 913 Elf32_Brandinfo *linux_brandlist[] = { 914 &linux_brand, 915 &linux_glibc2brand, 916 NULL 917 }; 918 919 static int 920 linux_elf_modevent(module_t mod, int type, void *data) 921 { 922 
Elf32_Brandinfo **brandinfo; 923 int error; 924 struct linux_ioctl_handler **lihp; 925 926 error = 0; 927 928 switch(type) { 929 case MOD_LOAD: 930 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 931 ++brandinfo) 932 if (elf32_insert_brand_entry(*brandinfo) < 0) 933 error = EINVAL; 934 if (error == 0) { 935 SET_FOREACH(lihp, linux_ioctl_handler_set) 936 linux_ioctl_register_handler(*lihp); 937 if (bootverbose) 938 printf("Linux ELF exec handler installed\n"); 939 } else 940 printf("cannot insert Linux ELF brand handler\n"); 941 break; 942 case MOD_UNLOAD: 943 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 944 ++brandinfo) 945 if (elf32_brand_inuse(*brandinfo)) 946 error = EBUSY; 947 if (error == 0) { 948 for (brandinfo = &linux_brandlist[0]; 949 *brandinfo != NULL; ++brandinfo) 950 if (elf32_remove_brand_entry(*brandinfo) < 0) 951 error = EINVAL; 952 } 953 if (error == 0) { 954 SET_FOREACH(lihp, linux_ioctl_handler_set) 955 linux_ioctl_unregister_handler(*lihp); 956 if (bootverbose) 957 printf("Linux ELF exec handler removed\n"); 958 linux_mib_destroy(); 959 } else 960 printf("Could not deinstall ELF interpreter entry\n"); 961 break; 962 default: 963 return EOPNOTSUPP; 964 } 965 return error; 966 } 967 968 static moduledata_t linux_elf_mod = { 969 "linuxelf", 970 linux_elf_modevent, 971 0 972 }; 973 974 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 975