1 /*- 2 * Copyright (c) 1994-1996 Søren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* XXX we use functions that might not exist. */ 33 #include "opt_compat.h" 34 35 #ifndef COMPAT_43 36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/exec.h> 42 #include <sys/imgact.h> 43 #include <sys/imgact_aout.h> 44 #include <sys/imgact_elf.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/module.h> 49 #include <sys/mutex.h> 50 #include <sys/proc.h> 51 #include <sys/signalvar.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/vnode.h> 56 57 #include <vm/vm.h> 58 #include <vm/pmap.h> 59 #include <vm/vm_extern.h> 60 #include <vm/vm_map.h> 61 #include <vm/vm_object.h> 62 #include <vm/vm_page.h> 63 #include <vm/vm_param.h> 64 65 #include <machine/cpu.h> 66 #include <machine/md_var.h> 67 #include <machine/pcb.h> 68 69 #include <i386/linux/linux.h> 70 #include <i386/linux/linux_proto.h> 71 #include <compat/linux/linux_mib.h> 72 #include <compat/linux/linux_signal.h> 73 #include <compat/linux/linux_util.h> 74 75 MODULE_VERSION(linux, 1); 76 77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 78 79 #if BYTE_ORDER == LITTLE_ENDIAN 80 #define SHELLMAGIC 0x2123 /* #! */ 81 #else 82 #define SHELLMAGIC 0x2321 83 #endif 84 85 /* 86 * Allow the sendsig functions to use the ldebug() facility 87 * even though they are not syscalls themselves. Map them 88 * to syscall 0. This is slightly less bogus than using 89 * ldebug(sigreturn). 
90 */ 91 #define LINUX_SYS_linux_rt_sendsig 0 92 #define LINUX_SYS_linux_sendsig 0 93 94 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 95 #define __LINUX_NPXCW__ 0x37f 96 97 extern char linux_sigcode[]; 98 extern int linux_szsigcode; 99 100 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 101 102 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 103 104 static int linux_fixup(register_t **stack_base, 105 struct image_params *iparams); 106 static int elf_linux_fixup(register_t **stack_base, 107 struct image_params *iparams); 108 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 109 caddr_t *params); 110 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 111 static void exec_linux_setregs(struct thread *td, u_long entry, 112 u_long stack, u_long ps_strings); 113 114 /* 115 * Linux syscalls return negative errno's, we do positive and map them 116 */ 117 static int bsd_to_linux_errno[ELAST + 1] = { 118 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 119 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 120 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 121 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 122 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 123 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 124 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 125 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 126 -6, -6, -43, -42, -75, -6, -84 127 }; 128 129 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 130 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 131 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 132 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 133 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 134 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 135 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 136 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 137 0, LINUX_SIGUSR1, LINUX_SIGUSR2 138 }; 139 140 int 
linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 141 SIGHUP, SIGINT, SIGQUIT, SIGILL, 142 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 143 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 144 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 145 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 146 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 147 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 148 SIGIO, SIGURG, SIGSYS 149 }; 150 151 #define LINUX_T_UNKNOWN 255 152 static int _bsd_to_linux_trapcode[] = { 153 LINUX_T_UNKNOWN, /* 0 */ 154 6, /* 1 T_PRIVINFLT */ 155 LINUX_T_UNKNOWN, /* 2 */ 156 3, /* 3 T_BPTFLT */ 157 LINUX_T_UNKNOWN, /* 4 */ 158 LINUX_T_UNKNOWN, /* 5 */ 159 16, /* 6 T_ARITHTRAP */ 160 254, /* 7 T_ASTFLT */ 161 LINUX_T_UNKNOWN, /* 8 */ 162 13, /* 9 T_PROTFLT */ 163 1, /* 10 T_TRCTRAP */ 164 LINUX_T_UNKNOWN, /* 11 */ 165 14, /* 12 T_PAGEFLT */ 166 LINUX_T_UNKNOWN, /* 13 */ 167 17, /* 14 T_ALIGNFLT */ 168 LINUX_T_UNKNOWN, /* 15 */ 169 LINUX_T_UNKNOWN, /* 16 */ 170 LINUX_T_UNKNOWN, /* 17 */ 171 0, /* 18 T_DIVIDE */ 172 2, /* 19 T_NMI */ 173 4, /* 20 T_OFLOW */ 174 5, /* 21 T_BOUND */ 175 7, /* 22 T_DNA */ 176 8, /* 23 T_DOUBLEFLT */ 177 9, /* 24 T_FPOPFLT */ 178 10, /* 25 T_TSSFLT */ 179 11, /* 26 T_SEGNPFLT */ 180 12, /* 27 T_STKFLT */ 181 18, /* 28 T_MCHK */ 182 19, /* 29 T_XMMFLT */ 183 15 /* 30 T_RESERVED */ 184 }; 185 #define bsd_to_linux_trapcode(code) \ 186 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 187 _bsd_to_linux_trapcode[(code)]: \ 188 LINUX_T_UNKNOWN) 189 190 /* 191 * If FreeBSD & Linux have a difference of opinion about what a trap 192 * means, deal with it here. 
193 * 194 * MPSAFE 195 */ 196 static int 197 translate_traps(int signal, int trap_code) 198 { 199 if (signal != SIGBUS) 200 return signal; 201 switch (trap_code) { 202 case T_PROTFLT: 203 case T_TSSFLT: 204 case T_DOUBLEFLT: 205 case T_PAGEFLT: 206 return SIGSEGV; 207 default: 208 return signal; 209 } 210 } 211 212 static int 213 linux_fixup(register_t **stack_base, struct image_params *imgp) 214 { 215 register_t *argv, *envp; 216 217 argv = *stack_base; 218 envp = *stack_base + (imgp->args->argc + 1); 219 (*stack_base)--; 220 **stack_base = (intptr_t)(void *)envp; 221 (*stack_base)--; 222 **stack_base = (intptr_t)(void *)argv; 223 (*stack_base)--; 224 **stack_base = imgp->args->argc; 225 return 0; 226 } 227 228 static int 229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 230 { 231 Elf32_Auxargs *args; 232 register_t *pos; 233 234 KASSERT(curthread->td_proc == imgp->proc && 235 (curthread->td_proc->p_flag & P_SA) == 0, 236 ("unsafe elf_linux_fixup(), should be curproc")); 237 args = (Elf32_Auxargs *)imgp->auxargs; 238 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 239 240 if (args->trace) 241 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 242 if (args->execfd != -1) 243 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 244 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 245 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 246 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 247 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 248 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 249 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 250 AUXARGS_ENTRY(pos, AT_BASE, args->base); 251 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 252 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 253 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 254 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 255 AUXARGS_ENTRY(pos, AT_NULL, 0); 256 257 free(imgp->auxargs, M_TEMP); 258 imgp->auxargs = NULL; 259 260 (*stack_base)--; 261 **stack_base = 
(register_t)imgp->args->argc; 262 return 0; 263 } 264 265 extern int _ucodesel, _udatasel; 266 extern unsigned long linux_sznonrtsigcode; 267 268 static void 269 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 270 { 271 struct thread *td = curthread; 272 struct proc *p = td->td_proc; 273 struct sigacts *psp; 274 struct trapframe *regs; 275 struct l_rt_sigframe *fp, frame; 276 int sig, code; 277 int oonstack; 278 279 sig = ksi->ksi_signo; 280 code = ksi->ksi_code; 281 PROC_LOCK_ASSERT(p, MA_OWNED); 282 psp = p->p_sigacts; 283 mtx_assert(&psp->ps_mtx, MA_OWNED); 284 regs = td->td_frame; 285 oonstack = sigonstack(regs->tf_esp); 286 287 #ifdef DEBUG 288 if (ldebug(rt_sendsig)) 289 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 290 catcher, sig, (void*)mask, code); 291 #endif 292 /* 293 * Allocate space for the signal handler context. 294 */ 295 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 296 SIGISMEMBER(psp->ps_sigonstack, sig)) { 297 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 298 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 299 } else 300 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 301 mtx_unlock(&psp->ps_mtx); 302 303 /* 304 * Build the argument list for the signal handler. 305 */ 306 if (p->p_sysent->sv_sigtbl) 307 if (sig <= p->p_sysent->sv_sigsize) 308 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 309 310 bzero(&frame, sizeof(frame)); 311 312 frame.sf_handler = catcher; 313 frame.sf_sig = sig; 314 frame.sf_siginfo = &fp->sf_si; 315 frame.sf_ucontext = &fp->sf_sc; 316 317 /* Fill in POSIX parts */ 318 frame.sf_si.lsi_signo = sig; 319 frame.sf_si.lsi_code = code; 320 frame.sf_si.lsi_addr = ksi->ksi_addr; 321 322 /* 323 * Build the signal context to be used by sigreturn. 324 */ 325 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 326 frame.sf_sc.uc_link = NULL; /* XXX ??? 
*/ 327 328 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 329 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 330 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 332 PROC_UNLOCK(p); 333 334 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 335 336 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 337 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 338 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 339 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 340 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 341 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 342 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 343 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 344 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 345 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 346 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 347 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 348 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 349 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 350 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 351 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 352 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 353 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 354 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 355 356 #ifdef DEBUG 357 if (ldebug(rt_sendsig)) 358 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 359 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 360 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 361 #endif 362 363 if (copyout(&frame, fp, sizeof(frame)) != 0) { 364 /* 365 * Process has trashed its stack; give it an illegal 366 * instruction to halt it in its tracks. 367 */ 368 #ifdef DEBUG 369 if (ldebug(rt_sendsig)) 370 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 371 fp, oonstack); 372 #endif 373 PROC_LOCK(p); 374 sigexit(td, SIGILL); 375 } 376 377 /* 378 * Build context to run handler in. 
379 */ 380 regs->tf_esp = (int)fp; 381 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 382 linux_sznonrtsigcode; 383 regs->tf_eflags &= ~(PSL_T | PSL_VM); 384 regs->tf_cs = _ucodesel; 385 regs->tf_ds = _udatasel; 386 regs->tf_es = _udatasel; 387 regs->tf_fs = _udatasel; 388 regs->tf_ss = _udatasel; 389 PROC_LOCK(p); 390 mtx_lock(&psp->ps_mtx); 391 } 392 393 394 /* 395 * Send an interrupt to process. 396 * 397 * Stack is set up to allow sigcode stored 398 * in u. to call routine, followed by kcall 399 * to sigreturn routine below. After sigreturn 400 * resets the signal mask, the stack, and the 401 * frame pointer, it returns to the user 402 * specified pc, psl. 403 */ 404 static void 405 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 406 { 407 struct thread *td = curthread; 408 struct proc *p = td->td_proc; 409 struct sigacts *psp; 410 struct trapframe *regs; 411 struct l_sigframe *fp, frame; 412 l_sigset_t lmask; 413 int sig, code; 414 int oonstack, i; 415 416 PROC_LOCK_ASSERT(p, MA_OWNED); 417 psp = p->p_sigacts; 418 sig = ksi->ksi_signo; 419 code = ksi->ksi_code; 420 mtx_assert(&psp->ps_mtx, MA_OWNED); 421 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 422 /* Signal handler installed with SA_SIGINFO. */ 423 linux_rt_sendsig(catcher, ksi, mask); 424 return; 425 } 426 regs = td->td_frame; 427 oonstack = sigonstack(regs->tf_esp); 428 429 #ifdef DEBUG 430 if (ldebug(sendsig)) 431 printf(ARGS(sendsig, "%p, %d, %p, %u"), 432 catcher, sig, (void*)mask, code); 433 #endif 434 435 /* 436 * Allocate space for the signal handler context. 437 */ 438 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 439 SIGISMEMBER(psp->ps_sigonstack, sig)) { 440 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 441 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 442 } else 443 fp = (struct l_sigframe *)regs->tf_esp - 1; 444 mtx_unlock(&psp->ps_mtx); 445 PROC_UNLOCK(p); 446 447 /* 448 * Build the argument list for the signal handler. 
449 */ 450 if (p->p_sysent->sv_sigtbl) 451 if (sig <= p->p_sysent->sv_sigsize) 452 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 453 454 bzero(&frame, sizeof(frame)); 455 456 frame.sf_handler = catcher; 457 frame.sf_sig = sig; 458 459 bsd_to_linux_sigset(mask, &lmask); 460 461 /* 462 * Build the signal context to be used by sigreturn. 463 */ 464 frame.sf_sc.sc_mask = lmask.__bits[0]; 465 frame.sf_sc.sc_gs = rgs(); 466 frame.sf_sc.sc_fs = regs->tf_fs; 467 frame.sf_sc.sc_es = regs->tf_es; 468 frame.sf_sc.sc_ds = regs->tf_ds; 469 frame.sf_sc.sc_edi = regs->tf_edi; 470 frame.sf_sc.sc_esi = regs->tf_esi; 471 frame.sf_sc.sc_ebp = regs->tf_ebp; 472 frame.sf_sc.sc_ebx = regs->tf_ebx; 473 frame.sf_sc.sc_edx = regs->tf_edx; 474 frame.sf_sc.sc_ecx = regs->tf_ecx; 475 frame.sf_sc.sc_eax = regs->tf_eax; 476 frame.sf_sc.sc_eip = regs->tf_eip; 477 frame.sf_sc.sc_cs = regs->tf_cs; 478 frame.sf_sc.sc_eflags = regs->tf_eflags; 479 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 480 frame.sf_sc.sc_ss = regs->tf_ss; 481 frame.sf_sc.sc_err = regs->tf_err; 482 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 483 484 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 485 frame.sf_extramask[i] = lmask.__bits[i+1]; 486 487 if (copyout(&frame, fp, sizeof(frame)) != 0) { 488 /* 489 * Process has trashed its stack; give it an illegal 490 * instruction to halt it in its tracks. 491 */ 492 PROC_LOCK(p); 493 sigexit(td, SIGILL); 494 } 495 496 /* 497 * Build context to run handler in. 498 */ 499 regs->tf_esp = (int)fp; 500 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 501 regs->tf_eflags &= ~(PSL_T | PSL_VM); 502 regs->tf_cs = _ucodesel; 503 regs->tf_ds = _udatasel; 504 regs->tf_es = _udatasel; 505 regs->tf_fs = _udatasel; 506 regs->tf_ss = _udatasel; 507 PROC_LOCK(p); 508 mtx_lock(&psp->ps_mtx); 509 } 510 511 /* 512 * System call to cleanup state after a signal 513 * has been taken. Reset signal mask and 514 * stack state from context left by sendsig (above). 
515 * Return to previous pc and psl as specified by 516 * context left by sendsig. Check carefully to 517 * make sure that the user has not modified the 518 * psl to gain improper privileges or to cause 519 * a machine fault. 520 */ 521 int 522 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 523 { 524 struct proc *p = td->td_proc; 525 struct l_sigframe frame; 526 struct trapframe *regs; 527 l_sigset_t lmask; 528 int eflags, i; 529 ksiginfo_t ksi; 530 531 regs = td->td_frame; 532 533 #ifdef DEBUG 534 if (ldebug(sigreturn)) 535 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 536 #endif 537 /* 538 * The trampoline code hands us the sigframe. 539 * It is unsafe to keep track of it ourselves, in the event that a 540 * program jumps out of a signal handler. 541 */ 542 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 543 return (EFAULT); 544 545 /* 546 * Check for security violations. 547 */ 548 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 549 eflags = frame.sf_sc.sc_eflags; 550 /* 551 * XXX do allow users to change the privileged flag PSL_RF. The 552 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 553 * sometimes set it there too. tf_eflags is kept in the signal 554 * context during signal handling and there is no other place 555 * to remember it, so the PSL_RF bit may be corrupted by the 556 * signal handler without us knowing. Corruption of the PSL_RF 557 * bit at worst causes one more or one less debugger trap, so 558 * allowing it is fairly harmless. 559 */ 560 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 561 return(EINVAL); 562 563 /* 564 * Don't allow users to load a valid privileged %cs. Let the 565 * hardware check for invalid selectors, excess privilege in 566 * other selectors, invalid %eip's and invalid %esp's. 
567 */ 568 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 569 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 570 ksiginfo_init_trap(&ksi); 571 ksi.ksi_signo = SIGBUS; 572 ksi.ksi_code = BUS_OBJERR; 573 ksi.ksi_trapno = T_PROTFLT; 574 ksi.ksi_addr = (void *)regs->tf_eip; 575 trapsignal(td, &ksi); 576 return(EINVAL); 577 } 578 579 lmask.__bits[0] = frame.sf_sc.sc_mask; 580 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 581 lmask.__bits[i+1] = frame.sf_extramask[i]; 582 PROC_LOCK(p); 583 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 584 SIG_CANTMASK(td->td_sigmask); 585 signotify(td); 586 PROC_UNLOCK(p); 587 588 /* 589 * Restore signal context. 590 */ 591 /* %gs was restored by the trampoline. */ 592 regs->tf_fs = frame.sf_sc.sc_fs; 593 regs->tf_es = frame.sf_sc.sc_es; 594 regs->tf_ds = frame.sf_sc.sc_ds; 595 regs->tf_edi = frame.sf_sc.sc_edi; 596 regs->tf_esi = frame.sf_sc.sc_esi; 597 regs->tf_ebp = frame.sf_sc.sc_ebp; 598 regs->tf_ebx = frame.sf_sc.sc_ebx; 599 regs->tf_edx = frame.sf_sc.sc_edx; 600 regs->tf_ecx = frame.sf_sc.sc_ecx; 601 regs->tf_eax = frame.sf_sc.sc_eax; 602 regs->tf_eip = frame.sf_sc.sc_eip; 603 regs->tf_cs = frame.sf_sc.sc_cs; 604 regs->tf_eflags = eflags; 605 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 606 regs->tf_ss = frame.sf_sc.sc_ss; 607 608 return (EJUSTRETURN); 609 } 610 611 /* 612 * System call to cleanup state after a signal 613 * has been taken. Reset signal mask and 614 * stack state from context left by rt_sendsig (above). 615 * Return to previous pc and psl as specified by 616 * context left by sendsig. Check carefully to 617 * make sure that the user has not modified the 618 * psl to gain improper privileges or to cause 619 * a machine fault. 
620 */ 621 int 622 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 623 { 624 struct proc *p = td->td_proc; 625 struct l_ucontext uc; 626 struct l_sigcontext *context; 627 l_stack_t *lss; 628 stack_t ss; 629 struct trapframe *regs; 630 int eflags; 631 ksiginfo_t ksi; 632 633 regs = td->td_frame; 634 635 #ifdef DEBUG 636 if (ldebug(rt_sigreturn)) 637 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 638 #endif 639 /* 640 * The trampoline code hands us the ucontext. 641 * It is unsafe to keep track of it ourselves, in the event that a 642 * program jumps out of a signal handler. 643 */ 644 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 645 return (EFAULT); 646 647 context = &uc.uc_mcontext; 648 649 /* 650 * Check for security violations. 651 */ 652 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 653 eflags = context->sc_eflags; 654 /* 655 * XXX do allow users to change the privileged flag PSL_RF. The 656 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 657 * sometimes set it there too. tf_eflags is kept in the signal 658 * context during signal handling and there is no other place 659 * to remember it, so the PSL_RF bit may be corrupted by the 660 * signal handler without us knowing. Corruption of the PSL_RF 661 * bit at worst causes one more or one less debugger trap, so 662 * allowing it is fairly harmless. 663 */ 664 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 665 return(EINVAL); 666 667 /* 668 * Don't allow users to load a valid privileged %cs. Let the 669 * hardware check for invalid selectors, excess privilege in 670 * other selectors, invalid %eip's and invalid %esp's. 
671 */ 672 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 673 if (!CS_SECURE(context->sc_cs)) { 674 ksiginfo_init_trap(&ksi); 675 ksi.ksi_signo = SIGBUS; 676 ksi.ksi_code = BUS_OBJERR; 677 ksi.ksi_trapno = T_PROTFLT; 678 ksi.ksi_addr = (void *)regs->tf_eip; 679 trapsignal(td, &ksi); 680 return(EINVAL); 681 } 682 683 PROC_LOCK(p); 684 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 685 SIG_CANTMASK(td->td_sigmask); 686 signotify(td); 687 PROC_UNLOCK(p); 688 689 /* 690 * Restore signal context 691 */ 692 /* %gs was restored by the trampoline. */ 693 regs->tf_fs = context->sc_fs; 694 regs->tf_es = context->sc_es; 695 regs->tf_ds = context->sc_ds; 696 regs->tf_edi = context->sc_edi; 697 regs->tf_esi = context->sc_esi; 698 regs->tf_ebp = context->sc_ebp; 699 regs->tf_ebx = context->sc_ebx; 700 regs->tf_edx = context->sc_edx; 701 regs->tf_ecx = context->sc_ecx; 702 regs->tf_eax = context->sc_eax; 703 regs->tf_eip = context->sc_eip; 704 regs->tf_cs = context->sc_cs; 705 regs->tf_eflags = eflags; 706 regs->tf_esp = context->sc_esp_at_signal; 707 regs->tf_ss = context->sc_ss; 708 709 /* 710 * call sigaltstack & ignore results.. 
711 */ 712 lss = &uc.uc_stack; 713 ss.ss_sp = lss->ss_sp; 714 ss.ss_size = lss->ss_size; 715 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 716 717 #ifdef DEBUG 718 if (ldebug(rt_sigreturn)) 719 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 720 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 721 #endif 722 (void)kern_sigaltstack(td, &ss, NULL); 723 724 return (EJUSTRETURN); 725 } 726 727 /* 728 * MPSAFE 729 */ 730 static void 731 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 732 { 733 args[0] = tf->tf_ebx; 734 args[1] = tf->tf_ecx; 735 args[2] = tf->tf_edx; 736 args[3] = tf->tf_esi; 737 args[4] = tf->tf_edi; 738 args[5] = tf->tf_ebp; /* Unconfirmed */ 739 *params = NULL; /* no copyin */ 740 } 741 742 /* 743 * If a linux binary is exec'ing something, try this image activator 744 * first. We override standard shell script execution in order to 745 * be able to modify the interpreter path. We only do this if a linux 746 * binary is doing the exec, so we do not create an EXEC module for it. 747 */ 748 static int exec_linux_imgact_try(struct image_params *iparams); 749 750 static int 751 exec_linux_imgact_try(struct image_params *imgp) 752 { 753 const char *head = (const char *)imgp->image_header; 754 char *rpath; 755 int error = -1, len; 756 757 /* 758 * The interpreter for shell scripts run from a linux binary needs 759 * to be located in /compat/linux if possible in order to recursively 760 * maintain linux path emulation. 761 */ 762 if (((const short *)head)[0] == SHELLMAGIC) { 763 /* 764 * Run our normal shell image activator. If it succeeds attempt 765 * to use the alternate path for the interpreter. If an alternate 766 * path is found, use our stringspace to store it. 
767 */ 768 if ((error = exec_shell_imgact(imgp)) == 0) { 769 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 770 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 771 if (rpath != NULL) { 772 len = strlen(rpath) + 1; 773 774 if (len <= MAXSHELLCMDLEN) { 775 memcpy(imgp->interpreter_name, rpath, len); 776 } 777 free(rpath, M_TEMP); 778 } 779 } 780 } 781 return(error); 782 } 783 784 /* 785 * exec_setregs may initialize some registers differently than Linux 786 * does, thus potentially confusing Linux binaries. If necessary, we 787 * override the exec_setregs default(s) here. 788 */ 789 static void 790 exec_linux_setregs(struct thread *td, u_long entry, 791 u_long stack, u_long ps_strings) 792 { 793 static const u_short control = __LINUX_NPXCW__; 794 struct pcb *pcb = td->td_pcb; 795 796 exec_setregs(td, entry, stack, ps_strings); 797 798 /* Linux sets %gs to 0, we default to _udatasel */ 799 pcb->pcb_gs = 0; load_gs(0); 800 801 /* Linux sets the i387 to extended precision. */ 802 fldcw(&control); 803 } 804 805 struct sysentvec linux_sysvec = { 806 LINUX_SYS_MAXSYSCALL, 807 linux_sysent, 808 0xff, 809 LINUX_SIGTBLSZ, 810 bsd_to_linux_signal, 811 ELAST + 1, 812 bsd_to_linux_errno, 813 translate_traps, 814 linux_fixup, 815 linux_sendsig, 816 linux_sigcode, 817 &linux_szsigcode, 818 linux_prepsyscall, 819 "Linux a.out", 820 NULL, 821 exec_linux_imgact_try, 822 LINUX_MINSIGSTKSZ, 823 PAGE_SIZE, 824 VM_MIN_ADDRESS, 825 VM_MAXUSER_ADDRESS, 826 USRSTACK, 827 PS_STRINGS, 828 VM_PROT_ALL, 829 exec_copyout_strings, 830 exec_linux_setregs, 831 NULL 832 }; 833 834 struct sysentvec elf_linux_sysvec = { 835 LINUX_SYS_MAXSYSCALL, 836 linux_sysent, 837 0xff, 838 LINUX_SIGTBLSZ, 839 bsd_to_linux_signal, 840 ELAST + 1, 841 bsd_to_linux_errno, 842 translate_traps, 843 elf_linux_fixup, 844 linux_sendsig, 845 linux_sigcode, 846 &linux_szsigcode, 847 linux_prepsyscall, 848 "Linux ELF", 849 elf32_coredump, 850 exec_linux_imgact_try, 851 LINUX_MINSIGSTKSZ, 852 PAGE_SIZE, 853 
VM_MIN_ADDRESS, 854 VM_MAXUSER_ADDRESS, 855 USRSTACK, 856 PS_STRINGS, 857 VM_PROT_ALL, 858 exec_copyout_strings, 859 exec_linux_setregs, 860 NULL 861 }; 862 863 static Elf32_Brandinfo linux_brand = { 864 ELFOSABI_LINUX, 865 EM_386, 866 "Linux", 867 "/compat/linux", 868 "/lib/ld-linux.so.1", 869 &elf_linux_sysvec, 870 NULL, 871 }; 872 873 static Elf32_Brandinfo linux_glibc2brand = { 874 ELFOSABI_LINUX, 875 EM_386, 876 "Linux", 877 "/compat/linux", 878 "/lib/ld-linux.so.2", 879 &elf_linux_sysvec, 880 NULL, 881 }; 882 883 Elf32_Brandinfo *linux_brandlist[] = { 884 &linux_brand, 885 &linux_glibc2brand, 886 NULL 887 }; 888 889 static int 890 linux_elf_modevent(module_t mod, int type, void *data) 891 { 892 Elf32_Brandinfo **brandinfo; 893 int error; 894 struct linux_ioctl_handler **lihp; 895 896 error = 0; 897 898 switch(type) { 899 case MOD_LOAD: 900 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 901 ++brandinfo) 902 if (elf32_insert_brand_entry(*brandinfo) < 0) 903 error = EINVAL; 904 if (error == 0) { 905 SET_FOREACH(lihp, linux_ioctl_handler_set) 906 linux_ioctl_register_handler(*lihp); 907 if (bootverbose) 908 printf("Linux ELF exec handler installed\n"); 909 } else 910 printf("cannot insert Linux ELF brand handler\n"); 911 break; 912 case MOD_UNLOAD: 913 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 914 ++brandinfo) 915 if (elf32_brand_inuse(*brandinfo)) 916 error = EBUSY; 917 if (error == 0) { 918 for (brandinfo = &linux_brandlist[0]; 919 *brandinfo != NULL; ++brandinfo) 920 if (elf32_remove_brand_entry(*brandinfo) < 0) 921 error = EINVAL; 922 } 923 if (error == 0) { 924 SET_FOREACH(lihp, linux_ioctl_handler_set) 925 linux_ioctl_unregister_handler(*lihp); 926 if (bootverbose) 927 printf("Linux ELF exec handler removed\n"); 928 linux_mib_destroy(); 929 } else 930 printf("Could not deinstall ELF interpreter entry\n"); 931 break; 932 default: 933 return EOPNOTSUPP; 934 } 935 return error; 936 } 937 938 static moduledata_t linux_elf_mod = { 939 
"linuxelf", 940 linux_elf_modevent, 941 0 942 }; 943 944 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 945